@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
@@ -51,6 +52,8 @@ struct dm_kcopyd_client {
 	struct workqueue_struct *kcopyd_wq;
 	struct work_struct kcopyd_work;
 
+	struct dm_kcopyd_throttle *throttle;
+
 /*
  * We maintain three lists of jobs:
  *
@@ -68,6 +71,117 @@ struct dm_kcopyd_client {
 
 static struct page_list zero_page_list;
 
+static DEFINE_SPINLOCK(throttle_spinlock);
+
+/*
+ * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
+ * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
+ * by 2.
+ */
+#define ACCOUNT_INTERVAL_SHIFT	SHIFT_HZ
+
+/*
+ * Sleep this number of milliseconds.
+ *
+ * The value was decided experimentally.
+ * Smaller values seem to cause an increased copy rate above the limit.
+ * The reason for this is unknown but possibly due to jiffies rounding errors
+ * or read/write cache inside the disk.
+ */
+#define SLEEP_MSEC	100
+
+/*
+ * Maximum number of sleep events. There is a theoretical livelock if more
+ * kcopyd clients do work simultaneously which this limit avoids.
+ */
+#define MAX_SLEEPS	10
+
+static void io_job_start(struct dm_kcopyd_throttle *t)
+{
+	unsigned throttle, now, difference;
+	int slept = 0, skew;
+
+	if (unlikely(!t))
+		return;
+
+try_again:
+	spin_lock_irq(&throttle_spinlock);
+
+	throttle = ACCESS_ONCE(t->throttle);
+
+	if (likely(throttle >= 100))
+		goto skip_limit;
+
+	now = jiffies;
+	difference = now - t->last_jiffies;
+	t->last_jiffies = now;
+	if (t->num_io_jobs)
+		t->io_period += difference;
+	t->total_period += difference;
+
+	/*
+	 * Maintain sane values if we got a temporary overflow.
+	 */
+	if (unlikely(t->io_period > t->total_period))
+		t->io_period = t->total_period;
+
+	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
+		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
+		t->total_period >>= shift;
+		t->io_period >>= shift;
+	}
+
+	skew = t->io_period - throttle * t->total_period / 100;
+
+	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
+		slept++;
+		spin_unlock_irq(&throttle_spinlock);
+		msleep(SLEEP_MSEC);
+		goto try_again;
+	}
+
+skip_limit:
+	t->num_io_jobs++;
+
+	spin_unlock_irq(&throttle_spinlock);
+}
+
+static void io_job_finish(struct dm_kcopyd_throttle *t)
+{
+	unsigned long flags;
+
+	if (unlikely(!t))
+		return;
+
+	spin_lock_irqsave(&throttle_spinlock, flags);
+
+	t->num_io_jobs--;
+
+	if (likely(ACCESS_ONCE(t->throttle) >= 100))
+		goto skip_limit;
+
+	if (!t->num_io_jobs) {
+		unsigned now, difference;
+
+		now = jiffies;
+		difference = now - t->last_jiffies;
+		t->last_jiffies = now;
+
+		t->io_period += difference;
+		t->total_period += difference;
+
+		/*
+		 * Maintain sane values if we got a temporary overflow.
+		 */
+		if (unlikely(t->io_period > t->total_period))
+			t->io_period = t->total_period;
+	}
+
+skip_limit:
+	spin_unlock_irqrestore(&throttle_spinlock, flags);
+}
+
+
 static void wake(struct dm_kcopyd_client *kc)
 {
 	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
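Note on the accounting hunk above, with illustrative numbers that are not taken from the patch: with HZ = 250, SHIFT_HZ is 8, so once total_period reaches 1 << 8 = 256 jiffies (about one second) both counters are scaled down, halved when the overshoot is small and shifted further when it is larger. Suppose t->throttle is 50 and the current window has total_period = 200 jiffies, of which io_period = 120 were spent with at least one copy in flight. Then skew = 120 - 50 * 200 / 100 = 20, which is positive, so the submitting thread drops the lock, sleeps SLEEP_MSEC (100 ms) and re-checks, letting the I/O through unconditionally after at most MAX_SLEEPS (10) rounds.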
@@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context)
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 	struct dm_kcopyd_client *kc = job->kc;
 
+	io_job_finish(kc->throttle);
+
 	if (error) {
 		if (job->rw & WRITE)
 			job->write_err |= error;
@@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job)
 		.client = job->kc->io_client,
 	};
 
+	io_job_start(job->kc->throttle);
+
 	if (job->rw == READ)
 		r = dm_io(&io_req, 1, &job->source, NULL);
 	else
@@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 /*-----------------------------------------------------------------
  * Client setup
  *---------------------------------------------------------------*/
-struct dm_kcopyd_client *dm_kcopyd_client_create(void)
+struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
 	int r = -ENOMEM;
 	struct dm_kcopyd_client *kc;
@@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void)
 	INIT_LIST_HEAD(&kc->complete_jobs);
 	INIT_LIST_HEAD(&kc->io_jobs);
 	INIT_LIST_HEAD(&kc->pages_jobs);
+	kc->throttle = throttle;
 
 	kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
 	if (!kc->job_pool)
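For orientation, a minimal caller sketch for the changed constructor. It is not part of the patch: struct my_target, my_target_create_kcopyd and copy_throttle are made-up names, and the field layout of struct dm_kcopyd_throttle is inferred from the accounting code above, since the header change that defines it is not shown in these hunks.

#include <linux/err.h>
#include <linux/dm-kcopyd.h>

/*
 * Hypothetical throttle instance: limit this client's copy I/O to
 * roughly 25% of wall-clock time.  A value >= 100 disables the limit,
 * because io_job_start() and io_job_finish() skip the accounting then.
 * The remaining fields are zero-initialized, which is the correct
 * starting state for the jiffies-based accounting.
 */
static struct dm_kcopyd_throttle copy_throttle = {
	.throttle = 25,
};

struct my_target {			/* made-up example target */
	struct dm_kcopyd_client *kcopyd_client;
};

/* Create a throttled kcopyd client for the example target. */
static int my_target_create_kcopyd(struct my_target *mt)
{
	mt->kcopyd_client = dm_kcopyd_client_create(&copy_throttle);
	if (IS_ERR(mt->kcopyd_client))
		return PTR_ERR(mt->kcopyd_client);
	return 0;
}

Callers that want the old unthrottled behaviour pass NULL instead, since both io_job_start() and io_job_finish() return immediately on a NULL throttle.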