@@ -133,6 +133,7 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
+struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting. All on-duty workers
@@ -154,7 +155,10 @@ struct worker {
 	unsigned long		last_active;	/* L: last active timestamp */
 	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
-	struct work_struct	rebind_work;	/* L: rebind worker to cpu */
+
+	/* for rebinding worker to CPU */
+	struct idle_rebind	*idle_rebind;	/* L: for idle worker */
+	struct work_struct	rebind_work;	/* L: for busy worker */
 };
 
 struct worker_pool {
@@ -190,6 +194,8 @@ struct global_cwq {
 
 	struct worker_pool	pools[2];	/* normal and highpri pools */
 
+	wait_queue_head_t	rebind_hold;	/* rebind hold wait */
+
 	struct task_struct	*trustee;	/* L: for gcwq shutdown */
 	unsigned int		trustee_state;	/* L: trustee state */
 	wait_queue_head_t	trustee_wait;	/* trustee wait */
@@ -1314,13 +1320,37 @@ __acquires(&gcwq->lock)
 	}
 }
 
+struct idle_rebind {
+	int			cnt;		/* # workers to be rebound */
+	struct completion	done;		/* all workers rebound */
+};
+
+/*
+ * Rebind an idle @worker to its CPU. During CPU onlining, this has to
+ * happen synchronously for idle workers. worker_thread() will test
+ * %WORKER_REBIND before leaving idle and call this function.
+ */
+static void idle_worker_rebind(struct worker *worker)
+{
+	struct global_cwq *gcwq = worker->pool->gcwq;
+
+	/* CPU must be online at this point */
+	WARN_ON(!worker_maybe_bind_and_lock(worker));
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
+
+	/* we did our part, wait for rebind_workers() to finish up */
+	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+}
+
 /*
- * Function for worker->rebind_work used to rebind unbound busy workers to
+ * Function for @worker->rebind_work used to rebind unbound busy workers to
  * the associated cpu which is coming back online. This is scheduled by
  * cpu up but can race with other cpu hotplug operations and may be
  * executed twice without intervening cpu down.
  */
-static void worker_rebind_fn(struct work_struct *work)
+static void busy_worker_rebind_fn(struct work_struct *work)
 {
 	struct worker *worker = container_of(work, struct worker, rebind_work);
 	struct global_cwq *gcwq = worker->pool->gcwq;
@@ -1331,6 +1361,112 @@ static void worker_rebind_fn(struct work_struct *work)
 	spin_unlock_irq(&gcwq->lock);
 }
 
+/**
+ * rebind_workers - rebind all workers of a gcwq to the associated CPU
+ * @gcwq: gcwq of interest
+ *
+ * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding
+ * is different for idle and busy ones.
+ *
+ * The idle ones should be rebound synchronously and idle rebinding should
+ * be complete before any worker starts executing work items with
+ * concurrency management enabled; otherwise, the scheduler may oops trying
+ * to wake up a non-local idle worker from wq_worker_sleeping().
+ *
+ * This is achieved by repeatedly requesting rebinding until all idle
+ * workers are known to have been rebound under @gcwq->lock and holding all
+ * idle workers from becoming busy until idle rebinding is complete.
+ *
+ * Once idle workers are rebound, busy workers can be rebound as they
+ * finish executing their current work items. Queueing the rebind work at
+ * the head of their scheduled lists is enough. Note that nr_running will
+ * be properly bumped as busy workers rebind.
+ *
+ * On return, all workers are guaranteed to either be bound or have rebind
+ * work item scheduled.
+ */
+static void rebind_workers(struct global_cwq *gcwq)
+	__releases(&gcwq->lock) __acquires(&gcwq->lock)
+{
+	struct idle_rebind idle_rebind;
+	struct worker_pool *pool;
+	struct worker *worker;
+	struct hlist_node *pos;
+	int i;
+
+	lockdep_assert_held(&gcwq->lock);
+
+	for_each_worker_pool(pool, gcwq)
+		lockdep_assert_held(&pool->manager_mutex);
+
+	/*
+	 * Rebind idle workers. Interlocked both ways. We wait for
+	 * workers to rebind via @idle_rebind.done. Workers will wait for
+	 * us to finish up by watching %WORKER_REBIND.
+	 */
+	init_completion(&idle_rebind.done);
+retry:
+	idle_rebind.cnt = 1;
+	INIT_COMPLETION(idle_rebind.done);
+
+	/* set REBIND and kick idle ones, we'll wait for these later */
+	for_each_worker_pool(pool, gcwq) {
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			if (worker->flags & WORKER_REBIND)
+				continue;
+
+			/* morph UNBOUND to REBIND */
+			worker->flags &= ~WORKER_UNBOUND;
+			worker->flags |= WORKER_REBIND;
+
+			idle_rebind.cnt++;
+			worker->idle_rebind = &idle_rebind;
+
+			/* worker_thread() will call idle_worker_rebind() */
+			wake_up_process(worker->task);
+		}
+	}
+
+	if (--idle_rebind.cnt) {
+		spin_unlock_irq(&gcwq->lock);
+		wait_for_completion(&idle_rebind.done);
+		spin_lock_irq(&gcwq->lock);
+		/* busy ones might have become idle while waiting, retry */
+		goto retry;
+	}
+
+	/*
+	 * All idle workers are rebound and waiting for %WORKER_REBIND to
+	 * be cleared inside idle_worker_rebind(). Clear and release.
+	 * Clearing %WORKER_REBIND from this foreign context is safe
+	 * because these workers are still guaranteed to be idle.
+	 */
+	for_each_worker_pool(pool, gcwq)
+		list_for_each_entry(worker, &pool->idle_list, entry)
+			worker->flags &= ~WORKER_REBIND;
+
+	wake_up_all(&gcwq->rebind_hold);
+
+	/* rebind busy workers */
+	for_each_busy_worker(worker, i, pos, gcwq) {
+		struct work_struct *rebind_work = &worker->rebind_work;
+
+		/* morph UNBOUND to REBIND */
+		worker->flags &= ~WORKER_UNBOUND;
+		worker->flags |= WORKER_REBIND;
+
+		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+				     work_data_bits(rebind_work)))
+			continue;
+
+		/* wq doesn't matter, use the default one */
+		debug_work_activate(rebind_work);
+		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
+			    worker->scheduled.next,
+			    work_color_to_flags(WORK_NO_COLOR));
+	}
+}
+
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
@@ -1339,7 +1475,7 @@ static struct worker *alloc_worker(void)
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
-		INIT_WORK(&worker->rebind_work, worker_rebind_fn);
+		INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn);
 		/* on creation a worker is in !idle && prep state */
 		worker->flags = WORKER_PREP;
 	}
@@ -1829,6 +1965,9 @@ __acquires(&gcwq->lock)
 
 	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
 #endif
+	WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) &&
+		     raw_smp_processor_id() != gcwq->cpu);
+
 	/*
 	 * A single work shouldn't be executed concurrently by
 	 * multiple workers on a single cpu. Check whether anyone is
@@ -1946,11 +2085,20 @@ static int worker_thread(void *__worker)
 woke_up:
 	spin_lock_irq(&gcwq->lock);
 
-	/* DIE can be set only while we're idle, checking here is enough */
-	if (worker->flags & WORKER_DIE) {
+	/*
+	 * DIE can be set only while idle and REBIND set while busy has
+	 * @worker->rebind_work scheduled. Checking here is enough.
+	 */
+	if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) {
 		spin_unlock_irq(&gcwq->lock);
-		worker->task->flags &= ~PF_WQ_WORKER;
-		return 0;
+
+		if (worker->flags & WORKER_DIE) {
+			worker->task->flags &= ~PF_WQ_WORKER;
+			return 0;
+		}
+
+		idle_worker_rebind(worker);
+		goto woke_up;
 	}
 
 	worker_leave_idle(worker);
@@ -3468,42 +3616,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 		}
 	} while (i && rc >= 0);
 
-	/*
-	 * At this point, either draining has completed and no worker
-	 * is left, or cpu down has been canceled or the cpu is being
-	 * brought back up. There shouldn't be any idle one left.
-	 * Tell the remaining busy ones to rebind once it finishes the
-	 * currently scheduled works by scheduling the rebind_work.
-	 */
-	for_each_worker_pool(pool, gcwq)
-		WARN_ON(!list_empty(&pool->idle_list));
-
-	/* if we're reassociating, clear DISASSOCIATED */
-	if (gcwq->trustee_state == TRUSTEE_RELEASE)
-		gcwq->flags &= ~GCWQ_DISASSOCIATED;
-
-	for_each_busy_worker(worker, i, pos, gcwq) {
-		struct work_struct *rebind_work = &worker->rebind_work;
-
-		/*
-		 * Rebind_work may race with future cpu hotplug
-		 * operations. Use a separate flag to mark that
-		 * rebinding is scheduled.
-		 */
-		worker->flags |= WORKER_REBIND;
-		worker->flags &= ~WORKER_UNBOUND;
-
-		/* queue rebind_work, wq doesn't matter, use the default one */
-		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
-				     work_data_bits(rebind_work)))
-			continue;
-
-		debug_work_activate(rebind_work);
-		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-			    worker->scheduled.next,
-			    work_color_to_flags(WORK_NO_COLOR));
-	}
-
 	gcwq_release_management(gcwq);
 
 	/* notify completion */
@@ -3609,13 +3721,16 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 			wait_trustee_state(gcwq, TRUSTEE_DONE);
 		}
 
-		/*
-		 * Either DISASSOCIATED is already cleared or no worker is
-		 * left on the gcwq. Safe to clear DISASSOCIATED without
-		 * claiming managers.
-		 */
+		spin_unlock_irq(&gcwq->lock);
+		gcwq_claim_management(gcwq);
+		spin_lock_irq(&gcwq->lock);
+
 		gcwq->flags &= ~GCWQ_DISASSOCIATED;
 
+		rebind_workers(gcwq);
+
+		gcwq_release_management(gcwq);
+
 		/*
 		 * Trustee is done and there might be no worker left.
 		 * Put the first_idle in and request a real manager to
@@ -3910,6 +4025,8 @@ static int __init init_workqueues(void)
 			ida_init(&pool->worker_ida);
 		}
 
+		init_waitqueue_head(&gcwq->rebind_hold);
+
 		gcwq->trustee_state = TRUSTEE_DONE;
 		init_waitqueue_head(&gcwq->trustee_wait);
 	}