@@ -66,6 +66,7 @@ enum {
 
 	/* pool flags */
 	POOL_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
+	POOL_MANAGING_WORKERS	= 1 << 1,	/* managing workers */
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
@@ -682,7 +683,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_mutex);
+	bool managing = pool->flags & POOL_MANAGING_WORKERS;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
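
The too_many_workers() hunk is easier to see in isolation: "managing" must now come from a dedicated pool flag because, after the manage_workers() change further down, manager_mutex may also be held by the CPU hotplug path, so mutex_is_locked() no longer means "a worker is currently managing".  The standalone C model below is only an illustration; pool_model and its placeholder idle-ratio policy are invented for the sketch and are not the workqueue code.

/* Hypothetical userspace model, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

enum {
	POOL_MANAGE_WORKERS	= 1 << 0,
	POOL_MANAGING_WORKERS	= 1 << 1,
};

struct pool_model {
	unsigned int flags;
	int nr_workers;
	int nr_idle;
};

/*
 * Mirrors the new check: "managing" comes from the pool's own flag, not
 * from whether manager_mutex happens to be held, because that mutex may
 * now also be held by the hotplug path.
 */
static bool too_many_workers_model(const struct pool_model *pool)
{
	bool managing = pool->flags & POOL_MANAGING_WORKERS;
	int nr_idle = pool->nr_idle + managing;	/* manager counts as idle */
	int nr_busy = pool->nr_workers - nr_idle;

	/* placeholder policy, not the workqueue heuristic */
	return nr_idle > 2 && (nr_idle - 2) * 2 >= nr_busy;
}

int main(void)
{
	struct pool_model pool = {
		.flags		= POOL_MANAGING_WORKERS,
		.nr_workers	= 8,
		.nr_idle	= 5,
	};

	printf("too many workers: %s\n",
	       too_many_workers_model(&pool) ? "yes" : "no");
	return 0;
}
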
@@ -1632,6 +1633,15 @@ static void idle_worker_rebind(struct worker *worker)
 
 	/* we did our part, wait for rebind_workers() to finish up */
 	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+
+	/*
+	 * rebind_workers() shouldn't finish until all workers passed the
+	 * above WORKER_REBIND wait.  Tell it when done.
+	 */
+	spin_lock_irq(&worker->pool->gcwq->lock);
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
 }
 
 /*
@@ -1645,8 +1655,16 @@ static void busy_worker_rebind_fn(struct work_struct *work)
 	struct worker *worker = container_of(work, struct worker, rebind_work);
 	struct global_cwq *gcwq = worker->pool->gcwq;
 
-	if (worker_maybe_bind_and_lock(worker))
-		worker_clr_flags(worker, WORKER_REBIND);
+	worker_maybe_bind_and_lock(worker);
+
+	/*
+	 * %WORKER_REBIND must be cleared even if the above binding failed;
+	 * otherwise, we may confuse the next CPU_UP cycle or oops / get
+	 * stuck by calling idle_worker_rebind() prematurely.  If the CPU
+	 * went down again in between, %WORKER_UNBOUND would be set, so
+	 * clearing %WORKER_REBIND is always safe.
+	 */
+	worker_clr_flags(worker, WORKER_REBIND);
 
 	spin_unlock_irq(&gcwq->lock);
 }
@@ -1702,12 +1720,15 @@ retry:
 	/* set REBIND and kick idle ones, we'll wait for these later */
 	for_each_worker_pool(pool, gcwq) {
 		list_for_each_entry(worker, &pool->idle_list, entry) {
+			unsigned long worker_flags = worker->flags;
+
 			if (worker->flags & WORKER_REBIND)
 				continue;
 
-			/* morph UNBOUND to REBIND */
-			worker->flags &= ~WORKER_UNBOUND;
-			worker->flags |= WORKER_REBIND;
+			/* morph UNBOUND to REBIND atomically */
+			worker_flags &= ~WORKER_UNBOUND;
+			worker_flags |= WORKER_REBIND;
+			ACCESS_ONCE(worker->flags) = worker_flags;
 
 			idle_rebind.cnt++;
 			worker->idle_rebind = &idle_rebind;
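
The switch to a local worker_flags plus a single ACCESS_ONCE() store publishes the UNBOUND -> REBIND transition as one write, so nothing ever observes a half-updated ->flags where WORKER_UNBOUND is already cleared but WORKER_REBIND is not yet set.  Below is a rough userspace analogue using C11 atomics in place of ACCESS_ONCE(); worker_model and the WF_* bits are made up for the sketch.

/* Illustrative userspace analogue, not the kernel implementation. */
#include <stdatomic.h>
#include <stdio.h>

enum {
	WF_UNBOUND	= 1 << 0,
	WF_REBIND	= 1 << 1,
};

struct worker_model {
	_Atomic unsigned int flags;
};

/*
 * Two separate read-modify-write statements on ->flags would briefly
 * expose a window where neither bit is set; computing the new value in
 * a local and publishing it with one store closes that window for any
 * concurrent reader of ->flags.
 */
static void morph_unbound_to_rebind(struct worker_model *w)
{
	unsigned int flags = atomic_load_explicit(&w->flags,
						  memory_order_relaxed);

	flags &= ~WF_UNBOUND;
	flags |= WF_REBIND;
	atomic_store_explicit(&w->flags, flags, memory_order_relaxed);
}

int main(void)
{
	struct worker_model w;

	atomic_init(&w.flags, WF_UNBOUND);
	morph_unbound_to_rebind(&w);
	printf("flags: %#x\n", atomic_load(&w.flags));
	return 0;
}
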
@@ -1725,26 +1746,16 @@ retry:
 		goto retry;
 	}
 
-	/*
-	 * All idle workers are rebound and waiting for %WORKER_REBIND to
-	 * be cleared inside idle_worker_rebind().  Clear and release.
-	 * Clearing %WORKER_REBIND from this foreign context is safe
-	 * because these workers are still guaranteed to be idle.
-	 */
-	for_each_worker_pool(pool, gcwq)
-		list_for_each_entry(worker, &pool->idle_list, entry)
-			worker->flags &= ~WORKER_REBIND;
-
-	wake_up_all(&gcwq->rebind_hold);
-
-	/* rebind busy workers */
+	/* all idle workers are rebound, rebind busy workers */
 	for_each_busy_worker(worker, i, pos, gcwq) {
+		unsigned long worker_flags = worker->flags;
 		struct work_struct *rebind_work = &worker->rebind_work;
 		struct workqueue_struct *wq;
 
-		/* morph UNBOUND to REBIND */
-		worker->flags &= ~WORKER_UNBOUND;
-		worker->flags |= WORKER_REBIND;
+		/* morph UNBOUND to REBIND atomically */
+		worker_flags &= ~WORKER_UNBOUND;
+		worker_flags |= WORKER_REBIND;
+		ACCESS_ONCE(worker->flags) = worker_flags;
 
 		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
 				     work_data_bits(rebind_work)))
|
@@ -1765,6 +1776,34 @@ retry:
|
|
|
worker->scheduled.next,
|
|
|
work_color_to_flags(WORK_NO_COLOR));
|
|
|
}
|
|
|
+
|
|
|
+ /*
|
|
|
+ * All idle workers are rebound and waiting for %WORKER_REBIND to
|
|
|
+ * be cleared inside idle_worker_rebind(). Clear and release.
|
|
|
+ * Clearing %WORKER_REBIND from this foreign context is safe
|
|
|
+ * because these workers are still guaranteed to be idle.
|
|
|
+ *
|
|
|
+ * We need to make sure all idle workers passed WORKER_REBIND wait
|
|
|
+ * in idle_worker_rebind() before returning; otherwise, workers can
|
|
|
+ * get stuck at the wait if hotplug cycle repeats.
|
|
|
+ */
|
|
|
+ idle_rebind.cnt = 1;
|
|
|
+ INIT_COMPLETION(idle_rebind.done);
|
|
|
+
|
|
|
+ for_each_worker_pool(pool, gcwq) {
|
|
|
+ list_for_each_entry(worker, &pool->idle_list, entry) {
|
|
|
+ worker->flags &= ~WORKER_REBIND;
|
|
|
+ idle_rebind.cnt++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ wake_up_all(&gcwq->rebind_hold);
|
|
|
+
|
|
|
+ if (--idle_rebind.cnt) {
|
|
|
+ spin_unlock_irq(&gcwq->lock);
|
|
|
+ wait_for_completion(&idle_rebind.done);
|
|
|
+ spin_lock_irq(&gcwq->lock);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
static struct worker *alloc_worker(void)
|
|
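
Taken together, the idle_worker_rebind() and rebind_workers() hunks form a reference-counted handshake: rebind_workers() keeps one reference for itself, wakes the idle workers, drops its own reference, and waits until the last worker that gets past the WORKER_REBIND wait signals completion.  The userspace sketch below models the same shape with pthreads; everything in it (the names, NR_WORKERS, the condition variable standing in for the completion) is illustrative, not the kernel implementation.

/* Userspace sketch of the "last one out signals the waiter" handshake. */
#include <pthread.h>
#include <stdio.h>

#define NR_WORKERS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* plays gcwq->lock */
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;		/* plays idle_rebind.done */
static int rebind_cnt;						/* plays idle_rebind.cnt */

static void *worker_fn(void *arg)
{
	(void)arg;
	/* ... a real worker would rebind itself here ... */

	pthread_mutex_lock(&lock);
	if (!--rebind_cnt)		/* last one out signals the waiter */
		pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t workers[NR_WORKERS];

	/*
	 * The coordinator holds one reference of its own, so the count
	 * can't reach zero before it has finished handing out work.
	 */
	rebind_cnt = 1 + NR_WORKERS;

	for (int i = 0; i < NR_WORKERS; i++)
		pthread_create(&workers[i], NULL, worker_fn, NULL);

	pthread_mutex_lock(&lock);
	if (--rebind_cnt)		/* drop our reference, then wait */
		while (rebind_cnt)
			pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);

	printf("all workers passed the rebind wait\n");

	for (int i = 0; i < NR_WORKERS; i++)
		pthread_join(workers[i], NULL);
	return 0;
}
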
@@ -2110,9 +2149,45 @@ static bool manage_workers(struct worker *worker)
 	struct worker_pool *pool = worker->pool;
 	bool ret = false;
 
-	if (!mutex_trylock(&pool->manager_mutex))
+	if (pool->flags & POOL_MANAGING_WORKERS)
 		return ret;
 
+	pool->flags |= POOL_MANAGING_WORKERS;
+
+	/*
+	 * To simplify both worker management and CPU hotplug, hold off
+	 * management while hotplug is in progress.  CPU hotplug path can't
+	 * grab %POOL_MANAGING_WORKERS to achieve this because that can
+	 * lead to idle worker depletion (all become busy thinking someone
+	 * else is managing) which in turn can result in deadlock under
+	 * extreme circumstances.  Use @pool->manager_mutex to synchronize
+	 * manager against CPU hotplug.
+	 *
+	 * manager_mutex would always be free unless CPU hotplug is in
+	 * progress.  trylock first without dropping @gcwq->lock.
+	 */
+	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
+		spin_unlock_irq(&pool->gcwq->lock);
+		mutex_lock(&pool->manager_mutex);
+		/*
+		 * CPU hotplug could have happened while we were waiting
+		 * for manager_mutex.  Hotplug itself can't handle us
+		 * because manager isn't on either the idle or busy list,
+		 * and @gcwq's state and ours could have deviated.
+		 *
+		 * As hotplug is now excluded via manager_mutex, we can
+		 * simply try to bind.  It will succeed or fail depending
+		 * on @gcwq's current state.  Try it and adjust
+		 * %WORKER_UNBOUND accordingly.
+		 */
+		if (worker_maybe_bind_and_lock(worker))
+			worker->flags &= ~WORKER_UNBOUND;
+		else
+			worker->flags |= WORKER_UNBOUND;
+
+		ret = true;
+	}
+
 	pool->flags &= ~POOL_MANAGE_WORKERS;
 
 	/*
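
The locking dance added to manage_workers() is the familiar "opportunistic trylock, otherwise drop the spinlock, block on the mutex, re-validate" pattern.  The userspace sketch below mirrors its shape with a pthread spinlock and mutex; pool_model, revalidate_after_sleep() and the rest are invented for illustration, and the single-threaded demo only exercises the trylock fast path.

/* Userspace analogue of the manage_workers() locking pattern. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pool_model {
	pthread_spinlock_t lock;	/* plays the role of gcwq->lock */
	pthread_mutex_t manager_mutex;
	bool state_changed;		/* stand-in for "gcwq state deviated" */
};

static void revalidate_after_sleep(struct pool_model *pool)
{
	/*
	 * The real code re-binds the worker and fixes up WORKER_UNBOUND
	 * here; the sketch only notes that a re-check is required.
	 */
	if (pool->state_changed)
		printf("state changed while blocked; adjusting\n");
}

static bool acquire_manager(struct pool_model *pool)
{
	bool slept = false;

	if (pthread_mutex_trylock(&pool->manager_mutex) != 0) {
		/*
		 * Mirror the kernel rule: don't block on a mutex while
		 * the spinlock is held.  Drop it, sleep, retake it, then
		 * re-validate because the world may have changed.
		 */
		pthread_spin_unlock(&pool->lock);
		pthread_mutex_lock(&pool->manager_mutex);
		pthread_spin_lock(&pool->lock);
		revalidate_after_sleep(pool);
		slept = true;
	}
	return slept;
}

int main(void)
{
	struct pool_model pool = { .state_changed = true };

	pthread_spin_init(&pool.lock, PTHREAD_PROCESS_PRIVATE);
	pthread_mutex_init(&pool.manager_mutex, NULL);

	pthread_spin_lock(&pool.lock);
	printf("slept for manager_mutex: %s\n",
	       acquire_manager(&pool) ? "yes" : "no");
	pthread_spin_unlock(&pool.lock);

	pthread_mutex_unlock(&pool.manager_mutex);
	return 0;
}
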
@@ -2122,6 +2197,7 @@ static bool manage_workers(struct worker *worker)
 	ret |= maybe_destroy_workers(pool);
 	ret |= maybe_create_worker(pool);
 
+	pool->flags &= ~POOL_MANAGING_WORKERS;
 	mutex_unlock(&pool->manager_mutex);
 	return ret;
 }