@@ -257,6 +257,7 @@ struct workqueue_struct {
 	/* hot fields used during command issue, aligned to cacheline */
 	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
 	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
+	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
 };
 
 static struct kmem_cache *pwq_cache;
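
As an aside (not part of the patch): numa_pwq_tbl[] is a flexible array member, so it must remain the last field of workqueue_struct and contributes nothing to sizeof(*wq); its per-node slots come out of the same allocation, which is exactly what the __alloc_workqueue_key() hunk further down does with tbl_size. A minimal, self-contained sketch of that pattern (struct foo and alloc_foo are hypothetical names, not from workqueue.c):

#include <linux/slab.h>

struct foo {
	unsigned long	flags;
	void		*tbl[];		/* trailing slots, appended to the allocation */
};

static struct foo *alloc_foo(int nr_slots)
{
	struct foo *f;

	/* one allocation covers the struct plus nr_slots trailing pointers */
	f = kzalloc(sizeof(*f) + nr_slots * sizeof(f->tbl[0]), GFP_KERNEL);
	return f;
}
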
@@ -525,6 +526,22 @@ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
 				      pwqs_node);
 }
 
+/**
+ * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
+ * @wq: the target workqueue
+ * @node: the node ID
+ *
+ * This must be called either with pwq_lock held or sched RCU read locked.
+ * If the pwq needs to be used beyond the locking in effect, the caller is
+ * responsible for guaranteeing that the pwq stays online.
+ */
+static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
+						  int node)
+{
+	assert_rcu_or_wq_mutex(wq);
+	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
+}
+
 static unsigned int work_color_to_flags(int color)
 {
 	return color << WORK_STRUCT_COLOR_SHIFT;
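
As an aside (not part of the patch): a caller of unbound_pwq_by_node() outside the queueing path would take the sched-RCU read lock around both the lookup and any use of the returned pwq, as workqueue_congested() does in the last hunk below. A hypothetical helper (assuming it lives in kernel/workqueue.c where pool_workqueue is visible, and that @wq is WQ_UNBOUND) might look like:

static bool wq_local_node_has_delayed_work(struct workqueue_struct *wq)
{
	struct pool_workqueue *pwq;
	bool busy;

	rcu_read_lock_sched();
	pwq = unbound_pwq_by_node(wq, cpu_to_node(raw_smp_processor_id()));
	busy = !list_empty(&pwq->delayed_works);
	rcu_read_unlock_sched();

	return busy;
}
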
@@ -1278,14 +1295,14 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	    WARN_ON_ONCE(!is_chained_work(wq)))
 		return;
 retry:
+	if (req_cpu == WORK_CPU_UNBOUND)
+		cpu = raw_smp_processor_id();
+
 	/* pwq which will be used unless @work is executing elsewhere */
-	if (!(wq->flags & WQ_UNBOUND)) {
-		if (cpu == WORK_CPU_UNBOUND)
-			cpu = raw_smp_processor_id();
+	if (!(wq->flags & WQ_UNBOUND))
 		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
-	} else {
-		pwq = first_pwq(wq);
-	}
+	else
+		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
 	/*
 	 * If @work was previously on a different pool, it might still be
@@ -1315,8 +1332,8 @@ retry:
 	 * pwq is determined and locked. For unbound pools, we could have
 	 * raced with pwq release and it could already be dead. If its
 	 * refcnt is zero, repeat pwq selection. Note that pwqs never die
-	 * without another pwq replacing it as the first pwq or while a
-	 * work item is executing on it, so the retying is guaranteed to
+	 * without another pwq replacing it in the numa_pwq_tbl or while
+	 * work items are executing on it, so the retrying is guaranteed to
 	 * make forward-progress.
 	 */
 	if (unlikely(!pwq->refcnt)) {
@@ -3614,6 +3631,8 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
 			      struct worker_pool *pool,
 			      struct pool_workqueue **p_last_pwq)
 {
+	int node;
+
 	BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
 
 	pwq->pool = pool;
@@ -3640,8 +3659,11 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
 	/* link in @pwq */
 	list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
 
-	if (wq->flags & WQ_UNBOUND)
+	if (wq->flags & WQ_UNBOUND) {
 		copy_workqueue_attrs(wq->unbound_attrs, pool->attrs);
+		for_each_node(node)
+			rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
+	}
 
 	mutex_unlock(&wq->mutex);
 }
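
As an aside (not part of the patch): the table entries are written with rcu_assign_pointer() rather than a plain store so that the pwq fields initialized earlier in init_and_link_pwq() are ordered before the pointer becomes visible; sched-RCU readers reached through unbound_pwq_by_node() therefore see either the old entry or a fully initialized new one, never a half-built pwq. A generic, self-contained illustration of that publish/read pairing (obj, slot, writer and reader are hypothetical names):

#include <linux/rcupdate.h>

struct obj {
	int ready;
};

static struct obj __rcu *slot;

static void writer(struct obj *o)
{
	o->ready = 1;			/* initialize first ...                */
	rcu_assign_pointer(slot, o);	/* ... then publish, with the barrier */
}

static int reader(void)
{
	struct obj *o;
	int ready = 0;

	rcu_read_lock_sched();
	o = rcu_dereference_sched(slot);
	if (o)
		ready = o->ready;	/* guaranteed to see the init above */
	rcu_read_unlock_sched();

	return ready;
}
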
@@ -3761,12 +3783,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 					       struct lock_class_key *key,
 					       const char *lock_name, ...)
 {
+	size_t tbl_size = 0;
 	va_list args;
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
 
 	/* allocate wq and format name */
-	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+	if (flags & WQ_UNBOUND)
+		tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
+
+	wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
 	if (!wq)
 		return NULL;
 
@@ -3994,7 +4020,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 	if (!(wq->flags & WQ_UNBOUND))
 		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
 	else
-		pwq = first_pwq(wq);
+		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
 	ret = !list_empty(&pwq->delayed_works);
 	rcu_read_unlock_sched();