@@ -1026,6 +1026,31 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	mutex_unlock(&callback_mutex);
 }
 
+/*
+ * Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
+ * nodes if memory_migrate flag is set. Called with cgroup_mutex held.
+ */
+static void cpuset_change_nodemask(struct task_struct *p,
+				   struct cgroup_scanner *scan)
+{
+	struct mm_struct *mm;
+	struct cpuset *cs;
+	int migrate;
+	const nodemask_t *oldmem = scan->data;
+
+	mm = get_task_mm(p);
+	if (!mm)
+		return;
+
+	cs = cgroup_cs(scan->cg);
+	migrate = is_memory_migrate(cs);
+
+	mpol_rebind_mm(mm, &cs->mems_allowed);
+	if (migrate)
+		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
+	mmput(mm);
+}
+
 static void *cpuset_being_rebound;
 
 /**
@@ -1038,88 +1063,32 @@ static void *cpuset_being_rebound;
  */
 static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
 {
-	struct task_struct *p;
-	struct mm_struct **mmarray;
-	int i, n, ntasks;
-	int migrate;
-	int fudge;
-	struct cgroup_iter it;
 	int retval;
+	struct cgroup_scanner scan;
 
 	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
 
-	fudge = 10;				/* spare mmarray[] slots */
-	fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
-	retval = -ENOMEM;
-
-	/*
-	 * Allocate mmarray[] to hold mm reference for each task
-	 * in cpuset cs. Can't kmalloc GFP_KERNEL while holding
-	 * tasklist_lock. We could use GFP_ATOMIC, but with a
-	 * few more lines of code, we can retry until we get a big
-	 * enough mmarray[] w/o using GFP_ATOMIC.
-	 */
-	while (1) {
-		ntasks = cgroup_task_count(cs->css.cgroup);  /* guess */
-		ntasks += fudge;
-		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
-		if (!mmarray)
-			goto done;
-		read_lock(&tasklist_lock);		/* block fork */
-		if (cgroup_task_count(cs->css.cgroup) <= ntasks)
-			break;				/* got enough */
-		read_unlock(&tasklist_lock);		/* try again */
-		kfree(mmarray);
-	}
-
-	n = 0;
-
-	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	cgroup_iter_start(cs->css.cgroup, &it);
-	while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
-		struct mm_struct *mm;
-
-		if (n >= ntasks) {
-			printk(KERN_WARNING
-				"Cpuset mempolicy rebind incomplete.\n");
-			break;
-		}
-		mm = get_task_mm(p);
-		if (!mm)
-			continue;
-		mmarray[n++] = mm;
-	}
-	cgroup_iter_end(cs->css.cgroup, &it);
-	read_unlock(&tasklist_lock);
+	scan.cg = cs->css.cgroup;
+	scan.test_task = NULL;
+	scan.process_task = cpuset_change_nodemask;
+	scan.heap = NULL;
+	scan.data = (nodemask_t *)oldmem;
 
 	/*
-	 * Now that we've dropped the tasklist spinlock, we can
-	 * rebind the vma mempolicies of each mm in mmarray[] to their
-	 * new cpuset, and release that mm. The mpol_rebind_mm()
-	 * call takes mmap_sem, which we couldn't take while holding
-	 * tasklist_lock. Forks can happen again now - the mpol_dup()
-	 * cpuset_being_rebound check will catch such forks, and rebind
-	 * their vma mempolicies too. Because we still hold the global
-	 * cgroup_mutex, we know that no other rebind effort will
-	 * be contending for the global variable cpuset_being_rebound.
+	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
+	 * take while holding tasklist_lock. Forks can happen - the
+	 * mpol_dup() cpuset_being_rebound check will catch such forks,
+	 * and rebind their vma mempolicies too. Because we still hold
+	 * the global cgroup_mutex, we know that no other rebind effort
+	 * will be contending for the global variable cpuset_being_rebound.
 	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
 	 * is idempotent. Also migrate pages in each mm to new nodes.
 	 */
-	migrate = is_memory_migrate(cs);
-	for (i = 0; i < n; i++) {
-		struct mm_struct *mm = mmarray[i];
-
-		mpol_rebind_mm(mm, &cs->mems_allowed);
-		if (migrate)
-			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
-		mmput(mm);
-	}
+	retval = cgroup_scan_tasks(&scan);
 
 	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
-	kfree(mmarray);
 	cpuset_being_rebound = NULL;
-	retval = 0;
-done:
+
 	return retval;
 }
 
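Note on the pattern used above (a sketch for illustration, not part of the patch): the rewrite leans on the generic cgroup_scanner interface. The caller fills in scan.cg, an optional scan.test_task filter, the per-task scan.process_task callback, scan.heap, and the opaque scan.data cookie, then calls cgroup_scan_tasks() and lets the cgroup core walk every task attached to the cgroup. A minimal sketch of that plumbing follows; count_tasks() and count_cgroup_tasks() are hypothetical names invented only to show the shape of the API as it is used in this patch.

	#include <linux/cgroup.h>
	#include <linux/sched.h>

	/* Hypothetical per-task callback: bump a counter for every task seen. */
	static void count_tasks(struct task_struct *p, struct cgroup_scanner *scan)
	{
		unsigned long *nr = scan->data;	/* opaque cookie set by the caller */

		(*nr)++;
	}

	/* Hypothetical helper: count the tasks attached to @cgrp. */
	static int count_cgroup_tasks(struct cgroup *cgrp, unsigned long *nr)
	{
		struct cgroup_scanner scan;

		scan.cg = cgrp;				/* cgroup whose tasks are walked */
		scan.test_task = NULL;			/* no filter: visit every task */
		scan.process_task = count_tasks;	/* called once per task */
		scan.heap = NULL;			/* no pre-allocated heap */
		scan.data = nr;				/* handed to the callback as-is */

		return cgroup_scan_tasks(&scan);
	}

Compared with the old mmarray[] scheme, this delegates the task iteration to the cgroup core, so update_tasks_nodemask() no longer needs the cgroup_task_count() guess, the kmalloc() retry loop, or the read_lock(&tasklist_lock) section.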