@@ -1026,6 +1026,31 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	mutex_unlock(&callback_mutex);
 }
 
+/*
+ * Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
+ * nodes if memory_migrate flag is set. Called with cgroup_mutex held.
+ */
+static void cpuset_change_nodemask(struct task_struct *p,
+				   struct cgroup_scanner *scan)
+{
+	struct mm_struct *mm;
+	struct cpuset *cs;
+	int migrate;
+	const nodemask_t *oldmem = scan->data;
+
+	mm = get_task_mm(p);
+	if (!mm)
+		return;
+
+	cs = cgroup_cs(scan->cg);
+	migrate = is_memory_migrate(cs);
+
+	mpol_rebind_mm(mm, &cs->mems_allowed);
+	if (migrate)
+		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
+	mmput(mm);
+}
+
 static void *cpuset_being_rebound;
 
 /**
@@ -1038,88 +1063,32 @@ static void *cpuset_being_rebound;
  */
 static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
 {
-	struct task_struct *p;
-	struct mm_struct **mmarray;
-	int i, n, ntasks;
-	int migrate;
-	int fudge;
-	struct cgroup_iter it;
 	int retval;
+	struct cgroup_scanner scan;
 
 	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
 
-	fudge = 10;				/* spare mmarray[] slots */
-	fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
-	retval = -ENOMEM;
-
-	/*
-	 * Allocate mmarray[] to hold mm reference for each task
-	 * in cpuset cs. Can't kmalloc GFP_KERNEL while holding
-	 * tasklist_lock. We could use GFP_ATOMIC, but with a
-	 * few more lines of code, we can retry until we get a big
-	 * enough mmarray[] w/o using GFP_ATOMIC.
-	 */
-	while (1) {
-		ntasks = cgroup_task_count(cs->css.cgroup);  /* guess */
-		ntasks += fudge;
-		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
-		if (!mmarray)
-			goto done;
-		read_lock(&tasklist_lock);		/* block fork */
-		if (cgroup_task_count(cs->css.cgroup) <= ntasks)
-			break;				/* got enough */
-		read_unlock(&tasklist_lock);		/* try again */
-		kfree(mmarray);
-	}
-
-	n = 0;
-
-	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	cgroup_iter_start(cs->css.cgroup, &it);
-	while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
-		struct mm_struct *mm;
-
-		if (n >= ntasks) {
-			printk(KERN_WARNING
-				"Cpuset mempolicy rebind incomplete.\n");
-			break;
-		}
-		mm = get_task_mm(p);
-		if (!mm)
-			continue;
-		mmarray[n++] = mm;
-	}
-	cgroup_iter_end(cs->css.cgroup, &it);
-	read_unlock(&tasklist_lock);
+	scan.cg = cs->css.cgroup;
+	scan.test_task = NULL;
+	scan.process_task = cpuset_change_nodemask;
+	scan.heap = NULL;
+	scan.data = (nodemask_t *)oldmem;
 
 	/*
-	 * Now that we've dropped the tasklist spinlock, we can
-	 * rebind the vma mempolicies of each mm in mmarray[] to their
-	 * new cpuset, and release that mm. The mpol_rebind_mm()
-	 * call takes mmap_sem, which we couldn't take while holding
-	 * tasklist_lock. Forks can happen again now - the mpol_dup()
-	 * cpuset_being_rebound check will catch such forks, and rebind
-	 * their vma mempolicies too. Because we still hold the global
-	 * cgroup_mutex, we know that no other rebind effort will
-	 * be contending for the global variable cpuset_being_rebound.
+	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
+	 * take while holding tasklist_lock. Forks can happen - the
+	 * mpol_dup() cpuset_being_rebound check will catch such forks,
+	 * and rebind their vma mempolicies too. Because we still hold
+	 * the global cgroup_mutex, we know that no other rebind effort
+	 * will be contending for the global variable cpuset_being_rebound.
 	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
 	 * is idempotent. Also migrate pages in each mm to new nodes.
 	 */
-	migrate = is_memory_migrate(cs);
-	for (i = 0; i < n; i++) {
-		struct mm_struct *mm = mmarray[i];
-
-		mpol_rebind_mm(mm, &cs->mems_allowed);
-		if (migrate)
-			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
-		mmput(mm);
-	}
+	retval = cgroup_scan_tasks(&scan);
 
 	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
-	kfree(mmarray);
 	cpuset_being_rebound = NULL;
-	retval = 0;
-done:
+
 	return retval;
 }
 
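Note on the pattern used above (a sketch for illustration, not part of the patch): the rewrite leans on the generic cgroup_scanner interface. The caller fills in scan.cg, an optional scan.test_task filter, the per-task scan.process_task callback, scan.heap, and the opaque scan.data cookie, then calls cgroup_scan_tasks() and lets the cgroup core walk every task attached to the cgroup. A minimal sketch of that plumbing follows; count_tasks() and count_cgroup_tasks() are hypothetical names invented only to show the shape of the API as it is used in this patch.

	#include <linux/cgroup.h>
	#include <linux/sched.h>

	/* Hypothetical per-task callback: bump a counter for every task seen. */
	static void count_tasks(struct task_struct *p, struct cgroup_scanner *scan)
	{
		unsigned long *nr = scan->data;	/* opaque cookie set by the caller */

		(*nr)++;
	}

	/* Hypothetical helper: count the tasks attached to @cgrp. */
	static int count_cgroup_tasks(struct cgroup *cgrp, unsigned long *nr)
	{
		struct cgroup_scanner scan;

		scan.cg = cgrp;				/* cgroup whose tasks are walked */
		scan.test_task = NULL;			/* no filter: visit every task */
		scan.process_task = count_tasks;	/* called once per task */
		scan.heap = NULL;			/* no pre-allocated heap */
		scan.data = nr;				/* handed to the callback as-is */

		return cgroup_scan_tasks(&scan);
	}

Compared with the old mmarray[] scheme, this delegates the task iteration to the cgroup core, so update_tasks_nodemask() no longer needs the cgroup_task_count() guess, the kmalloc() retry loop, or the read_lock(&tasklist_lock) section.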