@@ -818,7 +818,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 
 	for_each_subsys(cgrp->root, ss)
 		if (ss->pre_destroy) {
-			ret = ss->pre_destroy(ss, cgrp);
+			ret = ss->pre_destroy(cgrp);
 			if (ret)
 				break;
 		}
@@ -846,7 +846,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 * Release the subsystem state objects.
 		 */
 		for_each_subsys(cgrp->root, ss)
-			ss->destroy(ss, cgrp);
+			ss->destroy(cgrp);
 
 		cgrp->root->number_of_cgroups--;
 		mutex_unlock(&cgroup_mutex);
@@ -1015,7 +1015,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			list_move(&ss->sibling, &root->subsys_list);
 			ss->root = root;
 			if (ss->bind)
-				ss->bind(ss, cgrp);
+				ss->bind(cgrp);
 			mutex_unlock(&ss->hierarchy_mutex);
 			/* refcount was already taken, and we're keeping it */
 		} else if (bit & removed_bits) {
@@ -1025,7 +1025,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 			mutex_lock(&ss->hierarchy_mutex);
 			if (ss->bind)
-				ss->bind(ss, dummytop);
+				ss->bind(dummytop);
 			dummytop->subsys[i]->cgroup = dummytop;
 			cgrp->subsys[i] = NULL;
 			subsys[i]->root = &rootnode;
@@ -1763,6 +1763,7 @@ EXPORT_SYMBOL_GPL(cgroup_path);
 struct task_and_cgroup {
 	struct task_struct	*task;
 	struct cgroup		*cgrp;
+	struct css_set		*cg;
 };
 
 struct cgroup_taskset {
@@ -1843,11 +1844,10 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
  * will already exist. If not set, this function might sleep, and can fail with
  * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
  */
-static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
-			       struct task_struct *tsk, bool guarantee)
+static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+				struct task_struct *tsk, struct css_set *newcg)
 {
 	struct css_set *oldcg;
-	struct css_set *newcg;
 
 	/*
 	 * We are synchronized through threadgroup_lock() against PF_EXITING
@@ -1857,23 +1857,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
 	WARN_ON_ONCE(tsk->flags & PF_EXITING);
 	oldcg = tsk->cgroups;
 
-	/* locate or allocate a new css_set for this task. */
-	if (guarantee) {
-		/* we know the css_set we want already exists. */
-		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
-		read_lock(&css_set_lock);
-		newcg = find_existing_css_set(oldcg, cgrp, template);
-		BUG_ON(!newcg);
-		get_css_set(newcg);
-		read_unlock(&css_set_lock);
-	} else {
-		might_sleep();
-		/* find_css_set will give us newcg already referenced. */
-		newcg = find_css_set(oldcg, cgrp);
-		if (!newcg)
-			return -ENOMEM;
-	}
-
 	task_lock(tsk);
 	rcu_assign_pointer(tsk->cgroups, newcg);
 	task_unlock(tsk);
@@ -1892,7 +1875,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
 	put_css_set(oldcg);
 
 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
-	return 0;
 }
 
 /**
@@ -1910,6 +1892,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	struct cgroup *oldcgrp;
 	struct cgroupfs_root *root = cgrp->root;
 	struct cgroup_taskset tset = { };
+	struct css_set *newcg;
 
 	/* @tsk either already exited or can't exit until the end */
 	if (tsk->flags & PF_EXITING)
@@ -1925,7 +1908,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
 	for_each_subsys(root, ss) {
 		if (ss->can_attach) {
-			retval = ss->can_attach(ss, cgrp, &tset);
+			retval = ss->can_attach(cgrp, &tset);
 			if (retval) {
 				/*
 				 * Remember on which subsystem the can_attach()
@@ -1939,13 +1922,17 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		}
 	}
 
-	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
-	if (retval)
+	newcg = find_css_set(tsk->cgroups, cgrp);
+	if (!newcg) {
+		retval = -ENOMEM;
 		goto out;
+	}
+
+	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
 
 	for_each_subsys(root, ss) {
 		if (ss->attach)
-			ss->attach(ss, cgrp, &tset);
+			ss->attach(cgrp, &tset);
 	}
 
 	synchronize_rcu();
@@ -1967,7 +1954,7 @@ out:
 				 */
 				break;
 			if (ss->cancel_attach)
-				ss->cancel_attach(ss, cgrp, &tset);
+				ss->cancel_attach(cgrp, &tset);
 		}
 	}
 	return retval;
@@ -1997,66 +1984,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
-/*
- * cgroup_attach_proc works in two stages, the first of which prefetches all
- * new css_sets needed (to make sure we have enough memory before committing
- * to the move) and stores them in a list of entries of the following type.
- * TODO: possible optimization: use css_set->rcu_head for chaining instead
- */
-struct cg_list_entry {
-	struct css_set *cg;
-	struct list_head links;
-};
-
-static bool css_set_check_fetched(struct cgroup *cgrp,
-				  struct task_struct *tsk, struct css_set *cg,
-				  struct list_head *newcg_list)
-{
-	struct css_set *newcg;
-	struct cg_list_entry *cg_entry;
-	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
-
-	read_lock(&css_set_lock);
-	newcg = find_existing_css_set(cg, cgrp, template);
-	read_unlock(&css_set_lock);
-
-	/* doesn't exist at all? */
-	if (!newcg)
-		return false;
-	/* see if it's already in the list */
-	list_for_each_entry(cg_entry, newcg_list, links)
-		if (cg_entry->cg == newcg)
-			return true;
-
-	/* not found */
-	return false;
-}
-
-/*
- * Find the new css_set and store it in the list in preparation for moving the
- * given task to the given cgroup. Returns 0 or -ENOMEM.
- */
-static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
-			    struct list_head *newcg_list)
-{
-	struct css_set *newcg;
-	struct cg_list_entry *cg_entry;
-
-	/* ensure a new css_set will exist for this thread */
-	newcg = find_css_set(cg, cgrp);
-	if (!newcg)
-		return -ENOMEM;
-	/* add it to the list */
-	cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
-	if (!cg_entry) {
-		put_css_set(newcg);
-		return -ENOMEM;
-	}
-	cg_entry->cg = newcg;
-	list_add(&cg_entry->links, newcg_list);
-	return 0;
-}
-
 /**
  * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
  * @cgrp: the cgroup to attach to
@@ -2070,20 +1997,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	int retval, i, group_size;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	/* guaranteed to be initialized later, but the compiler needs this */
-	struct css_set *oldcg;
 	struct cgroupfs_root *root = cgrp->root;
 	/* threadgroup list cursor and array */
 	struct task_struct *tsk;
 	struct task_and_cgroup *tc;
 	struct flex_array *group;
 	struct cgroup_taskset tset = { };
-	/*
-	 * we need to make sure we have css_sets for all the tasks we're
-	 * going to move -before- we actually start moving them, so that in
-	 * case we get an ENOMEM we can bail out before making any changes.
-	 */
-	struct list_head newcg_list;
-	struct cg_list_entry *cg_entry, *temp_nobe;
 
 	/*
 	 * step 0: in order to do expensive, possibly blocking operations for
@@ -2102,23 +2021,14 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	if (retval)
 		goto out_free_group_list;
 
-	/* prevent changes to the threadgroup list while we take a snapshot. */
-	read_lock(&tasklist_lock);
-	if (!thread_group_leader(leader)) {
-		/*
-		 * a race with de_thread from another thread's exec() may strip
-		 * us of our leadership, making while_each_thread unsafe to use
-		 * on this task. if this happens, there is no choice but to
-		 * throw this task away and try again (from cgroup_procs_write);
-		 * this is "double-double-toil-and-trouble-check locking".
-		 */
-		read_unlock(&tasklist_lock);
-		retval = -EAGAIN;
-		goto out_free_group_list;
-	}
-
 	tsk = leader;
 	i = 0;
+	/*
+	 * Prevent freeing of tasks while we take a snapshot. Tasks that are
+	 * already PF_EXITING could be freed from underneath us unless we
+	 * take an rcu_read_lock.
+	 */
+	rcu_read_lock();
 	do {
 		struct task_and_cgroup ent;
 
@@ -2128,24 +2038,24 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 
 		/* as per above, nr_threads may decrease, but not increase. */
 		BUG_ON(i >= group_size);
-		/*
-		 * saying GFP_ATOMIC has no effect here because we did prealloc
-		 * earlier, but it's good form to communicate our expectations.
-		 */
 		ent.task = tsk;
 		ent.cgrp = task_cgroup_from_root(tsk, root);
 		/* nothing to do if this task is already in the cgroup */
 		if (ent.cgrp == cgrp)
 			continue;
+		/*
+		 * saying GFP_ATOMIC has no effect here because we did prealloc
+		 * earlier, but it's good form to communicate our expectations.
+		 */
 		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
 		BUG_ON(retval != 0);
 		i++;
 	} while_each_thread(leader, tsk);
+	rcu_read_unlock();
 	/* remember the number of threads in the array for later. */
 	group_size = i;
 	tset.tc_array = group;
 	tset.tc_array_len = group_size;
-	read_unlock(&tasklist_lock);
 
 	/* methods shouldn't be called if no task is actually migrating */
 	retval = 0;
@@ -2157,7 +2067,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 */
 	for_each_subsys(root, ss) {
 		if (ss->can_attach) {
-			retval = ss->can_attach(ss, cgrp, &tset);
+			retval = ss->can_attach(cgrp, &tset);
 			if (retval) {
 				failed_ss = ss;
 				goto out_cancel_attach;
@@ -2169,17 +2079,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 * step 2: make sure css_sets exist for all threads to be migrated.
 	 * we use find_css_set, which allocates a new one if necessary.
 	 */
-	INIT_LIST_HEAD(&newcg_list);
 	for (i = 0; i < group_size; i++) {
 		tc = flex_array_get(group, i);
-		oldcg = tc->task->cgroups;
-
-		/* if we don't already have it in the list get a new one */
-		if (!css_set_check_fetched(cgrp, tc->task, oldcg,
-					   &newcg_list)) {
-			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
-			if (retval)
-				goto out_list_teardown;
+		tc->cg = find_css_set(tc->task->cgroups, cgrp);
+		if (!tc->cg) {
+			retval = -ENOMEM;
+			goto out_put_css_set_refs;
 		}
 	}
 
@@ -2190,8 +2095,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 */
 	for (i = 0; i < group_size; i++) {
 		tc = flex_array_get(group, i);
-		retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
-		BUG_ON(retval);
+		cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
 	}
 	/* nothing is sensitive to fork() after this point. */
 
@@ -2200,7 +2104,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 */
 	for_each_subsys(root, ss) {
 		if (ss->attach)
-			ss->attach(ss, cgrp, &tset);
+			ss->attach(cgrp, &tset);
 	}
 
 	/*
@@ -2209,21 +2113,22 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	synchronize_rcu();
 	cgroup_wakeup_rmdir_waiter(cgrp);
 	retval = 0;
-out_list_teardown:
-	/* clean up the list of prefetched css_sets. */
-	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
-		list_del(&cg_entry->links);
-		put_css_set(cg_entry->cg);
-		kfree(cg_entry);
+out_put_css_set_refs:
+	if (retval) {
+		for (i = 0; i < group_size; i++) {
+			tc = flex_array_get(group, i);
+			if (!tc->cg)
+				break;
+			put_css_set(tc->cg);
+		}
 	}
 out_cancel_attach:
-	/* same deal as in cgroup_attach_task */
 	if (retval) {
 		for_each_subsys(root, ss) {
 			if (ss == failed_ss)
 				break;
 			if (ss->cancel_attach)
-				ss->cancel_attach(ss, cgrp, &tset);
+				ss->cancel_attach(cgrp, &tset);
 		}
 	}
 out_free_group_list:
@@ -2245,22 +2150,14 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 
+retry_find_task:
+	rcu_read_lock();
 	if (pid) {
-		rcu_read_lock();
 		tsk = find_task_by_vpid(pid);
 		if (!tsk) {
 			rcu_read_unlock();
-			cgroup_unlock();
-			return -ESRCH;
-		}
-		if (threadgroup) {
-			/*
-			 * RCU protects this access, since tsk was found in the
-			 * tid map. a race with de_thread may cause group_leader
-			 * to stop being the leader, but cgroup_attach_proc will
-			 * detect it later.
-			 */
-			tsk = tsk->group_leader;
+			ret = -ESRCH;
+			goto out_unlock_cgroup;
 		}
 		/*
 		 * even if we're attaching all tasks in the thread group, we
@@ -2271,29 +2168,38 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
 			rcu_read_unlock();
-			cgroup_unlock();
-			return -EACCES;
+			ret = -EACCES;
+			goto out_unlock_cgroup;
 		}
-		get_task_struct(tsk);
-		rcu_read_unlock();
-	} else {
-		if (threadgroup)
-			tsk = current->group_leader;
-		else
-			tsk = current;
-		get_task_struct(tsk);
-	}
-
-	threadgroup_lock(tsk);
+	} else
+		tsk = current;
 
 	if (threadgroup)
+		tsk = tsk->group_leader;
+	get_task_struct(tsk);
+	rcu_read_unlock();
+
+	threadgroup_lock(tsk);
+	if (threadgroup) {
+		if (!thread_group_leader(tsk)) {
+			/*
+			 * a race with de_thread from another thread's exec()
+			 * may strip us of our leadership, if this happens,
+			 * there is no choice but to throw this task away and
+			 * try again; this is
+			 * "double-double-toil-and-trouble-check locking".
+			 */
+			threadgroup_unlock(tsk);
+			put_task_struct(tsk);
+			goto retry_find_task;
+		}
 		ret = cgroup_attach_proc(cgrp, tsk);
-	else
+	} else
 		ret = cgroup_attach_task(cgrp, tsk);
-
 	threadgroup_unlock(tsk);
 
 	put_task_struct(tsk);
+out_unlock_cgroup:
 	cgroup_unlock();
 	return ret;
 }
@@ -2305,16 +2211,7 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
 
 static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
 {
-	int ret;
-	do {
-		/*
-		 * attach_proc fails with -EAGAIN if threadgroup leadership
-		 * changes in the middle of the operation, in which case we need
-		 * to find the task_struct for the new leader and start over.
-		 */
-		ret = attach_task_by_pid(cgrp, tgid, true);
-	} while (ret == -EAGAIN);
-	return ret;
+	return attach_task_by_pid(cgrp, tgid, true);
 }
 
 /**
@@ -2804,15 +2701,20 @@ static void cgroup_advance_iter(struct cgroup *cgrp,
  * using their cgroups capability, we don't maintain the lists running
  * through each css_set to its tasks until we see the list actually
  * used - in other words after the first call to cgroup_iter_start().
- *
- * The tasklist_lock is not held here, as do_each_thread() and
- * while_each_thread() are protected by RCU.
  */
 static void cgroup_enable_task_cg_lists(void)
 {
 	struct task_struct *p, *g;
 	write_lock(&css_set_lock);
 	use_task_css_set_links = 1;
+	/*
+	 * We need tasklist_lock because RCU is not safe against
+	 * while_each_thread(). Besides, a forking task that has passed
+	 * cgroup_post_fork() without seeing use_task_css_set_links = 1
+	 * is not guaranteed to have its child immediately visible in the
+	 * tasklist if we walk through it with RCU.
+	 */
+	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
 		task_lock(p);
 		/*
@@ -2824,6 +2726,7 @@ static void cgroup_enable_task_cg_lists(void)
 			list_add(&p->cg_list, &p->cgroups->tasks);
 		task_unlock(p);
 	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
 	write_unlock(&css_set_lock);
 }
 
@@ -3043,6 +2946,38 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  *
  */
 
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+	CGROUP_FILE_PROCS,
+	CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+	/*
+	 * used to find which pidlist is wanted. doesn't change as long as
+	 * this particular list stays in the list.
+	 */
+	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+	/* array of xids */
+	pid_t *list;
+	/* how many elements the above list has */
+	int length;
+	/* how many files are using the current array */
+	int use_count;
+	/* each of these stored in a list by its cgroup */
+	struct list_head links;
+	/* pointer to the cgroup we belong to, for list removal purposes */
+	struct cgroup *owner;
+	/* protects the other fields */
+	struct rw_semaphore mutex;
+};
+
 /*
  * The following two functions "fix" the issue where there are more pids
  * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
@@ -3827,7 +3762,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
 
 	for_each_subsys(root, ss) {
-		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+		struct cgroup_subsys_state *css = ss->create(cgrp);
 
 		if (IS_ERR(css)) {
 			err = PTR_ERR(css);
@@ -3841,7 +3776,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 		}
 		/* At error, ->destroy() callback has to free assigned ID. */
 		if (clone_children(parent) && ss->post_clone)
-			ss->post_clone(ss, cgrp);
+			ss->post_clone(cgrp);
 	}
 
 	cgroup_lock_hierarchy(root);
@@ -3875,7 +3810,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 	for_each_subsys(root, ss) {
 		if (cgrp->subsys[ss->subsys_id])
-			ss->destroy(ss, cgrp);
+			ss->destroy(cgrp);
 	}
 
 	mutex_unlock(&cgroup_mutex);
@@ -4099,7 +4034,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	/* Create the top cgroup state for this subsystem */
 	list_add(&ss->sibling, &rootnode.subsys_list);
 	ss->root = &rootnode;
-	css = ss->create(ss, dummytop);
+	css = ss->create(dummytop);
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
 	init_cgroup_css(css, ss, dummytop);
@@ -4188,7 +4123,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	 * no ss->create seems to need anything important in the ss struct, so
 	 * this can happen first (i.e. before the rootnode attachment).
 	 */
-	css = ss->create(ss, dummytop);
+	css = ss->create(dummytop);
 	if (IS_ERR(css)) {
 		/* failure case - need to deassign the subsys[] slot. */
 		subsys[i] = NULL;
@@ -4206,7 +4141,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 		int ret = cgroup_init_idr(ss, css);
 		if (ret) {
 			dummytop->subsys[ss->subsys_id] = NULL;
-			ss->destroy(ss, dummytop);
+			ss->destroy(dummytop);
 			subsys[i] = NULL;
 			mutex_unlock(&cgroup_mutex);
 			return ret;
@@ -4304,7 +4239,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	 * pointer to find their state. note that this also takes care of
 	 * freeing the css_id.
 	 */
-	ss->destroy(ss, dummytop);
+	ss->destroy(dummytop);
 	dummytop->subsys[ss->subsys_id] = NULL;
 
 	mutex_unlock(&cgroup_mutex);
@@ -4580,7 +4515,7 @@ void cgroup_fork_callbacks(struct task_struct *child)
 		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 			if (ss->fork)
-				ss->fork(ss, child);
+				ss->fork(child);
 		}
 	}
 }
@@ -4596,6 +4531,17 @@ void cgroup_fork_callbacks(struct task_struct *child)
  */
 void cgroup_post_fork(struct task_struct *child)
 {
+	/*
+	 * use_task_css_set_links is set to 1 before we walk the tasklist
+	 * under the tasklist_lock and we read it here after we added the child
+	 * to the tasklist under the tasklist_lock as well. If the child wasn't
+	 * yet in the tasklist when we walked through it from
+	 * cgroup_enable_task_cg_lists(), then use_task_css_set_links value
+	 * should be visible now due to the paired locking and barriers implied
+	 * by LOCK/UNLOCK: it is written before the tasklist_lock unlock
+	 * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock
+	 * lock on fork.
+	 */
 	if (use_task_css_set_links) {
 		write_lock(&css_set_lock);
 		if (list_empty(&child->cg_list)) {
@@ -4682,7 +4628,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 				struct cgroup *old_cgrp =
 					rcu_dereference_raw(cg->subsys[i])->cgroup;
 				struct cgroup *cgrp = task_cgroup(tsk, i);
-				ss->exit(ss, cgrp, old_cgrp, tsk);
+				ss->exit(cgrp, old_cgrp, tsk);
 			}
 		}
 	}
@@ -5137,8 +5083,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
 }
 
 #ifdef CONFIG_CGROUP_DEBUG
-static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
-						struct cgroup *cont)
+static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
 {
 	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
 
@@ -5148,7 +5093,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
 	return css;
 }
 
-static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+static void debug_destroy(struct cgroup *cont)
 {
 	kfree(cont->subsys[debug_subsys_id]);
 }