|
@@ -57,10 +57,14 @@
|
|
|
|
|
|
static DEFINE_MUTEX(cgroup_mutex);
|
|
|
|
|
|
-/* Generate an array of cgroup subsystem pointers */
|
|
|
+/*
|
|
|
+ * Generate an array of cgroup subsystem pointers. At boot time, this is
|
|
|
+ * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
|
|
|
+ * registered after that. The mutable section of this array is protected by
|
|
|
+ * cgroup_mutex.
|
|
|
+ */
|
|
|
#define SUBSYS(_x) &_x ## _subsys,
|
|
|
-
|
|
|
-static struct cgroup_subsys *subsys[] = {
|
|
|
+static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
|
|
|
#include <linux/cgroup_subsys.h>
|
|
|
};
|
|
|
|
|
@@ -448,8 +452,11 @@ static struct css_set *find_existing_css_set(
|
|
|
struct hlist_node *node;
|
|
|
struct css_set *cg;
|
|
|
|
|
|
- /* Built the set of subsystem state objects that we want to
|
|
|
- * see in the new css_set */
|
|
|
+ /*
|
|
|
+ * Build the set of subsystem state objects that we want to see in the
|
|
|
+ * new css_set. while subsystems can change globally, the entries here
|
|
|
+ * won't change, so no need for locking.
|
|
|
+ */
|
|
|
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
if (root->subsys_bits & (1UL << i)) {
|
|
|
/* Subsystem is in this hierarchy. So we want
|
|
@@ -884,7 +891,9 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
|
|
|
css_put(css);
|
|
|
}
|
|
|
|
|
|
-
|
|
|
+/*
|
|
|
+ * Call with cgroup_mutex held.
|
|
|
+ */
|
|
|
static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
unsigned long final_bits)
|
|
|
{
|
|
@@ -892,6 +901,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
struct cgroup *cgrp = &root->top_cgroup;
|
|
|
int i;
|
|
|
|
|
|
+ BUG_ON(!mutex_is_locked(&cgroup_mutex));
|
|
|
+
|
|
|
removed_bits = root->actual_subsys_bits & ~final_bits;
|
|
|
added_bits = final_bits & ~root->actual_subsys_bits;
|
|
|
/* Check that any added subsystems are currently free */
|
|
@@ -900,6 +911,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
if (!(bit & added_bits))
|
|
|
continue;
|
|
|
+ /*
|
|
|
+ * Nobody should tell us to do a subsys that doesn't exist:
|
|
|
+ * parse_cgroupfs_options should catch that case and refcounts
|
|
|
+ * ensure that subsystems won't disappear once selected.
|
|
|
+ */
|
|
|
+ BUG_ON(ss == NULL);
|
|
|
if (ss->root != &rootnode) {
|
|
|
/* Subsystem isn't free */
|
|
|
return -EBUSY;
|
|
@@ -919,6 +936,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
unsigned long bit = 1UL << i;
|
|
|
if (bit & added_bits) {
|
|
|
/* We're binding this subsystem to this hierarchy */
|
|
|
+ BUG_ON(ss == NULL);
|
|
|
BUG_ON(cgrp->subsys[i]);
|
|
|
BUG_ON(!dummytop->subsys[i]);
|
|
|
BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
|
|
@@ -932,6 +950,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
mutex_unlock(&ss->hierarchy_mutex);
|
|
|
} else if (bit & removed_bits) {
|
|
|
/* We're removing this subsystem */
|
|
|
+ BUG_ON(ss == NULL);
|
|
|
BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
|
|
|
BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
|
|
|
mutex_lock(&ss->hierarchy_mutex);
|
|
@@ -944,6 +963,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
mutex_unlock(&ss->hierarchy_mutex);
|
|
|
} else if (bit & final_bits) {
|
|
|
/* Subsystem state should already exist */
|
|
|
+ BUG_ON(ss == NULL);
|
|
|
BUG_ON(!cgrp->subsys[i]);
|
|
|
} else {
|
|
|
/* Subsystem state shouldn't exist */
|
|
@@ -986,14 +1006,18 @@ struct cgroup_sb_opts {
|
|
|
|
|
|
};
|
|
|
|
|
|
-/* Convert a hierarchy specifier into a bitmask of subsystems and
|
|
|
- * flags. */
|
|
|
+/*
|
|
|
+ * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
|
|
|
+ * with cgroup_mutex held to protect the subsys[] array.
|
|
|
+ */
|
|
|
static int parse_cgroupfs_options(char *data,
|
|
|
struct cgroup_sb_opts *opts)
|
|
|
{
|
|
|
char *token, *o = data ?: "all";
|
|
|
unsigned long mask = (unsigned long)-1;
|
|
|
|
|
|
+ BUG_ON(!mutex_is_locked(&cgroup_mutex));
|
|
|
+
|
|
|
#ifdef CONFIG_CPUSETS
|
|
|
mask = ~(1UL << cpuset_subsys_id);
|
|
|
#endif
|
|
@@ -1009,6 +1033,8 @@ static int parse_cgroupfs_options(char *data,
|
|
|
opts->subsys_bits = 0;
|
|
|
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
+ if (ss == NULL)
|
|
|
+ continue;
|
|
|
if (!ss->disabled)
|
|
|
opts->subsys_bits |= 1ul << i;
|
|
|
}
|
|
@@ -1053,6 +1079,8 @@ static int parse_cgroupfs_options(char *data,
|
|
|
int i;
|
|
|
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
ss = subsys[i];
|
|
|
+ if (ss == NULL)
|
|
|
+ continue;
|
|
|
if (!strcmp(token, ss->name)) {
|
|
|
if (!ss->disabled)
|
|
|
set_bit(i, &opts->subsys_bits);
|
|
@@ -1306,7 +1334,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
|
|
|
struct cgroupfs_root *new_root;
|
|
|
|
|
|
/* First find the desired set of subsystems */
|
|
|
+ mutex_lock(&cgroup_mutex);
|
|
|
ret = parse_cgroupfs_options(data, &opts);
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
if (ret)
|
|
|
goto out_err;
|
|
|
|
|
@@ -2918,8 +2948,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
|
|
|
/* We need to take each hierarchy_mutex in a consistent order */
|
|
|
int i;
|
|
|
|
|
|
+ /*
|
|
|
+ * No worry about a race with rebind_subsystems that might mess up the
|
|
|
+ * locking order, since both parties are under cgroup_mutex.
|
|
|
+ */
|
|
|
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
+ if (ss == NULL)
|
|
|
+ continue;
|
|
|
if (ss->root == root)
|
|
|
mutex_lock(&ss->hierarchy_mutex);
|
|
|
}
|
|
@@ -2931,6 +2967,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
|
|
|
|
|
|
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
+ if (ss == NULL)
|
|
|
+ continue;
|
|
|
if (ss->root == root)
|
|
|
mutex_unlock(&ss->hierarchy_mutex);
|
|
|
}
|
|
@@ -3054,11 +3092,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
|
|
|
* synchronization other than RCU, and the subsystem linked
|
|
|
* list isn't RCU-safe */
|
|
|
int i;
|
|
|
+ /*
|
|
|
+ * We won't need to lock the subsys array, because the subsystems
|
|
|
+ * we're concerned about aren't going anywhere since our cgroup root
|
|
|
+ * has a reference on them.
|
|
|
+ */
|
|
|
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
struct cgroup_subsys_state *css;
|
|
|
- /* Skip subsystems not in this hierarchy */
|
|
|
- if (ss->root != cgrp->root)
|
|
|
+ /* Skip subsystems not present or not in this hierarchy */
|
|
|
+ if (ss == NULL || ss->root != cgrp->root)
|
|
|
continue;
|
|
|
css = cgrp->subsys[ss->subsys_id];
|
|
|
/* When called from check_for_release() it's possible
|
|
@@ -3279,7 +3322,8 @@ int __init cgroup_init_early(void)
|
|
|
for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
|
|
|
INIT_HLIST_HEAD(&css_set_table[i]);
|
|
|
|
|
|
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
+ /* at bootup time, we don't worry about modular subsystems */
|
|
|
+ for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
|
|
|
BUG_ON(!ss->name);
|
|
@@ -3314,7 +3358,8 @@ int __init cgroup_init(void)
|
|
|
if (err)
|
|
|
return err;
|
|
|
|
|
|
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
+ /* at bootup time, we don't worry about modular subsystems */
|
|
|
+ for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
if (!ss->early_init)
|
|
|
cgroup_init_subsys(ss);
|
|
@@ -3423,9 +3468,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
|
|
|
int i;
|
|
|
|
|
|
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
|
|
|
+ /*
|
|
|
+ * ideally we don't want subsystems moving around while we do this.
|
|
|
+ * cgroup_mutex is also necessary to guarantee an atomic snapshot of
|
|
|
+ * subsys/hierarchy state.
|
|
|
+ */
|
|
|
mutex_lock(&cgroup_mutex);
|
|
|
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
+ if (ss == NULL)
|
|
|
+ continue;
|
|
|
seq_printf(m, "%s\t%d\t%d\t%d\n",
|
|
|
ss->name, ss->root->hierarchy_id,
|
|
|
ss->root->number_of_cgroups, !ss->disabled);
|
|
@@ -3483,7 +3535,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
|
|
|
{
|
|
|
if (need_forkexit_callback) {
|
|
|
int i;
|
|
|
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
+ /*
|
|
|
+ * forkexit callbacks are only supported for builtin
|
|
|
+ * subsystems, and the builtin section of the subsys array is
|
|
|
+ * immutable, so we don't need to lock the subsys array here.
|
|
|
+ */
|
|
|
+ for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
if (ss->fork)
|
|
|
ss->fork(ss, child);
|
|
@@ -3552,7 +3609,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
|
|
|
struct css_set *cg;
|
|
|
|
|
|
if (run_callbacks && need_forkexit_callback) {
|
|
|
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
+ /*
|
|
|
+ * modular subsystems can't use callbacks, so no need to lock
|
|
|
+ * the subsys array
|
|
|
+ */
|
|
|
+ for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
if (ss->exit)
|
|
|
ss->exit(ss, tsk);
|
|
@@ -3844,8 +3905,11 @@ static int __init cgroup_disable(char *str)
|
|
|
while ((token = strsep(&str, ",")) != NULL) {
|
|
|
if (!*token)
|
|
|
continue;
|
|
|
-
|
|
|
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
|
|
+ /*
|
|
|
+ * cgroup_disable, being at boot time, can't know about module
|
|
|
+ * subsystems, so we don't worry about them.
|
|
|
+ */
|
|
|
+ for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
|
|
|
struct cgroup_subsys *ss = subsys[i];
|
|
|
|
|
|
if (!strcmp(token, ss->name)) {
|