@@ -132,6 +132,33 @@ list_for_each_entry(_ss, &_root->subsys_list, sibling)
 #define for_each_root(_root) \
 list_for_each_entry(_root, &roots, root_list)
 
+/* Each task_struct has an embedded css_set, so the get/put
+ * operation simply takes a reference count on all the cgroups
+ * referenced by subsystems in this css_set. This can end up
+ * multiple-counting some cgroups, but that's OK - the ref-count is
+ * just a busy/not-busy indicator; ensuring that we only count each
+ * cgroup once would require taking a global lock to ensure that no
+ * subsystems moved between hierarchies while we were doing so.
+ *
+ * Possible TODO: decide at boot time based on the number of
+ * registered subsystems and the number of CPUs or NUMA nodes whether
+ * it's better for performance to ref-count every subsystem, or to
+ * take a global lock and only add one ref count to each hierarchy.
+ */
+static void get_css_set(struct css_set *cg)
+{
+	int i;
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+		atomic_inc(&cg->subsys[i]->cgroup->count);
+}
+
+static void put_css_set(struct css_set *cg)
+{
+	int i;
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+		atomic_dec(&cg->subsys[i]->cgroup->count);
+}
+
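
Since tsk->cgroups is an embedded struct css_set rather than a pointer, a caller can snapshot it under task_lock() and pin the referenced cgroups with the helpers above. A minimal sketch of that pattern, assuming only the code in this patch (inspect_task_cgroups is an invented name, not part of the patch):

	/* Hypothetical caller, for illustration only: pin a snapshot of a
	 * task's css_set so its cgroups stay "busy" while we look at them
	 * without holding task_lock(). */
	static void inspect_task_cgroups(struct task_struct *tsk)
	{
		struct css_set snap;

		task_lock(tsk);
		snap = tsk->cgroups;	/* copy the embedded css_set */
		get_css_set(&snap);	/* bump each referenced cgroup's count */
		task_unlock(tsk);

		/* ... snap.subsys[i]->cgroup pointers are safe to use ... */

		put_css_set(&snap);	/* drop the references again */
	}
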
 /*
  * There is one global cgroup mutex. We also require taking
  * task_lock() when dereferencing a task's cgroup subsys pointers.
@@ -1587,3 +1614,97 @@ int __init cgroup_init(void)
 out:
 	return err;
 }
+
+/**
+ * cgroup_fork - attach a newly forked task to its parent's cgroup.
+ * @child: pointer to task_struct of the child being forked.
+ *
+ * Description: A task inherits its parent's cgroup at fork().
+ *
+ * The parent's css_set was automatically copied into the child in
+ * fork.c by dup_task_struct(). However, we ignore that copy, since
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * its cgroup pointers might no longer be valid. attach_task() might
+ * have already changed current->cgroups, allowing the previously
+ * referenced cgroup to be removed and freed.
+ *
+ * At the point that cgroup_fork() is called, 'current' is the parent
+ * task, and the passed argument 'child' points to the child task.
+ */
+void cgroup_fork(struct task_struct *child)
+{
+	rcu_read_lock();
+	child->cgroups = rcu_dereference(current->cgroups);
+	get_css_set(&child->cgroups);
+	rcu_read_unlock();
+}
+
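
For orientation, the fork-side wiring that calls these hooks lives in kernel/fork.c and is not part of this hunk; the call order in copy_process() assumed below follows the comments above rather than code shown here:

	/* Assumed ordering in kernel/fork.c:copy_process() (not shown):
	 *
	 *	cgroup_fork(p);			snapshot current->cgroups, take refs
	 *	...
	 *	cgroup_fork_callbacks(p);	per-subsystem fork callbacks
	 *	...				then add p to the tasklist
	 *	cgroup_exit(p, ...);		error path: drop the css_set refs
	 */
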
+/**
+ * cgroup_fork_callbacks - run subsystem fork callbacks for a new
+ * task, very soon before it is added to the tasklist. No need to
+ * take any locks, since no one else can be operating on this task.
+ */
+void cgroup_fork_callbacks(struct task_struct *child)
+{
+	if (need_forkexit_callback) {
+		int i;
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->fork)
+				ss->fork(ss, child);
+		}
+	}
+}
+
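
The loop above fixes the shape of a subsystem's fork hook. A hypothetical subsystem showing that callback signature (the name "example" is invented; a real subsystem would also supply a subsys_id and create/destroy callbacks):

	/* Hypothetical subsystem, for illustration only: */
	static void example_fork(struct cgroup_subsys *ss,
				 struct task_struct *child)
	{
		/* Runs once per fork, before the child enters the
		 * tasklist, so no locking is needed here. */
	}

	struct cgroup_subsys example_subsys = {
		.name = "example",
		.fork = example_fork,	/* assumed to flip need_forkexit_callback
					 * when the subsystem is registered */
	};
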
+/**
+ * cgroup_exit - detach cgroup from exiting task
+ * @tsk: pointer to task_struct of exiting process
+ * @run_callbacks: run the subsystem exit callbacks if true
+ *
+ * Description: Detach cgroup from @tsk and release it.
+ *
+ * Note that cgroups marked notify_on_release force every task in
+ * them to take the global cgroup_mutex when exiting.
+ * This could impact scaling on very large systems. Be reluctant to
+ * use notify_on_release cgroups where very high task exit scaling
+ * is required on large systems.
+ *
+ * the_top_cgroup_hack:
+ *
+ * Set the exiting task's cgroup to the root cgroup (top_cgroup).
+ *
+ * We call cgroup_exit() while the task is still competent to
+ * handle notify_on_release(), then leave the task attached to the
+ * root cgroup in each hierarchy for the remainder of its exit.
+ *
+ * To do this properly, we would increment the reference count on
+ * top_cgroup, and near the very end of the kernel/exit.c do_exit()
+ * code we would add a second cgroup function call, to drop that
+ * reference. This would just create an unnecessary hot spot on
+ * the top_cgroup reference count, to no avail.
+ *
+ * Normally, holding a reference to a cgroup without bumping its
+ * count is unsafe. The cgroup could go away, or someone could
+ * attach us to a different cgroup, decrementing the count on the
+ * first cgroup that we never incremented. But in this case,
+ * top_cgroup isn't going away, and either the task has PF_EXITING
+ * set, which wards off any attach_task() attempts, or the task is
+ * a failed fork, never visible to attach_task().
+ */
+void cgroup_exit(struct task_struct *tsk, int run_callbacks)
+{
+	int i;
+
+	if (run_callbacks && need_forkexit_callback) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->exit)
+				ss->exit(ss, tsk);
+		}
+	}
+	/* Reassign the task to the init_css_set. */
+	task_lock(tsk);
+	put_css_set(&tsk->cgroups);
+	tsk->cgroups = init_task.cgroups;
+	task_unlock(tsk);
+}
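
For completeness, the call sites implied by the comment block above sit outside this hunk; the run_callbacks argument is what distinguishes a normal exit from a failed fork whose callbacks may never have run. An assumed sketch:

	/* Assumed call sites, not part of this hunk:
	 *
	 * kernel/exit.c:do_exit():
	 *	cgroup_exit(tsk, 1);	run the subsystem ->exit callbacks
	 *
	 * kernel/fork.c, failed-fork cleanup:
	 *	cgroup_exit(p, 0);	callbacks may not have run; skip them
	 */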