@@ -40,7 +40,7 @@
 #include <linux/magic.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
-
+#include <linux/sort.h>
 #include <asm/atomic.h>
 
 /* Generate an array of cgroup subsystem pointers */
@@ -700,6 +700,127 @@ int cgroup_path(const struct cgroup *cont, char *buf, int buflen)
 	return 0;
 }
 
+/*
+ * Return the first subsystem attached to a cgroup's hierarchy, and
+ * its subsystem id.
+ */
+
+static void get_first_subsys(const struct cgroup *cont,
+			     struct cgroup_subsys_state **css, int *subsys_id)
+{
+	const struct cgroupfs_root *root = cont->root;
+	const struct cgroup_subsys *test_ss;
+	BUG_ON(list_empty(&root->subsys_list));
+	test_ss = list_entry(root->subsys_list.next,
+			     struct cgroup_subsys, sibling);
+	if (css) {
+		*css = cont->subsys[test_ss->subsys_id];
+		BUG_ON(!*css);
+	}
+	if (subsys_id)
+		*subsys_id = test_ss->subsys_id;
+}
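
Both call patterns this helper supports appear later in the patch; a
minimal sketch of the two uses ('cont' is assumed to be a valid cgroup,
with cgroup_mutex or RCU held as each caller requires):

	struct cgroup_subsys_state *css;
	int subsys_id;

	/* only the id is wanted -- see attach_task() below */
	get_first_subsys(cont, NULL, &subsys_id);

	/* both the state pointer and the id -- see __cgroup_task_count() */
	get_first_subsys(cont, &css, &subsys_id);
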
+
+/*
+ * Attach task 'tsk' to cgroup 'cont'
+ *
+ * Call holding cgroup_mutex. May take task_lock of
+ * the task 'tsk' during call.
+ */
+static int attach_task(struct cgroup *cont, struct task_struct *tsk)
+{
+	int retval = 0;
+	struct cgroup_subsys *ss;
+	struct cgroup *oldcont;
+	struct css_set *cg = &tsk->cgroups;
+	struct cgroupfs_root *root = cont->root;
+	int i;
+	int subsys_id;
+
+	get_first_subsys(cont, NULL, &subsys_id);
+
+	/* Nothing to do if the task is already in that cgroup */
+	oldcont = task_cgroup(tsk, subsys_id);
+	if (cont == oldcont)
+		return 0;
+
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cont, tsk);
+			if (retval) {
+				return retval;
+			}
+		}
+	}
+
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		return -ESRCH;
+	}
+	/* Update the css_set pointers for the subsystems in this
+	 * hierarchy */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		if (root->subsys_bits & (1ull << i)) {
+			/* Subsystem is in this hierarchy. So we want
+			 * the subsystem state from the new
+			 * cgroup. Transfer the refcount from the
+			 * old to the new */
+			atomic_inc(&cont->count);
+			atomic_dec(&cg->subsys[i]->cgroup->count);
+			rcu_assign_pointer(cg->subsys[i], cont->subsys[i]);
+		}
+	}
+	task_unlock(tsk);
+
+	for_each_subsys(root, ss) {
+		if (ss->attach) {
+			ss->attach(ss, cont, oldcont, tsk);
+		}
+	}
+
+	synchronize_rcu();
+	return 0;
+}
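
The can_attach()/attach() pair gives every subsystem in the hierarchy a
veto before any state is touched and a notification once the move is
done. A hypothetical subsystem's callbacks, sketched only to illustrate
the contract attach_task() drives (the names are illustrative, not from
this patch):

	static int example_can_attach(struct cgroup_subsys *ss,
				      struct cgroup *cont,
				      struct task_struct *tsk)
	{
		/* return a -ve errno here and the move is aborted untouched */
		return 0;
	}

	static void example_attach(struct cgroup_subsys *ss,
				   struct cgroup *cont,
				   struct cgroup *old_cont,
				   struct task_struct *tsk)
	{
		/* tsk is now in cont; migrate any accounting from old_cont */
	}
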
+
+/*
+ * Attach task with pid 'pid' to cgroup 'cont'. Call with
+ * cgroup_mutex held; may take task_lock of the task.
+ */
+static int attach_task_by_pid(struct cgroup *cont, char *pidbuf)
+{
+	pid_t pid;
+	struct task_struct *tsk;
+	int ret;
+
+	if (sscanf(pidbuf, "%d", &pid) != 1)
+		return -EIO;
+
+	if (pid) {
+		rcu_read_lock();
+		tsk = find_task_by_pid(pid);
+		if (!tsk || tsk->flags & PF_EXITING) {
+			rcu_read_unlock();
+			return -ESRCH;
+		}
+		get_task_struct(tsk);
+		rcu_read_unlock();
+
+		if ((current->euid) && (current->euid != tsk->uid)
+		    && (current->euid != tsk->suid)) {
+			put_task_struct(tsk);
+			return -EACCES;
+		}
+	} else {
+		tsk = current;
+		get_task_struct(tsk);
+	}
+
+	ret = attach_task(cont, tsk);
+	put_task_struct(tsk);
+	return ret;
+}
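
From userspace the interface is a plain write of a decimal pid, with 0
meaning "the writing task itself" (the else branch above). An
illustrative snippet, assuming the hierarchy is mounted at the
hypothetical path /dev/cgroup:

	#include <stdio.h>

	/* move the calling process into the cgroup directory 'dir' */
	static int attach_self(const char *dir)
	{
		char path[256];
		FILE *f;

		snprintf(path, sizeof(path), "%s/tasks", dir);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fprintf(f, "0\n");	/* pid 0 == current task */
		return fclose(f);
	}
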
+
 /* The various types of files and directories in a cgroup file system */
 enum cgroup_filetype {
@@ -708,6 +829,55 @@ enum cgroup_filetype {
 	FILE_TASKLIST,
 };
 
+static ssize_t cgroup_common_file_write(struct cgroup *cont,
+					struct cftype *cft,
+					struct file *file,
+					const char __user *userbuf,
+					size_t nbytes, loff_t *unused_ppos)
+{
+	enum cgroup_filetype type = cft->private;
+	char *buffer;
+	int retval = 0;
+
+	if (nbytes >= PATH_MAX)
+		return -E2BIG;
+
+	/* +1 for nul-terminator */
+	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+	if (buffer == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buffer, userbuf, nbytes)) {
+		retval = -EFAULT;
+		goto out1;
+	}
+	buffer[nbytes] = 0;	/* nul-terminate */
+
+	mutex_lock(&cgroup_mutex);
+
+	if (cgroup_is_removed(cont)) {
+		retval = -ENODEV;
+		goto out2;
+	}
+
+	switch (type) {
+	case FILE_TASKLIST:
+		retval = attach_task_by_pid(cont, buffer);
+		break;
+	default:
+		retval = -EINVAL;
+		goto out2;
+	}
+
+	if (retval == 0)
+		retval = nbytes;
+out2:
+	mutex_unlock(&cgroup_mutex);
+out1:
+	kfree(buffer);
+	return retval;
+}
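
The switch on cft->private lets this one handler serve every common
control file: each cftype carries its type tag in .private (see
cft_tasks below), so a future file needs only a new enum value and
case. A purely hypothetical example of such reuse (FILE_EXAMPLE and the
cftype are illustrative, not part of this patch):

	static struct cftype cft_example = {
		.name = "example",
		.write = cgroup_common_file_write,
		.private = FILE_EXAMPLE,	/* hypothetical filetype */
	};
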
+
 static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
 						size_t nbytes, loff_t *ppos)
 {
@@ -914,6 +1084,189 @@ int cgroup_add_files(struct cgroup *cont,
 	return 0;
 }
 
+/* Count the number of tasks in a cgroup. Could be made more
+ * time-efficient but less space-efficient with more linked lists
+ * running through each cgroup and the css_set structures that
+ * reference it. Must be called with tasklist_lock held for read or
+ * write, or in an RCU critical section.
+ */
+int __cgroup_task_count(const struct cgroup *cont)
+{
+	int count = 0;
+	struct task_struct *g, *p;
+	struct cgroup_subsys_state *css;
+	int subsys_id;
+
+	get_first_subsys(cont, &css, &subsys_id);
+	do_each_thread(g, p) {
+		if (task_subsys_state(p, subsys_id) == css)
+			count++;
+	} while_each_thread(g, p);
+	return count;
+}
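
The leading underscores mark the locking contract: the caller provides
the protection, as the comment above spells out. The lock-taking
wrapper is not part of this hunk, so a caller-side sketch under those
rules:

	int n;

	rcu_read_lock();	/* or read_lock(&tasklist_lock) */
	n = __cgroup_task_count(cont);
	rcu_read_unlock();
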
+
+/*
+ * Stuff for reading the 'tasks' file.
+ *
+ * Reading this file can return large amounts of data if a cgroup has
+ * *lots* of attached tasks. So it may need several calls to read(),
+ * but we cannot guarantee that the information we produce is correct
+ * unless we produce it entirely atomically.
+ *
+ * Upon tasks file open(), a struct ctr_struct is allocated that
+ * holds a pointer to an array (also allocated here). The struct
+ * ctr_struct * is stored in file->private_data. Its resources will
+ * be freed by release() when the file is closed. The array is used
+ * to sprintf the PIDs and then used by read().
+ */
+struct ctr_struct {
+	char *buf;
+	int bufsz;
+};
+
+/*
+ * Load into 'pidarray' up to 'npids' of the tasks using cgroup
+ * 'cont'. Return actual number of pids loaded. No need to
+ * task_lock(p) when reading out p->cgroup, since we're in an RCU
+ * read section, so the css_set can't go away, and is
+ * immutable after creation.
+ */
+static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cont)
+{
+	int n = 0;
+	struct task_struct *g, *p;
+	struct cgroup_subsys_state *css;
+	int subsys_id;
+
+	get_first_subsys(cont, &css, &subsys_id);
+	rcu_read_lock();
+	do_each_thread(g, p) {
+		if (task_subsys_state(p, subsys_id) == css) {
+			pidarray[n++] = pid_nr(task_pid(p));
+			if (unlikely(n == npids))
+				goto array_full;
+		}
+	} while_each_thread(g, p);
+
+array_full:
+	rcu_read_unlock();
+	return n;
+}
+
+static int cmppid(const void *a, const void *b)
+{
+	return *(pid_t *)a - *(pid_t *)b;
+}
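
This comparator is the reason linux/sort.h is now included at the top
of the file: the kernel's sort() takes a qsort-style callback, with a
NULL final argument selecting the built-in swap routine. Its use
appears in cgroup_tasks_open() below:

	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
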
+
+/*
+ * Convert array 'a' of 'npids' pid_t's to a string of newline separated
+ * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
+ * count 'cnt' of how many chars would be written if buf were large enough.
+ */
+static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+{
+	int cnt = 0;
+	int i;
+
+	for (i = 0; i < npids; i++)
+		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
+	return cnt;
+}
+
+/*
+ * Handle an open on the 'tasks' file. Prepare a buffer listing the
+ * process ids of tasks currently attached to the cgroup being opened.
+ *
+ * Does not require any specific cgroup mutexes, and does not take any.
+ */
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+	struct cgroup *cont = __d_cont(file->f_dentry->d_parent);
+	struct ctr_struct *ctr;
+	pid_t *pidarray;
+	int npids;
+	char c;
+
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
+	if (!ctr)
+		goto err0;
+
+	/*
+	 * If the cgroup gets more users after we read the count, we won't
+	 * have enough space - tough. This race is indistinguishable to the
+	 * caller from the case that the additional cgroup users didn't
+	 * show up until sometime later on.
+	 */
+	npids = cgroup_task_count(cont);
+	if (npids) {
+		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+		if (!pidarray)
+			goto err1;
+
+		npids = pid_array_load(pidarray, npids, cont);
+		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
+
+		/* Call pid_array_to_buf() twice, first just to get bufsz */
+		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
+		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
+		if (!ctr->buf)
+			goto err2;
+		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
+
+		kfree(pidarray);
+	} else {
+		ctr->buf = NULL;
+		ctr->bufsz = 0;
+	}
+	file->private_data = ctr;
+	return 0;
+
+err2:
+	kfree(pidarray);
+err1:
+	kfree(ctr);
+err0:
+	return -ENOMEM;
+}
+
+static ssize_t cgroup_tasks_read(struct cgroup *cont,
+				 struct cftype *cft,
+				 struct file *file, char __user *buf,
+				 size_t nbytes, loff_t *ppos)
+{
+	struct ctr_struct *ctr = file->private_data;
+
+	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
+}
+
+static int cgroup_tasks_release(struct inode *unused_inode,
+				struct file *file)
+{
+	struct ctr_struct *ctr;
+
+	if (file->f_mode & FMODE_READ) {
+		ctr = file->private_data;
+		kfree(ctr->buf);
+		kfree(ctr);
+	}
+	return 0;
+}
+
+/*
+ * for the common functions, 'private' gives the type of file
+ */
+static struct cftype cft_tasks = {
+	.name = "tasks",
+	.open = cgroup_tasks_open,
+	.read = cgroup_tasks_read,
+	.write = cgroup_common_file_write,
+	.release = cgroup_tasks_release,
+	.private = FILE_TASKLIST,
+};
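
Reading the file back completes the round trip: open() snapshots and
sorts the pid list, read() merely streams the prepared buffer. An
illustrative userspace read, again assuming the hypothetical
/dev/cgroup mount point:

	#include <stdio.h>

	/* print every pid attached to the cgroup directory 'dir' */
	static void dump_tasks(const char *dir)
	{
		char path[256], line[32];
		FILE *f;

		snprintf(path, sizeof(path), "%s/tasks", dir);
		f = fopen(path, "r");
		if (!f)
			return;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* one pid per line, sorted */
		fclose(f);
	}
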
+
 static int cgroup_populate_dir(struct cgroup *cont)
 {
 	int err;
@@ -922,6 +1275,10 @@ static int cgroup_populate_dir(struct cgroup *cont)
 	/* First clear out any existing files */
 	cgroup_clear_directory(cont->dentry);
 
+	err = cgroup_add_file(cont, NULL, &cft_tasks);
+	if (err < 0)
+		return err;
+
 	for_each_subsys(cont->root, ss) {
 		if (ss->populate && (err = ss->populate(ss, cont)) < 0)
 			return err;