|
@@ -4,6 +4,10 @@
|
|
|
* Based originally on the cpuset system, extracted by Paul Menage
|
|
|
* Copyright (C) 2006 Google, Inc
|
|
|
*
|
|
|
+ * Notifications support
|
|
|
+ * Copyright (C) 2009 Nokia Corporation
|
|
|
+ * Author: Kirill A. Shutemov
|
|
|
+ *
|
|
|
* Copyright notices from the original cpuset code:
|
|
|
* --------------------------------------------------
|
|
|
* Copyright (C) 2003 BULL SA.
|
|
@@ -53,6 +57,8 @@
|
|
|
#include <linux/pid_namespace.h>
|
|
|
#include <linux/idr.h>
|
|
|
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
|
|
|
+#include <linux/eventfd.h>
|
|
|
+#include <linux/poll.h>
|
|
|
|
|
|
#include <asm/atomic.h>
|
|
|
|
|
@@ -152,6 +158,35 @@ struct css_id {
|
|
|
unsigned short stack[0]; /* Array of Length (depth+1) */
|
|
|
};
|
|
|
|
|
|
+/*
|
|
|
+ * cgroup_event represents events which userspace want to recieve.
|
|
|
+ */
|
|
|
+struct cgroup_event {
|
|
|
+ /*
|
|
|
+ * Cgroup which the event belongs to.
|
|
|
+ */
|
|
|
+ struct cgroup *cgrp;
|
|
|
+ /*
|
|
|
+ * Control file which the event associated.
|
|
|
+ */
|
|
|
+ struct cftype *cft;
|
|
|
+ /*
|
|
|
+ * eventfd to signal userspace about the event.
|
|
|
+ */
|
|
|
+ struct eventfd_ctx *eventfd;
|
|
|
+ /*
|
|
|
+ * Each of these stored in a list by the cgroup.
|
|
|
+ */
|
|
|
+ struct list_head list;
|
|
|
+ /*
|
|
|
+ * All fields below needed to unregister event when
|
|
|
+ * userspace closes eventfd.
|
|
|
+ */
|
|
|
+ poll_table pt;
|
|
|
+ wait_queue_head_t *wqh;
|
|
|
+ wait_queue_t wait;
|
|
|
+ struct work_struct remove;
|
|
|
+};
|
|
|
|
|
|
/* The list of hierarchy roots */
|
|
|
|
|
@@ -760,14 +795,28 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
|
|
|
static int cgroup_call_pre_destroy(struct cgroup *cgrp)
|
|
|
{
|
|
|
struct cgroup_subsys *ss;
|
|
|
+ struct cgroup_event *event, *tmp;
|
|
|
int ret = 0;
|
|
|
|
|
|
for_each_subsys(cgrp->root, ss)
|
|
|
if (ss->pre_destroy) {
|
|
|
ret = ss->pre_destroy(ss, cgrp);
|
|
|
if (ret)
|
|
|
- break;
|
|
|
+ goto out;
|
|
|
}
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Unregister events and notify userspace.
|
|
|
+ */
|
|
|
+ spin_lock(&cgrp->event_list_lock);
|
|
|
+ list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
|
|
|
+ list_del(&event->list);
|
|
|
+ eventfd_signal(event->eventfd, 1);
|
|
|
+ schedule_work(&event->remove);
|
|
|
+ }
|
|
|
+ spin_unlock(&cgrp->event_list_lock);
|
|
|
+
|
|
|
+out:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -1239,6 +1288,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
|
|
|
INIT_LIST_HEAD(&cgrp->release_list);
|
|
|
INIT_LIST_HEAD(&cgrp->pidlists);
|
|
|
mutex_init(&cgrp->pidlist_mutex);
|
|
|
+ INIT_LIST_HEAD(&cgrp->event_list);
|
|
|
+ spin_lock_init(&cgrp->event_list_lock);
|
|
|
}
|
|
|
|
|
|
static void init_cgroup_root(struct cgroupfs_root *root)
|
|
@@ -2077,6 +2128,16 @@ static const struct inode_operations cgroup_dir_inode_operations = {
|
|
|
.rename = cgroup_rename,
|
|
|
};
|
|
|
|
|
|
+/*
|
|
|
+ * Check if a file is a control file
|
|
|
+ */
|
|
|
+static inline struct cftype *__file_cft(struct file *file)
|
|
|
+{
|
|
|
+ if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
|
|
|
+ return ERR_PTR(-EINVAL);
|
|
|
+ return __d_cft(file->f_dentry);
|
|
|
+}
|
|
|
+
|
|
|
static int cgroup_create_file(struct dentry *dentry, mode_t mode,
|
|
|
struct super_block *sb)
|
|
|
{
|
|
@@ -2930,6 +2991,166 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Unregister event and free resources.
|
|
|
+ *
|
|
|
+ * Gets called from workqueue.
|
|
|
+ */
|
|
|
+static void cgroup_event_remove(struct work_struct *work)
|
|
|
+{
|
|
|
+ struct cgroup_event *event = container_of(work, struct cgroup_event,
|
|
|
+ remove);
|
|
|
+ struct cgroup *cgrp = event->cgrp;
|
|
|
+
|
|
|
+ /* TODO: check return code */
|
|
|
+ event->cft->unregister_event(cgrp, event->cft, event->eventfd);
|
|
|
+
|
|
|
+ eventfd_ctx_put(event->eventfd);
|
|
|
+ remove_wait_queue(event->wqh, &event->wait);
|
|
|
+ kfree(event);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Gets called on POLLHUP on eventfd when user closes it.
|
|
|
+ *
|
|
|
+ * Called with wqh->lock held and interrupts disabled.
|
|
|
+ */
|
|
|
+static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
|
|
|
+ int sync, void *key)
|
|
|
+{
|
|
|
+ struct cgroup_event *event = container_of(wait,
|
|
|
+ struct cgroup_event, wait);
|
|
|
+ struct cgroup *cgrp = event->cgrp;
|
|
|
+ unsigned long flags = (unsigned long)key;
|
|
|
+
|
|
|
+ if (flags & POLLHUP) {
|
|
|
+ spin_lock(&cgrp->event_list_lock);
|
|
|
+ list_del(&event->list);
|
|
|
+ spin_unlock(&cgrp->event_list_lock);
|
|
|
+ /*
|
|
|
+ * We are in atomic context, but cgroup_event_remove() may
|
|
|
+ * sleep, so we have to call it in workqueue.
|
|
|
+ */
|
|
|
+ schedule_work(&event->remove);
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static void cgroup_event_ptable_queue_proc(struct file *file,
|
|
|
+ wait_queue_head_t *wqh, poll_table *pt)
|
|
|
+{
|
|
|
+ struct cgroup_event *event = container_of(pt,
|
|
|
+ struct cgroup_event, pt);
|
|
|
+
|
|
|
+ event->wqh = wqh;
|
|
|
+ add_wait_queue(wqh, &event->wait);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Parse input and register new cgroup event handler.
|
|
|
+ *
|
|
|
+ * Input must be in format '<event_fd> <control_fd> <args>'.
|
|
|
+ * Interpretation of args is defined by control file implementation.
|
|
|
+ */
|
|
|
+static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
|
|
|
+ const char *buffer)
|
|
|
+{
|
|
|
+ struct cgroup_event *event = NULL;
|
|
|
+ unsigned int efd, cfd;
|
|
|
+ struct file *efile = NULL;
|
|
|
+ struct file *cfile = NULL;
|
|
|
+ char *endp;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ efd = simple_strtoul(buffer, &endp, 10);
|
|
|
+ if (*endp != ' ')
|
|
|
+ return -EINVAL;
|
|
|
+ buffer = endp + 1;
|
|
|
+
|
|
|
+ cfd = simple_strtoul(buffer, &endp, 10);
|
|
|
+ if ((*endp != ' ') && (*endp != '\0'))
|
|
|
+ return -EINVAL;
|
|
|
+ buffer = endp + 1;
|
|
|
+
|
|
|
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
|
|
|
+ if (!event)
|
|
|
+ return -ENOMEM;
|
|
|
+ event->cgrp = cgrp;
|
|
|
+ INIT_LIST_HEAD(&event->list);
|
|
|
+ init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
|
|
|
+ init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
|
|
|
+ INIT_WORK(&event->remove, cgroup_event_remove);
|
|
|
+
|
|
|
+ efile = eventfd_fget(efd);
|
|
|
+ if (IS_ERR(efile)) {
|
|
|
+ ret = PTR_ERR(efile);
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+
|
|
|
+ event->eventfd = eventfd_ctx_fileget(efile);
|
|
|
+ if (IS_ERR(event->eventfd)) {
|
|
|
+ ret = PTR_ERR(event->eventfd);
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+
|
|
|
+ cfile = fget(cfd);
|
|
|
+ if (!cfile) {
|
|
|
+ ret = -EBADF;
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* the process need read permission on control file */
|
|
|
+ ret = file_permission(cfile, MAY_READ);
|
|
|
+ if (ret < 0)
|
|
|
+ goto fail;
|
|
|
+
|
|
|
+ event->cft = __file_cft(cfile);
|
|
|
+ if (IS_ERR(event->cft)) {
|
|
|
+ ret = PTR_ERR(event->cft);
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!event->cft->register_event || !event->cft->unregister_event) {
|
|
|
+ ret = -EINVAL;
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+
|
|
|
+ ret = event->cft->register_event(cgrp, event->cft,
|
|
|
+ event->eventfd, buffer);
|
|
|
+ if (ret)
|
|
|
+ goto fail;
|
|
|
+
|
|
|
+ if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
|
|
|
+ event->cft->unregister_event(cgrp, event->cft, event->eventfd);
|
|
|
+ ret = 0;
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+
|
|
|
+ spin_lock(&cgrp->event_list_lock);
|
|
|
+ list_add(&event->list, &cgrp->event_list);
|
|
|
+ spin_unlock(&cgrp->event_list_lock);
|
|
|
+
|
|
|
+ fput(cfile);
|
|
|
+ fput(efile);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+
|
|
|
+fail:
|
|
|
+ if (cfile)
|
|
|
+ fput(cfile);
|
|
|
+
|
|
|
+ if (event && event->eventfd && !IS_ERR(event->eventfd))
|
|
|
+ eventfd_ctx_put(event->eventfd);
|
|
|
+
|
|
|
+ if (!IS_ERR_OR_NULL(efile))
|
|
|
+ fput(efile);
|
|
|
+
|
|
|
+ kfree(event);
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* for the common functions, 'private' gives the type of file
|
|
|
*/
|
|
@@ -2955,6 +3176,11 @@ static struct cftype files[] = {
|
|
|
.read_u64 = cgroup_read_notify_on_release,
|
|
|
.write_u64 = cgroup_write_notify_on_release,
|
|
|
},
|
|
|
+ {
|
|
|
+ .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
|
|
|
+ .write_string = cgroup_write_event_control,
|
|
|
+ .mode = S_IWUGO,
|
|
|
+ },
|
|
|
};
|
|
|
|
|
|
static struct cftype cft_release_agent = {
|