|
@@ -622,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
|
|
|
* Call subsys's pre_destroy handler.
|
|
|
* This is called before css refcnt check.
|
|
|
*/
|
|
|
-static void cgroup_call_pre_destroy(struct cgroup *cgrp)
|
|
|
+static int cgroup_call_pre_destroy(struct cgroup *cgrp)
|
|
|
{
|
|
|
struct cgroup_subsys *ss;
|
|
|
+ int ret = 0;
|
|
|
+
|
|
|
for_each_subsys(cgrp->root, ss)
|
|
|
- if (ss->pre_destroy)
|
|
|
- ss->pre_destroy(ss, cgrp);
|
|
|
- return;
|
|
|
+ if (ss->pre_destroy) {
|
|
|
+ ret = ss->pre_destroy(ss, cgrp);
|
|
|
+ if (ret)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ return ret;
|
|
|
}
|
|
|
|
|
|
static void free_cgroup_rcu(struct rcu_head *obj)
|
|
@@ -722,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
|
|
|
remove_dir(dentry);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * A queue for tasks waiting to rmdir() a cgroup. A task will sleep when
|
|
|
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
|
|
|
+ * reference to css->refcnt. In general, this refcnt is expected to go down
|
|
|
+ * to zero, soon.
|
|
|
+ *
|
|
|
+ * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex.
|
|
|
+ */
|
|
|
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
|
|
|
+
|
|
|
+static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
|
|
|
+{
|
|
|
+ if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
|
|
|
+ wake_up_all(&cgroup_rmdir_waitq);
|
|
|
+}
|
|
|
+
|
|
|
static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
unsigned long final_bits)
|
|
|
{
|
|
@@ -1317,6 +1338,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
|
|
|
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
|
|
|
synchronize_rcu();
|
|
|
put_css_set(cg);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Wake up rmdir() waiter. The rmdir should fail since the cgroup
|
|
|
+ * is no longer empty.
|
|
|
+ */
|
|
|
+ cgroup_wakeup_rmdir_waiters(cgrp);
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -2608,9 +2635,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
|
|
|
struct cgroup *cgrp = dentry->d_fsdata;
|
|
|
struct dentry *d;
|
|
|
struct cgroup *parent;
|
|
|
+ DEFINE_WAIT(wait);
|
|
|
+ int ret;
|
|
|
|
|
|
/* the vfs holds both inode->i_mutex already */
|
|
|
-
|
|
|
+again:
|
|
|
mutex_lock(&cgroup_mutex);
|
|
|
if (atomic_read(&cgrp->count) != 0) {
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
@@ -2626,17 +2655,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
|
|
|
* Call pre_destroy handlers of subsys. Notify subsystems
|
|
|
* that rmdir() request comes.
|
|
|
*/
|
|
|
- cgroup_call_pre_destroy(cgrp);
|
|
|
+ ret = cgroup_call_pre_destroy(cgrp);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
|
|
|
mutex_lock(&cgroup_mutex);
|
|
|
parent = cgrp->parent;
|
|
|
-
|
|
|
- if (atomic_read(&cgrp->count)
|
|
|
- || !list_empty(&cgrp->children)
|
|
|
- || !cgroup_clear_css_refs(cgrp)) {
|
|
|
+ if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
return -EBUSY;
|
|
|
}
|
|
|
+ /*
|
|
|
+ * css_put/get is provided for subsys to grab refcnt to css. In typical
|
|
|
+ * case, subsystem has no reference after pre_destroy(). But, under
|
|
|
+ * hierarchy management, some *temporary* refcnt can be held.
|
|
|
+ * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
|
|
|
+ * is really busy, it should return -EBUSY at pre_destroy(). wake_up
|
|
|
+ * is called when css_put() is called and refcnt goes down to 0.
|
|
|
+ */
|
|
|
+ set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
+ prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
|
|
|
+
|
|
|
+ if (!cgroup_clear_css_refs(cgrp)) {
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
+ schedule();
|
|
|
+ finish_wait(&cgroup_rmdir_waitq, &wait);
|
|
|
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
+ if (signal_pending(current))
|
|
|
+ return -EINTR;
|
|
|
+ goto again;
|
|
|
+ }
|
|
|
+ /* No css_tryget() can succeed after this point. */
|
|
|
+ finish_wait(&cgroup_rmdir_waitq, &wait);
|
|
|
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
|
|
|
spin_lock(&release_list_lock);
|
|
|
set_bit(CGRP_REMOVED, &cgrp->flags);
|
|
@@ -3194,10 +3245,12 @@ void __css_put(struct cgroup_subsys_state *css)
|
|
|
{
|
|
|
struct cgroup *cgrp = css->cgroup;
|
|
|
rcu_read_lock();
|
|
|
- if ((atomic_dec_return(&css->refcnt) == 1) &&
|
|
|
- notify_on_release(cgrp)) {
|
|
|
- set_bit(CGRP_RELEASABLE, &cgrp->flags);
|
|
|
- check_for_release(cgrp);
|
|
|
+ if (atomic_dec_return(&css->refcnt) == 1) {
|
|
|
+ if (notify_on_release(cgrp)) {
|
|
|
+ set_bit(CGRP_RELEASABLE, &cgrp->flags);
|
|
|
+ check_for_release(cgrp);
|
|
|
+ }
|
|
|
+ cgroup_wakeup_rmdir_waiters(cgrp);
|
|
|
}
|
|
|
rcu_read_unlock();
|
|
|
}
|