|
@@ -735,16 +735,28 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
|
|
|
* reference to css->refcnt. In general, this refcnt is expected to goes down
|
|
|
* to zero, soon.
|
|
|
*
|
|
|
- * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
|
|
|
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
|
|
|
*/
|
|
|
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
|
|
|
|
|
|
-static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
|
|
|
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
|
|
|
{
|
|
|
- if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
|
|
|
+ if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
|
|
|
wake_up_all(&cgroup_rmdir_waitq);
|
|
|
}
|
|
|
|
|
|
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
|
|
|
+{
|
|
|
+ css_get(css);
|
|
|
+}
|
|
|
+
|
|
|
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
|
|
|
+{
|
|
|
+ cgroup_wakeup_rmdir_waiter(css->cgroup);
|
|
|
+ css_put(css);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
static int rebind_subsystems(struct cgroupfs_root *root,
|
|
|
unsigned long final_bits)
|
|
|
{
|
|
@@ -1359,7 +1371,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
|
|
|
* wake up rmdir() waiter. the rmdir should fail since the cgroup
|
|
|
* is no longer empty.
|
|
|
*/
|
|
|
- cgroup_wakeup_rmdir_waiters(cgrp);
|
|
|
+ cgroup_wakeup_rmdir_waiter(cgrp);
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -2743,34 +2755,43 @@ again:
|
|
|
}
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
|
|
|
+ /*
|
|
|
+ * In general, subsystem has no css->refcnt after pre_destroy(). But
|
|
|
+ * in racy cases, subsystem may have to get css->refcnt after
|
|
|
+ * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
|
|
|
+ * makes rmdir return -EBUSY too often. To avoid that, we use waitqueue
|
|
|
+ * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
|
|
|
+ * and subsystem's reference count handling. Please see css_get/put
|
|
|
+ * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
|
|
|
+ */
|
|
|
+ set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
+
|
|
|
/*
|
|
|
* Call pre_destroy handlers of subsys. Notify subsystems
|
|
|
* that rmdir() request comes.
|
|
|
*/
|
|
|
ret = cgroup_call_pre_destroy(cgrp);
|
|
|
- if (ret)
|
|
|
+ if (ret) {
|
|
|
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
return ret;
|
|
|
+ }
|
|
|
|
|
|
mutex_lock(&cgroup_mutex);
|
|
|
parent = cgrp->parent;
|
|
|
if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
|
|
|
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
return -EBUSY;
|
|
|
}
|
|
|
- /*
|
|
|
- * css_put/get is provided for subsys to grab refcnt to css. In typical
|
|
|
- * case, subsystem has no reference after pre_destroy(). But, under
|
|
|
- * hierarchy management, some *temporal* refcnt can be hold.
|
|
|
- * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
|
|
|
- * is really busy, it should return -EBUSY at pre_destroy(). wake_up
|
|
|
- * is called when css_put() is called and refcnt goes down to 0.
|
|
|
- */
|
|
|
- set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
|
|
|
-
|
|
|
if (!cgroup_clear_css_refs(cgrp)) {
|
|
|
mutex_unlock(&cgroup_mutex);
|
|
|
- schedule();
|
|
|
+ /*
|
|
|
+ * Because someone may call cgroup_wakeup_rmdir_waiter() before
|
|
|
+ * prepare_to_wait(), we need to check this flag.
|
|
|
+ */
|
|
|
+ if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
|
|
|
+ schedule();
|
|
|
finish_wait(&cgroup_rmdir_waitq, &wait);
|
|
|
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
|
|
|
if (signal_pending(current))
|
|
@@ -3342,7 +3363,7 @@ void __css_put(struct cgroup_subsys_state *css)
|
|
|
set_bit(CGRP_RELEASABLE, &cgrp->flags);
|
|
|
check_for_release(cgrp);
|
|
|
}
|
|
|
- cgroup_wakeup_rmdir_waiters(cgrp);
|
|
|
+ cgroup_wakeup_rmdir_waiter(cgrp);
|
|
|
}
|
|
|
rcu_read_unlock();
|
|
|
}
|