|
@@ -6355,6 +6355,42 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
|
|
|
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
|
|
+ /*
|
|
|
+ * XXX: css_offline() would be where we should reparent all
|
|
|
+ * memory to prepare the cgroup for destruction. However,
|
|
|
+ * memcg does not do css_tryget() and res_counter charging
|
|
|
+ * under the same RCU lock region, which means that charging
|
|
|
+ * could race with offlining. Offlining only happens to
|
|
|
+ * cgroups with no tasks in them but charges can show up
|
|
|
+ * without any tasks from the swapin path when the target
|
|
|
+ * memcg is looked up from the swapout record and not from the
|
|
|
+ * current task as it usually is. A race like this can leak
|
|
|
+ * charges and put pages with stale cgroup pointers into
|
|
|
+ * circulation:
|
|
|
+ *
|
|
|
+ *   #0                        #1
|
|
|
+ *                             lookup_swap_cgroup_id()
|
|
|
+ *                             rcu_read_lock()
|
|
|
+ *                             mem_cgroup_lookup()
|
|
|
+ *                             css_tryget()
|
|
|
+ *                             rcu_read_unlock()
|
|
|
+ *   disable css_tryget()
|
|
|
+ *   call_rcu()
|
|
|
+ *     offline_css()
|
|
|
+ *       reparent_charges()
|
|
|
+ *                             res_counter_charge()
|
|
|
+ *                             css_put()
|
|
|
+ *                               css_free()
|
|
|
+ *                             pc->mem_cgroup = dead memcg
|
|
|
+ *                             add page to lru
|
|
|
+ *
|
|
|
+ * The bulk of the charges are still moved in offline_css() to
|
|
|
+ * avoid pinning a lot of pages in case a long-term reference
|
|
|
+ * like a swapout record is deferring the css_free() to long
|
|
|
+ * after offlining. But this makes sure we catch any charges
|
|
|
+ * made after offlining:
|
|
|
+ */
|
|
|
+ mem_cgroup_reparent_charges(memcg);
|
|
|
|
|
|
memcg_destroy_kmem(memcg);
|
|
|
__mem_cgroup_free(memcg);
|