@@ -152,12 +152,15 @@ struct mem_cgroup_stat_cpu {
 };
 
 struct mem_cgroup_reclaim_iter {
-	/* last scanned hierarchy member with elevated css ref count */
+	/*
+	 * last scanned hierarchy member. Valid only if last_dead_count
+	 * matches memcg->dead_count of the hierarchy root group.
+	 */
 	struct mem_cgroup *last_visited;
+	unsigned long last_dead_count;
+
 	/* scan generation, increased every round-trip */
 	unsigned int generation;
-	/* lock to protect the position and generation */
-	spinlock_t iter_lock;
 };
 
 /*
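The iterator now caches its position without holding a css reference: last_visited may point at an already-destroyed group, and the pointer is only meaningful while the recorded last_dead_count still matches the root's dead_count. A minimal sketch of that invariant as a predicate (iter_position_valid is a hypothetical helper, not part of the patch; it uses only the fields introduced above):

	/* Hypothetical helper spelling out the new validity rule: the
	 * cached pointer may be dereferenced only under rcu_read_lock(),
	 * and only after this check plus a successful css_tryget(). */
	static bool iter_position_valid(struct mem_cgroup_reclaim_iter *iter,
					struct mem_cgroup *root)
	{
		return iter->last_visited &&
		       iter->last_dead_count == atomic_read(&root->dead_count);
	}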
@@ -337,6 +340,7 @@ struct mem_cgroup {
 	struct mem_cgroup_stat_cpu nocpu_base;
 	spinlock_t pcp_counter_lock;
 
+	atomic_t dead_count;
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
 	struct tcp_memcontrol tcp_mem;
 #endif
@@ -1092,6 +1096,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 {
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *last_visited = NULL;
+	unsigned long uninitialized_var(dead_count);
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -1120,16 +1125,33 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 
 		mz = mem_cgroup_zoneinfo(root, nid, zid);
 		iter = &mz->reclaim_iter[reclaim->priority];
-		spin_lock(&iter->iter_lock);
 		last_visited = iter->last_visited;
 		if (prev && reclaim->generation != iter->generation) {
-			if (last_visited) {
-				css_put(&last_visited->css);
-				iter->last_visited = NULL;
-			}
-			spin_unlock(&iter->iter_lock);
+			iter->last_visited = NULL;
 			goto out_unlock;
 		}
+
+		/*
+		 * If the dead_count mismatches, a destruction
+		 * has happened or is happening concurrently.
+		 * If the dead_count matches, a destruction
+		 * might still happen concurrently, but since
+		 * we checked under RCU, that destruction
+		 * won't free the object until we release the
+		 * RCU reader lock. Thus, the dead_count
+		 * check verifies the pointer is still valid,
+		 * css_tryget() verifies the cgroup pointed to
+		 * is alive.
+		 */
+		dead_count = atomic_read(&root->dead_count);
+		smp_rmb();
+		last_visited = iter->last_visited;
+		if (last_visited) {
+			if ((dead_count != iter->last_dead_count) ||
+			    !css_tryget(&last_visited->css)) {
+				last_visited = NULL;
+			}
+		}
 	}
 
 	/*
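This hunk is the reader side of a publish/validate protocol: snapshot the root's dead_count, order that read before the load of the cached pointer with smp_rmb(), and only then try to pin the group with css_tryget(). A compressed userspace analogue of the same pattern, using C11 atomics in place of the kernel primitives (every name below is invented for illustration; obj_tryget() stands in for css_tryget(), the fence for smp_rmb()):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct obj { atomic_int refs; };	/* stand-in for a css */

	struct cache {
		struct obj *_Atomic last;	/* cached position */
		atomic_ulong last_dead_count;	/* snapshot taken when cached */
	};

	/* Fails once the object has started dying, like css_tryget(). */
	static bool obj_tryget(struct obj *o)
	{
		int r = atomic_load(&o->refs);

		while (r > 0)
			if (atomic_compare_exchange_weak(&o->refs, &r, r + 1))
				return true;
		return false;
	}

	/* Reader: trust the cached pointer only if no destruction was
	 * announced since it was stored, then try to pin it. */
	static struct obj *cache_get(struct cache *c, atomic_ulong *root_dead)
	{
		unsigned long dead = atomic_load(root_dead);
		struct obj *o;

		atomic_thread_fence(memory_order_acquire);	/* ~smp_rmb() */
		o = atomic_load(&c->last);
		if (o && (dead != atomic_load(&c->last_dead_count) ||
			  !obj_tryget(o)))
			o = NULL;
		return o;
	}

As in the patch, a stale cache is simply dropped (o = NULL) and iteration restarts from the top of the hierarchy. Note the kernel version additionally relies on RCU to keep the object's memory valid across the tryget; the analogue glosses over that.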
@@ -1169,16 +1191,14 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		if (css && !memcg)
 			curr = mem_cgroup_from_css(css);
 
-		/* make sure that the cached memcg is not removed */
-		if (curr)
-			css_get(&curr->css);
 		iter->last_visited = curr;
+		smp_wmb();
+		iter->last_dead_count = dead_count;
 
 		if (!css)
 			iter->generation++;
 		else if (!prev && memcg)
 			reclaim->generation = iter->generation;
-		spin_unlock(&iter->iter_lock);
 	} else if (css && !memcg) {
 		last_visited = mem_cgroup_from_css(css);
 	}
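The store side pairs with the read side added earlier: the new position is published first, smp_wmb() orders that store, and only then is the dead_count snapshot written into last_dead_count. Restating the two halves together (both fragments are quoted from the hunks in this patch):

	/* writer: caching a new position */
	iter->last_visited = curr;
	smp_wmb();
	iter->last_dead_count = dead_count;

	/* reader: revalidating the cache */
	dead_count = atomic_read(&root->dead_count);
	smp_rmb();
	last_visited = iter->last_visited;

The intent is that a reader observing the snapshot written after the barrier also observes the position written before it; whatever races remain are caught by the dead_count comparison and by css_tryget() failing on a dying group.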
@@ -5975,12 +5995,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 		return 1;
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-		int prio;
-
 		mz = &pn->zoneinfo[zone];
 		lruvec_init(&mz->lruvec);
-		for (prio = 0; prio < DEF_PRIORITY + 1; prio++)
-			spin_lock_init(&mz->reclaim_iter[prio].iter_lock);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->memcg = memcg;
@@ -6235,10 +6251,29 @@ mem_cgroup_css_online(struct cgroup *cont)
 	return error;
 }
 
+/*
+ * Announce to all parents that a group from their hierarchy is gone.
+ */
+static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
+{
+	struct mem_cgroup *parent = memcg;
+
+	while ((parent = parent_mem_cgroup(parent)))
+		atomic_inc(&parent->dead_count);
+
+	/*
+	 * if the root memcg is not hierarchical we have to check it
+	 * explicitly.
+	 */
+	if (!root_mem_cgroup->use_hierarchy)
+		atomic_inc(&root_mem_cgroup->dead_count);
+}
+
 static void mem_cgroup_css_offline(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
+	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
 	mem_cgroup_destroy_all_caches(memcg);
 }
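With the per-priority spinlock gone, nothing changes for callers; the iterator is driven exactly as before and only the caching underneath is new. For reference, a sketch of the typical consumer loop of this era (shrink_zone()-style; the reclaim work itself is elided):

	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = sc->priority,
	};
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		/* ... shrink the lruvec of this group ... */
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);

A caller that bails out of the loop early must still drop the reference taken by css_tryget(), which is what mem_cgroup_iter_break(root, memcg) is for.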