|
@@ -138,6 +138,7 @@ static const char * const mem_cgroup_lru_names[] = {
|
|
|
*/
|
|
|
enum mem_cgroup_events_target {
|
|
|
MEM_CGROUP_TARGET_THRESH,
|
|
|
+ MEM_CGROUP_TARGET_SOFTLIMIT,
|
|
|
MEM_CGROUP_TARGET_NUMAINFO,
|
|
|
MEM_CGROUP_NTARGETS,
|
|
|
};
|
|
@@ -315,6 +316,22 @@ struct mem_cgroup {
|
|
|
atomic_t numainfo_events;
|
|
|
atomic_t numainfo_updating;
|
|
|
#endif
|
|
|
+ /*
|
|
|
+ * Protects soft_contributed transitions.
|
|
|
+ * See mem_cgroup_update_soft_limit
|
|
|
+ */
|
|
|
+ spinlock_t soft_lock;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If true then this group has increased parents' children_in_excess
|
|
|
+ * when it got over the soft limit.
|
|
|
+ * When a group falls bellow the soft limit, parents' children_in_excess
|
|
|
+ * is decreased and soft_contributed changed to false.
|
|
|
+ */
|
|
|
+ bool soft_contributed;
|
|
|
+
|
|
|
+ /* Number of children that are in soft limit excess */
|
|
|
+ atomic_t children_in_excess;
|
|
|
|
|
|
struct mem_cgroup_per_node *nodeinfo[0];
|
|
|
/* WARNING: nodeinfo must be the last member here */
|
|
@@ -802,6 +819,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
|
|
|
case MEM_CGROUP_TARGET_THRESH:
|
|
|
next = val + THRESHOLDS_EVENTS_TARGET;
|
|
|
break;
|
|
|
+ case MEM_CGROUP_TARGET_SOFTLIMIT:
|
|
|
+ next = val + SOFTLIMIT_EVENTS_TARGET;
|
|
|
+ break;
|
|
|
case MEM_CGROUP_TARGET_NUMAINFO:
|
|
|
next = val + NUMAINFO_EVENTS_TARGET;
|
|
|
break;
|
|
@@ -814,6 +834,42 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Called from rate-limitted memcg_check_events when enough
|
|
|
+ * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure
|
|
|
+ * that all the parents up the hierarchy will be noticed that this group
|
|
|
+ * is in excess or that it is not in excess anymore. mmecg->soft_contributed
|
|
|
+ * makes the transition a single action whenever the state flips from one to
|
|
|
+ * other.
|
|
|
+ */
|
|
|
+static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
|
|
|
+{
|
|
|
+ unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
|
|
|
+ struct mem_cgroup *parent = memcg;
|
|
|
+ int delta = 0;
|
|
|
+
|
|
|
+ spin_lock(&memcg->soft_lock);
|
|
|
+ if (excess) {
|
|
|
+ if (!memcg->soft_contributed) {
|
|
|
+ delta = 1;
|
|
|
+ memcg->soft_contributed = true;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (memcg->soft_contributed) {
|
|
|
+ delta = -1;
|
|
|
+ memcg->soft_contributed = false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Necessary to update all ancestors when hierarchy is used
|
|
|
+ * because their event counter is not touched.
|
|
|
+ */
|
|
|
+ while (delta && (parent = parent_mem_cgroup(parent)))
|
|
|
+ atomic_add(delta, &parent->children_in_excess);
|
|
|
+ spin_unlock(&memcg->soft_lock);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Check events in order.
|
|
|
*
|
|
@@ -824,8 +880,11 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
|
|
|
/* threshold event is triggered in finer grain than soft limit */
|
|
|
if (unlikely(mem_cgroup_event_ratelimit(memcg,
|
|
|
MEM_CGROUP_TARGET_THRESH))) {
|
|
|
+ bool do_softlimit;
|
|
|
bool do_numainfo __maybe_unused;
|
|
|
|
|
|
+ do_softlimit = mem_cgroup_event_ratelimit(memcg,
|
|
|
+ MEM_CGROUP_TARGET_SOFTLIMIT);
|
|
|
#if MAX_NUMNODES > 1
|
|
|
do_numainfo = mem_cgroup_event_ratelimit(memcg,
|
|
|
MEM_CGROUP_TARGET_NUMAINFO);
|
|
@@ -833,6 +892,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
|
|
|
preempt_enable();
|
|
|
|
|
|
mem_cgroup_threshold(memcg);
|
|
|
+ if (unlikely(do_softlimit))
|
|
|
+ mem_cgroup_update_soft_limit(memcg);
|
|
|
#if MAX_NUMNODES > 1
|
|
|
if (unlikely(do_numainfo))
|
|
|
atomic_inc(&memcg->numainfo_events);
|
|
@@ -1816,6 +1877,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
|
|
|
* hierarchy if
|
|
|
* a) it is over its soft limit
|
|
|
* b) any parent up the hierarchy is over its soft limit
|
|
|
+ *
|
|
|
+ * If the given group doesn't have any children over the limit then it
|
|
|
+ * doesn't make any sense to iterate its subtree.
|
|
|
*/
|
|
|
enum mem_cgroup_filter_t
|
|
|
mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
|
|
@@ -1837,6 +1901,8 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
+ if (!atomic_read(&memcg->children_in_excess))
|
|
|
+ return SKIP_TREE;
|
|
|
return SKIP;
|
|
|
}
|
|
|
|
|
@@ -5892,6 +5958,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
|
mutex_init(&memcg->thresholds_lock);
|
|
|
spin_lock_init(&memcg->move_lock);
|
|
|
vmpressure_init(&memcg->vmpressure);
|
|
|
+ spin_lock_init(&memcg->soft_lock);
|
|
|
|
|
|
return &memcg->css;
|
|
|
|
|
@@ -5969,6 +6036,10 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
|
|
|
|
|
|
mem_cgroup_invalidate_reclaim_iterators(memcg);
|
|
|
mem_cgroup_reparent_charges(memcg);
|
|
|
+ if (memcg->soft_contributed) {
|
|
|
+ while ((memcg = parent_mem_cgroup(memcg)))
|
|
|
+ atomic_dec(&memcg->children_in_excess);
|
|
|
+ }
|
|
|
mem_cgroup_destroy_all_caches(memcg);
|
|
|
vmpressure_cleanup(&memcg->vmpressure);
|
|
|
}
|