@@ -1991,28 +1991,57 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * A group is eligible for the soft limit reclaim if
- * a) it is over its soft limit
- * b) any parent up the hierarchy is over its soft limit
- */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
-{
-	struct mem_cgroup *parent = memcg;
-
-	if (res_counter_soft_limit_excess(&memcg->res))
-		return true;
-
-	/*
-	 * If any parent up the hierarchy is over its soft limit then we
-	 * have to obey and reclaim from this group as well.
-	 */
-	while ((parent = parent_mem_cgroup(parent))) {
-		if (res_counter_soft_limit_excess(&parent->res))
-			return true;
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
+{
+	struct mem_cgroup *victim = NULL;
+	int total = 0;
+	int loop = 0;
+	unsigned long excess;
+	unsigned long nr_scanned;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = 0,
+	};
+
+	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+	while (1) {
+		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+		if (!victim) {
+			loop++;
+			if (loop >= 2) {
+				/*
+				 * If we have not been able to reclaim
+				 * anything, it might be because there are
+				 * no reclaimable pages under this hierarchy
+				 */
+				if (!total)
+					break;
+				/*
+				 * We want to do more targeted reclaim.
+				 * excess >> 2 is not too excessive so as to
+				 * reclaim too much, nor so little that we keep
+				 * coming back to reclaim from this cgroup
+				 */
+				if (total >= (excess >> 2) ||
+					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+					break;
+			}
+			continue;
+		}
+		if (!mem_cgroup_reclaimable(victim, false))
+			continue;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
+			break;
 	}
-
-	return false;
+	mem_cgroup_iter_break(root_memcg, victim);
+	return total;
 }
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -4761,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 	return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
+{
+	unsigned long nr_reclaimed = 0;
+	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+	unsigned long reclaimed;
+	int loop = 0;
+	struct mem_cgroup_tree_per_zone *mctz;
+	unsigned long long excess;
+	unsigned long nr_scanned;
+
+	if (order > 0)
+		return 0;
+
+	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+	/*
+	 * This loop can run for a while, especially if mem_cgroups
+	 * continuously keep exceeding their soft limit and putting the
+	 * system under pressure
+	 */
+	do {
+		if (next_mz)
+			mz = next_mz;
+		else
+			mz = mem_cgroup_largest_soft_limit_node(mctz);
+		if (!mz)
+			break;
+
+		nr_scanned = 0;
+		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+						    gfp_mask, &nr_scanned);
+		nr_reclaimed += reclaimed;
+		*total_scanned += nr_scanned;
+		spin_lock(&mctz->lock);
+
+		/*
+		 * If we failed to reclaim anything from this memory cgroup
+		 * it is time to move on to the next cgroup
+		 */
+		next_mz = NULL;
+		if (!reclaimed) {
+			do {
+				/*
+				 * Loop until we find yet another one.
+				 *
+				 * By the time we get the soft_limit lock
+				 * again, someone might have added the
+				 * group back on the RB tree. Iterate to
+				 * make sure we get a different mem.
+				 * mem_cgroup_largest_soft_limit_node returns
+				 * NULL if no other cgroup is present on
+				 * the tree
+				 */
+				next_mz =
+				__mem_cgroup_largest_soft_limit_node(mctz);
+				if (next_mz == mz)
+					css_put(&next_mz->memcg->css);
+				else /* next_mz == NULL or other memcg */
+					break;
+			} while (1);
+		}
+		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+		excess = res_counter_soft_limit_excess(&mz->memcg->res);
+		/*
+		 * One school of thought says that we should not add
+		 * back the node to the tree if reclaim returns 0.
+		 * But our reclaim could return 0, simply because due
+		 * to priority we are exposing a smaller subset of
+		 * memory to reclaim from. Consider this as a longer
+		 * term TODO.
+		 */
+		/* If excess == 0, no tree ops */
+		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+		spin_unlock(&mctz->lock);
+		css_put(&mz->memcg->css);
+		loop++;
+		/*
+		 * Could not reclaim anything and there are no more
+		 * mem cgroups to try or we seem to be looping without
+		 * reclaiming anything.
+		 */
+		if (!nr_reclaimed &&
+			(next_mz == NULL ||
+			loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+			break;
+	} while (!nr_reclaimed);
+	if (next_mz)
+		css_put(&next_mz->memcg->css);
+	return nr_reclaimed;
+}
+
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear