@@ -370,8 +370,6 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT	(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	int id = 0;
 
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	if (!root)
 		root = root_mem_cgroup;
@@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
-				  struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
 {
 	if (!root)
 		root = root_mem_cgroup;
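
For review context (not part of the patch): a minimal sketch of the caller contract the new kernel-doc describes. A full walk passes reclaim == NULL, feeds each return value back in as @prev, and uses mem_cgroup_iter_break() to drop the reference when bailing out early; wants_to_stop() is a hypothetical placeholder, not something added here.

	struct mem_cgroup *iter = NULL;

	while ((iter = mem_cgroup_iter(root, iter, NULL))) {
		/* use iter; the next call releases its reference via @prev */
		if (wants_to_stop(iter)) {	/* hypothetical predicate */
			mem_cgroup_iter_break(root, iter);
			break;
		}
	}
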
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
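
Aside (not from the patch): once converted, callers compose the two remaining flag bits with a plain OR, and MEM_CGROUP_RECLAIM_SHRINK makes the loop return after minimal progress so userspace-triggered limit shrinking stays interruptible. The two limit-resize paths further down reduce to calls of this shape:

	/* res limit shrinking, as in mem_cgroup_resize_limit() below */
	mem_cgroup_reclaim(memcg, GFP_KERNEL, MEM_CGROUP_RECLAIM_SHRINK);

	/* memsw limit shrinking: additionally forbid swap-out */
	mem_cgroup_reclaim(memcg, GFP_KERNEL,
			   MEM_CGROUP_RECLAIM_NOSWAP |
			   MEM_CGROUP_RECLAIM_SHRINK);
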
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-					   struct zone *zone,
-					   gfp_t gfp_mask,
-					   unsigned long reclaim_options,
-					   unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
 		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total)
+				if (!total)
 					break;
 				/*
 				 * We want to do more targeted reclaim.
@@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 			}
 			continue;
 		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
+		if (!mem_cgroup_reclaimable(victim, false))
 			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		total += ret;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
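
Aside (not from the patch): mem_cgroup_soft_reclaim() above is a shared walk, so it hands mem_cgroup_iter() a cookie keyed on zone and priority; concurrent reclaimers with an equal key divide the hierarchy between them instead of all starting from the same child. A rough sketch of that pattern, with priority and the per-memcg scan left as placeholders:

	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = priority,	/* placeholder reclaim priority */
	};
	struct mem_cgroup *memcg = NULL;

	while ((memcg = mem_cgroup_iter(root_memcg, memcg, &reclaim))) {
		/* scan memcg's LRU lists in this zone ... */
	}
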
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-							    gfp_mask,
-							    MEM_CGROUP_RECLAIM_SOFT,
-							    &nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);