@@ -61,12 +61,12 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES	5
 static struct mem_cgroup *root_mem_cgroup __read_mostly;
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
 int do_swap_account __read_mostly;
 
 /* for remember boot option*/
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED
+#ifdef CONFIG_MEMCG_SWAP_ENABLED
 static int really_do_swap_account __initdata = 1;
 #else
 static int really_do_swap_account __initdata = 0;
@@ -87,7 +87,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_CACHE,	   /* # of pages charged as cache */
 	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as anon rss */
 	MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
-	MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
+	MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */
 	MEM_CGROUP_STAT_NSTATS,
 };
 
@@ -378,9 +378,7 @@ static bool move_file(void)
 
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
-	MEM_CGROUP_CHARGE_TYPE_MAPPED,
-	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
-	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	MEM_CGROUP_CHARGE_TYPE_ANON,
 	MEM_CGROUP_CHARGE_TYPE_SWAPOUT,	/* for accounting swapcache */
 	MEM_CGROUP_CHARGE_TYPE_DROP,	/* a page was unused swap cache */
 	NR_CHARGE_TYPE,
@@ -407,8 +405,14 @@ enum charge_type {
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
 
+static inline
+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+	return container_of(s, struct mem_cgroup, css);
+}
+
 /* Writing them here to avoid exposing memcg's inner layout */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 #include <net/sock.h>
 #include <net/ip.h>
 
@@ -467,9 +471,9 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(tcp_proto_cgroup);
 #endif /* CONFIG_INET */
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+#endif /* CONFIG_MEMCG_KMEM */
 
-#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
+#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 static void disarm_sock_keys(struct mem_cgroup *memcg)
 {
 	if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto))
@@ -703,7 +707,7 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
 					 bool charge)
 {
 	int val = (charge) ? 1 : -1;
-	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
+	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
 }
 
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
@@ -864,9 +868,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 
 struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
-	return container_of(cgroup_subsys_state(cont,
-				mem_cgroup_subsys_id), struct mem_cgroup,
-				css);
+	return mem_cgroup_from_css(
+		cgroup_subsys_state(cont, mem_cgroup_subsys_id));
 }
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -879,8 +882,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 	if (unlikely(!p))
 		return NULL;
 
-	return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
-				struct mem_cgroup, css);
+	return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id));
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
@@ -966,8 +968,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);
 		if (css) {
 			if (css == &root->css || css_tryget(css))
-				memcg = container_of(css,
-						     struct mem_cgroup, css);
+				memcg = mem_cgroup_from_css(css);
 		} else
 			id = 0;
 		rcu_read_unlock();
@@ -1454,7 +1455,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
+static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
 	u64 limit;
 	u64 memsw;
@@ -1470,6 +1471,73 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+			      int order)
+{
+	struct mem_cgroup *iter;
+	unsigned long chosen_points = 0;
+	unsigned long totalpages;
+	unsigned int points = 0;
+	struct task_struct *chosen = NULL;
+
+	/*
+	 * If current has a pending SIGKILL, then automatically select it. The
+	 * goal is to allow it to allocate so that it may quickly exit and free
+	 * its memory.
+	 */
+	if (fatal_signal_pending(current)) {
+		set_thread_flag(TIF_MEMDIE);
+		return;
+	}
+
+	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
+	totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
+	for_each_mem_cgroup_tree(iter, memcg) {
+		struct cgroup *cgroup = iter->css.cgroup;
+		struct cgroup_iter it;
+		struct task_struct *task;
+
+		cgroup_iter_start(cgroup, &it);
+		while ((task = cgroup_iter_next(cgroup, &it))) {
+			switch (oom_scan_process_thread(task, totalpages, NULL,
+							false)) {
+			case OOM_SCAN_SELECT:
+				if (chosen)
+					put_task_struct(chosen);
+				chosen = task;
+				chosen_points = ULONG_MAX;
+				get_task_struct(chosen);
+				/* fall through */
+			case OOM_SCAN_CONTINUE:
+				continue;
+			case OOM_SCAN_ABORT:
+				cgroup_iter_end(cgroup, &it);
+				mem_cgroup_iter_break(memcg, iter);
+				if (chosen)
+					put_task_struct(chosen);
+				return;
+			case OOM_SCAN_OK:
+				break;
+			};
+			points = oom_badness(task, memcg, NULL, totalpages);
+			if (points > chosen_points) {
+				if (chosen)
+					put_task_struct(chosen);
+				chosen = task;
+				chosen_points = points;
+				get_task_struct(chosen);
+			}
+		}
+		cgroup_iter_end(cgroup, &it);
+	}
+
+	if (!chosen)
+		return;
+	points = chosen_points * 1000 / totalpages;
+	oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg,
+			 NULL, "Memory cgroup out of memory");
+}
+
 static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
 					gfp_t gfp_mask,
 					unsigned long flags)
@@ -1899,7 +1967,7 @@ again:
 		return;
 	/*
 	 * If this memory cgroup is not under account moving, we don't
-	 * need to take move_lock_page_cgroup(). Because we already hold
+	 * need to take move_lock_mem_cgroup(). Because we already hold
 	 * rcu_read_lock(), any calls to move_account will be delayed until
 	 * rcu_read_unlock() if mem_cgroup_stolen() == true.
 	 */
@@ -1921,7 +1989,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
 	/*
 	 * It's guaranteed that pc->mem_cgroup never changes while
 	 * lock is held because a routine modifies pc->mem_cgroup
-	 * should take move_lock_page_cgroup().
+	 * should take move_lock_mem_cgroup().
 	 */
 	move_unlock_mem_cgroup(pc->mem_cgroup, flags);
 }
@@ -2268,7 +2336,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
 	 * thread group leader migrates. It's possible that mm is not
-	 * set, if so charge the init_mm (happens for pagecache usage).
+	 * set, if so charge the root memcg (happens for pagecache usage).
 	 */
 	if (!*ptr && !mm)
 		*ptr = root_mem_cgroup;
@@ -2429,7 +2497,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 	css = css_lookup(&mem_cgroup_subsys, id);
 	if (!css)
 		return NULL;
-	return container_of(css, struct mem_cgroup, css);
+	return mem_cgroup_from_css(css);
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -2473,11 +2541,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	bool anon;
 
 	lock_page_cgroup(pc);
-	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
-		__mem_cgroup_cancel_charge(memcg, nr_pages);
-		return;
-	}
+	VM_BUG_ON(PageCgroupUsed(pc));
 	/*
 	 * we don't need page_cgroup_lock about tail pages, becase they are not
 	 * accessed by any other context at this point.
@@ -2519,7 +2583,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 		spin_unlock_irq(&zone->lru_lock);
 	}
 
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
 		anon = true;
 	else
 		anon = false;
@@ -2644,8 +2708,7 @@ out:
 
 static int mem_cgroup_move_parent(struct page *page,
 				  struct page_cgroup *pc,
-				  struct mem_cgroup *child,
-				  gfp_t gfp_mask)
+				  struct mem_cgroup *child)
 {
 	struct mem_cgroup *parent;
 	unsigned int nr_pages;
@@ -2728,38 +2791,7 @@ int mem_cgroup_newpage_charge(struct page *page,
 	VM_BUG_ON(page->mapping && !PageAnon(page));
 	VM_BUG_ON(!mm);
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-					MEM_CGROUP_CHARGE_TYPE_MAPPED);
-}
-
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
-					enum charge_type ctype);
-
-int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask)
-{
-	struct mem_cgroup *memcg = NULL;
-	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-	int ret;
-
-	if (mem_cgroup_disabled())
-		return 0;
-	if (PageCompound(page))
-		return 0;
-
-	if (unlikely(!mm))
-		mm = &init_mm;
-	if (!page_is_file_cache(page))
-		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
-	if (!PageSwapCache(page))
-		ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
-	else { /* page is swapcache/shmem */
-		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
-		if (!ret)
-			__mem_cgroup_commit_charge_swapin(page, memcg, type);
-	}
-	return ret;
+					MEM_CGROUP_CHARGE_TYPE_ANON);
 }
 
 /*
@@ -2768,27 +2800,26 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
  * struct page_cgroup is acquired. This refcnt will be consumed by
  * "commit()" or removed by "cancel()"
  */
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-				 struct page *page,
-				 gfp_t mask, struct mem_cgroup **memcgp)
+static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+					  struct page *page,
+					  gfp_t mask,
+					  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
 	int ret;
 
-	*memcgp = NULL;
-
-	if (mem_cgroup_disabled())
-		return 0;
-
-	if (!do_swap_account)
-		goto charge_cur_mm;
+	pc = lookup_page_cgroup(page);
 	/*
-	 * A racing thread's fault, or swapoff, may have already updated
-	 * the pte, and even removed page from swap cache: in those cases
-	 * do_swap_page()'s pte_same() test will fail; but there's also a
-	 * KSM case which does need to charge the page.
+	 * Every swap fault against a single page tries to charge the
+	 * page, bail as early as possible. shmem_unuse() encounters
+	 * already charged pages, too. The USED bit is protected by
+	 * the page lock, which serializes swap cache removal, which
+	 * in turn serializes uncharging.
 	 */
-	if (!PageSwapCache(page))
+	if (PageCgroupUsed(pc))
+		return 0;
+	if (!do_swap_account)
 		goto charge_cur_mm;
 	memcg = try_get_mem_cgroup_from_page(page);
 	if (!memcg)
@@ -2800,14 +2831,44 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	ret = 0;
 	return ret;
 charge_cur_mm:
-	if (unlikely(!mm))
-		mm = &init_mm;
 	ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
 	if (ret == -EINTR)
 		ret = 0;
 	return ret;
 }
 
+int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
+				 gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+	*memcgp = NULL;
+	if (mem_cgroup_disabled())
+		return 0;
+	/*
+	 * A racing thread's fault, or swapoff, may have already
+	 * updated the pte, and even removed page from swap cache: in
+	 * those cases unuse_pte()'s pte_same() test will fail; but
+	 * there's also a KSM case which does need to charge the page.
+	 */
+	if (!PageSwapCache(page)) {
+		int ret;
+
+		ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
+		if (ret == -EINTR)
+			ret = 0;
+		return ret;
+	}
+	return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
+}
+
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+{
+	if (mem_cgroup_disabled())
+		return;
+	if (!memcg)
+		return;
+	__mem_cgroup_cancel_charge(memcg, 1);
+}
+
 static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 					enum charge_type ctype)
@@ -2842,16 +2903,30 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
 				     struct mem_cgroup *memcg)
 {
 	__mem_cgroup_commit_charge_swapin(page, memcg,
-					  MEM_CGROUP_CHARGE_TYPE_MAPPED);
+					  MEM_CGROUP_CHARGE_TYPE_ANON);
 }
 
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+				gfp_t gfp_mask)
 {
+	struct mem_cgroup *memcg = NULL;
+	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	int ret;
+
 	if (mem_cgroup_disabled())
-		return;
-	if (!memcg)
-		return;
-	__mem_cgroup_cancel_charge(memcg, 1);
+		return 0;
+	if (PageCompound(page))
+		return 0;
+
+	if (!PageSwapCache(page))
+		ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
+	else { /* page is swapcache/shmem */
+		ret = __mem_cgroup_try_charge_swapin(mm, page,
+						     gfp_mask, &memcg);
+		if (!ret)
+			__mem_cgroup_commit_charge_swapin(page, memcg, type);
+	}
+	return ret;
 }
 
 static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
@@ -2911,7 +2986,8 @@ direct_uncharge:
  * uncharge if !page_mapped(page)
  */
 static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
+			     bool end_migration)
 {
 	struct mem_cgroup *memcg = NULL;
 	unsigned int nr_pages = 1;
@@ -2921,8 +2997,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	if (mem_cgroup_disabled())
 		return NULL;
 
-	if (PageSwapCache(page))
-		return NULL;
+	VM_BUG_ON(PageSwapCache(page));
 
 	if (PageTransHuge(page)) {
 		nr_pages <<= compound_order(page);
@@ -2945,7 +3020,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	anon = PageAnon(page);
 
 	switch (ctype) {
-	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+	case MEM_CGROUP_CHARGE_TYPE_ANON:
 		/*
 		 * Generally PageAnon tells if it's the anon statistics to be
 		 * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
@@ -2955,7 +3030,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		/* fallthrough */
 	case MEM_CGROUP_CHARGE_TYPE_DROP:
 		/* See mem_cgroup_prepare_migration() */
-		if (page_mapped(page) || PageCgroupMigration(pc))
+		if (page_mapped(page))
+			goto unlock_out;
+		/*
+		 * Pages under migration may not be uncharged. But
+		 * end_migration() /must/ be the one uncharging the
+		 * unused post-migration page and so it has to call
+		 * here with the migration bit still set. See the
+		 * res_counter handling below.
+		 */
+		if (!end_migration && PageCgroupMigration(pc))
 			goto unlock_out;
 		break;
 	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
@@ -2989,7 +3073,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		mem_cgroup_swap_statistics(memcg, true);
 		mem_cgroup_get(memcg);
 	}
-	if (!mem_cgroup_is_root(memcg))
+	/*
+	 * Migration does not charge the res_counter for the
+	 * replacement page, so leave it alone when phasing out the
+	 * page that is unused after the migration.
+	 */
+	if (!end_migration && !mem_cgroup_is_root(memcg))
 		mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
 
 	return memcg;
@@ -3005,14 +3094,16 @@ void mem_cgroup_uncharge_page(struct page *page)
 	if (page_mapped(page))
 		return;
 	VM_BUG_ON(page->mapping && !PageAnon(page));
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+	if (PageSwapCache(page))
+		return;
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 	VM_BUG_ON(page_mapped(page));
 	VM_BUG_ON(page->mapping);
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
 }
 
 /*
@@ -3076,7 +3167,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 	if (!swapout) /* this was a swap cache but the swap is unused ! */
 		ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
 
-	memcg = __mem_cgroup_uncharge_common(page, ctype);
+	memcg = __mem_cgroup_uncharge_common(page, ctype, false);
 
 	/*
 	 * record memcg information, if swapout && memcg != NULL,
@@ -3087,7 +3178,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 }
 #endif
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 /*
  * called from swap_entry_free(). remove record in swap_cgroup and
  * uncharge "memsw" account.
@@ -3166,19 +3257,18 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
  * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
-	int ret = 0;
 
 	*memcgp = NULL;
 
 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
-		return 0;
+		return;
 
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
@@ -3223,24 +3313,9 @@ int mem_cgroup_prepare_migration(struct page *page,
 	 * we return here.
 	 */
 	if (!memcg)
-		return 0;
+		return;
 
 	*memcgp = memcg;
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
-	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret) {
-		if (PageAnon(page)) {
-			lock_page_cgroup(pc);
-			ClearPageCgroupMigration(pc);
-			unlock_page_cgroup(pc);
-			/*
-			 * The old page may be fully unmapped while we kept it.
-			 */
-			mem_cgroup_uncharge_page(page);
-		}
-		/* we'll need to revisit this error code (we have -EINTR) */
-		return -ENOMEM;
-	}
 	/*
 	 * We charge new page before it's used/mapped. So, even if unlock_page()
 	 * is called before end_migration, we can catch all events on this new
@@ -3248,13 +3323,15 @@ int mem_cgroup_prepare_migration(struct page *page,
 	 * mapcount will be finally 0 and we call uncharge in end_migration().
 	 */
 	if (PageAnon(page))
-		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
-	else if (page_is_file_cache(page))
-		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+		ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
 	else
-		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	/*
+	 * The page is committed to the memcg, but it's not actually
+	 * charged to the res_counter since we plan on replacing the
+	 * old one and only one page is going to be left afterwards.
+	 */
 	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
-	return ret;
 }
 
 /* remove redundant charge if migration failed*/
@@ -3276,6 +3353,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 		used = newpage;
 		unused = oldpage;
 	}
+	anon = PageAnon(used);
+	__mem_cgroup_uncharge_common(unused,
+				     anon ? MEM_CGROUP_CHARGE_TYPE_ANON
+					  : MEM_CGROUP_CHARGE_TYPE_CACHE,
+				     true);
+	css_put(&memcg->css);
 	/*
 	 * We disallowed uncharge of pages under migration because mapcount
 	 * of the page goes down to zero, temporarly.
@@ -3285,10 +3368,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	lock_page_cgroup(pc);
 	ClearPageCgroupMigration(pc);
 	unlock_page_cgroup(pc);
-	anon = PageAnon(used);
-	__mem_cgroup_uncharge_common(unused,
-		anon ? MEM_CGROUP_CHARGE_TYPE_MAPPED
-		     : MEM_CGROUP_CHARGE_TYPE_CACHE);
 
 	/*
 	 * If a page is a file cache, radix-tree replacement is very atomic
@@ -3340,10 +3419,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	 */
 	if (!memcg)
 		return;
-
-	if (PageSwapBacked(oldpage))
-		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
 	/*
 	 * Even if newpage->mapping was NULL before starting replacement,
 	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
@@ -3418,7 +3493,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		/*
 		 * Rather than hide all in some function, I do this in
 		 * open coded manner. You see what this really does.
-		 * We have to guarantee memcg->res.limit < memcg->memsw.limit.
+		 * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
 		 */
 		mutex_lock(&set_limit_mutex);
 		memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
@@ -3479,7 +3554,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		/*
 		 * Rather than hide all in some function, I do this in
 		 * open coded manner. You see what this really does.
-		 * We have to guarantee memcg->res.limit < memcg->memsw.limit.
+		 * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
 		 */
 		mutex_lock(&set_limit_mutex);
 		memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -3611,10 +3686,12 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 }
 
 /*
- * This routine traverse page_cgroup in given list and drop them all.
- * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
+ * Traverse a specified page_cgroup list and try to drop them all. This doesn't
+ * reclaim the pages page themselves - it just removes the page_cgroups.
+ * Returns true if some page_cgroups were not freed, indicating that the caller
+ * must retry this operation.
  */
-static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
+static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz;
@@ -3622,7 +3699,6 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 	struct list_head *list;
 	struct page *busy;
 	struct zone *zone;
-	int ret = 0;
 
 	zone = &NODE_DATA(node)->node_zones[zid];
 	mz = mem_cgroup_zoneinfo(memcg, node, zid);
@@ -3636,7 +3712,6 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 		struct page_cgroup *pc;
 		struct page *page;
 
-		ret = 0;
 		spin_lock_irqsave(&zone->lru_lock, flags);
 		if (list_empty(list)) {
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
@@ -3653,21 +3728,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 
 		pc = lookup_page_cgroup(page);
 
-		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
-		if (ret == -ENOMEM || ret == -EINTR)
-			break;
-
-		if (ret == -EBUSY || ret == -EINVAL) {
+		if (mem_cgroup_move_parent(page, pc, memcg)) {
 			/* found lock contention or "pc" is obsolete. */
 			busy = page;
 			cond_resched();
 		} else
 			busy = NULL;
 	}
-
-	if (!ret && !list_empty(list))
-		return -EBUSY;
-	return ret;
+	return !list_empty(list);
 }
 
 /*
@@ -3692,9 +3760,6 @@ move_account:
 	ret = -EBUSY;
 	if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
 		goto out;
-	ret = -EINTR;
-	if (signal_pending(current))
-		goto out;
 	/* This is for making all *used* pages to be on LRU. */
 	lru_add_drain_all();
 	drain_all_stock_sync(memcg);
@@ -3715,9 +3780,6 @@ move_account:
 		}
 		mem_cgroup_end_move(memcg);
 		memcg_oom_recover(memcg);
-		/* it seems parent cgroup doesn't have enough mem */
-		if (ret == -ENOMEM)
-			goto try_to_free;
 		cond_resched();
 	/* "ret" should also be checked to ensure all lists are empty. */
 	} while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret);
@@ -3779,6 +3841,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 	parent_memcg = mem_cgroup_from_cont(parent);
 
 	cgroup_lock();
+
+	if (memcg->use_hierarchy == val)
+		goto out;
+
 	/*
 	 * If parent's use_hierarchy is set, we can't make any modifications
 	 * in the child subtrees. If it is unset, then the change can
@@ -3795,6 +3861,8 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 			retval = -EBUSY;
 	} else
 		retval = -EINVAL;
+
+out:
 	cgroup_unlock();
 
 	return retval;
@@ -3831,7 +3899,7 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
 
 	if (swap)
-		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
+		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
 
 	return val << PAGE_SHIFT;
 }
@@ -4015,7 +4083,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
 #endif
 
 #ifdef CONFIG_NUMA
-static int mem_control_numa_stat_show(struct cgroup *cont, struct cftype *cft,
+static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft,
 				      struct seq_file *m)
 {
 	int nid;
@@ -4074,7 +4142,7 @@ static inline void mem_cgroup_lru_names_not_uptodate(void)
 	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 }
 
-static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
+static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
 				 struct seq_file *m)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
@@ -4082,7 +4150,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	unsigned int i;
 
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-		if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account)
+		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
 			continue;
 		seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
 			   mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
@@ -4109,7 +4177,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
 		long long val = 0;
 
-		if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account)
+		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
 			continue;
 		for_each_mem_cgroup_tree(mi, memcg)
 			val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
@@ -4533,7 +4601,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 	return 0;
 }
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
 	return mem_cgroup_sockets_init(memcg, ss);
@@ -4588,7 +4656,7 @@ static struct cftype mem_cgroup_files[] = {
 	},
 	{
 		.name = "stat",
-		.read_seq_string = mem_control_stat_show,
+		.read_seq_string = memcg_stat_show,
 	},
 	{
 		.name = "force_empty",
@@ -4620,10 +4688,10 @@ static struct cftype mem_cgroup_files[] = {
 #ifdef CONFIG_NUMA
 	{
 		.name = "numa_stat",
-		.read_seq_string = mem_control_numa_stat_show,
+		.read_seq_string = memcg_numa_stat_show,
 	},
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 	{
 		.name = "memsw.usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
@@ -4810,7 +4878,7 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(parent_mem_cgroup);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 static void __init enable_swap_cgroup(void)
 {
 	if (!mem_cgroup_disabled() && really_do_swap_account)
@@ -5541,7 +5609,7 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.__DEPRECATED_clear_css_refs = true,
 };
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 static int __init enable_swap_account(char *s)
 {
 	/* consider enabled if no parameter or 1 is given */