@@ -10,6 +10,10 @@
  * Copyright (C) 2009 Nokia Corporation
  * Author: Kirill A. Shutemov
  *
+ * Kernel Memory Controller
+ * Copyright (C) 2012 Parallels Inc. and Google Inc.
+ * Authors: Glauber Costa and Suleiman Souhlal
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -2661,6 +2665,172 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
+{
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
+		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+}
+
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+{
+	struct res_counter *fail_res;
+	struct mem_cgroup *_memcg;
+	int ret = 0;
+	bool may_oom;
+
+	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Conditions under which we can wait for the oom_killer. These are
+	 * the same conditions tested by the core page allocator.
+	 */
+	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
+
+	_memcg = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
+				      &_memcg, may_oom);
+
+	if (ret == -EINTR) {
+		/*
+		 * __mem_cgroup_try_charge() chose to bypass to root due to
+		 * OOM kill or fatal signal. Since our only options are to
+		 * either fail the allocation or charge it to this cgroup, do
+		 * it as a temporary condition. But we can't fail. From a
+		 * kmem/slab perspective, the cache has already been selected
+		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * our minds.
+		 *
+		 * This condition will only trigger if the task entered
+		 * memcg_charge_kmem in a sane state, but was OOM-killed during
+		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * dying when the allocation triggers should already have been
+		 * directed to the root cgroup in memcontrol.h.
+		 */
+		res_counter_charge_nofail(&memcg->res, size, &fail_res);
+		if (do_swap_account)
+			res_counter_charge_nofail(&memcg->memsw, size,
+						  &fail_res);
+		ret = 0;
+	} else if (ret)
+		res_counter_uncharge(&memcg->kmem, size);
+
+	return ret;
+}
+
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	res_counter_uncharge(&memcg->kmem, size);
+	res_counter_uncharge(&memcg->res, size);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+}
+
+/*
+ * We need to verify if the allocation against current->mm->owner's memcg is
+ * possible for the given order. But the page is not allocated yet, so we'll
+ * need a further commit step to do the final arrangements.
+ *
+ * It is possible for the task to switch cgroups in the meantime, so at
+ * commit time, we can't rely on task conversion any longer. We'll then use
+ * the handle argument to return to the caller which cgroup we should commit
+ * against. We could also return the memcg directly and avoid the pointer
+ * passing, but a boolean return value gives better semantics considering
+ * the compiled-out case as well.
+ *
+ * Returning true means the allocation is possible.
+ */
+bool
+__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	*_memcg = NULL;
+	memcg = try_get_mem_cgroup_from_mm(current->mm);
+
+	/*
+	 * Very rare case described in mem_cgroup_from_task. Unfortunately
+	 * there isn't much we can do without complicating this too much, and
+	 * it would be gfp-dependent anyway. Just let it go.
+	 */
+	if (unlikely(!memcg))
+		return true;
+
+	if (!memcg_can_account_kmem(memcg)) {
+		css_put(&memcg->css);
+		return true;
+	}
+
+	mem_cgroup_get(memcg);
+
+	ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
+	if (!ret)
+		*_memcg = memcg;
+	else
+		mem_cgroup_put(memcg);
+
+	css_put(&memcg->css);
+	return (ret == 0);
+}
+
+void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
+				int order)
+{
+	struct page_cgroup *pc;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+
+	/* The page allocation failed. Revert */
+	if (!page) {
+		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+		mem_cgroup_put(memcg);
+		return;
+	}
+
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	pc->mem_cgroup = memcg;
+	SetPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+}
+
+void __memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct page_cgroup *pc;
+
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Fast unlocked return. Theoretically the page's state might have
+	 * changed, so we have to check again after taking the lock.
+	 */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		ClearPageCgroupUsed(pc);
+	}
+	unlock_page_cgroup(pc);
+
+	/*
+	 * We trust that a memcg is associated with the page only if it
+	 * is a valid (kmem-accounted) allocation.
+	 */
+	if (!memcg)
+		return;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+	mem_cgroup_put(memcg);
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)