|
@@ -27,6 +27,7 @@
|
|
|
#include <linux/module.h>
|
|
|
#include <linux/notifier.h>
|
|
|
#include <linux/memcontrol.h>
|
|
|
+#include <linux/mempolicy.h>
|
|
|
#include <linux/security.h>
|
|
|
|
|
|
int sysctl_panic_on_oom;
|
|
@@ -35,23 +36,57 @@ int sysctl_oom_dump_tasks;
|
|
|
static DEFINE_SPINLOCK(zone_scan_lock);
|
|
|
/* #define DEBUG */
|
|
|
|
|
|
-/*
|
|
|
- * Is all threads of the target process nodes overlap ours?
|
|
|
+#ifdef CONFIG_NUMA
|
|
|
+/**
|
|
|
+ * has_intersects_mems_allowed() - check task eligibility for kill
|
|
|
+ * @tsk: task struct of the task to consider
|
|
|
+ * @mask: nodemask passed to page allocator for mempolicy ooms
|
|
|
+ *
|
|
|
+ * Task eligibility is determined by whether or not a candidate task, @tsk,
|
|
|
+ * shares the same mempolicy nodes as current if it is bound by such a policy
|
|
|
+ * and whether or not it has the same set of allowed cpuset nodes.
|
|
|
*/
|
|
|
-static int has_intersects_mems_allowed(struct task_struct *tsk)
|
|
|
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
|
|
|
+ const nodemask_t *mask)
|
|
|
{
|
|
|
- struct task_struct *t;
|
|
|
+ struct task_struct *start = tsk;
|
|
|
|
|
|
- t = tsk;
|
|
|
do {
|
|
|
- if (cpuset_mems_allowed_intersects(current, t))
|
|
|
- return 1;
|
|
|
- t = next_thread(t);
|
|
|
- } while (t != tsk);
|
|
|
-
|
|
|
- return 0;
|
|
|
+ if (mask) {
|
|
|
+ /*
|
|
|
+ * If this is a mempolicy constrained oom, tsk's
|
|
|
+ * cpuset is irrelevant. Only return true if its
|
|
|
+ * mempolicy intersects current, otherwise it may be
|
|
|
+ * needlessly killed.
|
|
|
+ */
|
|
|
+ if (mempolicy_nodemask_intersects(tsk, mask))
|
|
|
+ return true;
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * This is not a mempolicy constrained oom, so only
|
|
|
+ * check the mems of tsk's cpuset.
|
|
|
+ */
|
|
|
+ if (cpuset_mems_allowed_intersects(current, tsk))
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ tsk = next_thread(tsk);
|
|
|
+ } while (tsk != start);
|
|
|
+ return false;
|
|
|
+}
|
|
|
+#else
|
|
|
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
|
|
|
+ const nodemask_t *mask)
|
|
|
+{
|
|
|
+ return true;
|
|
|
}
|
|
|
+#endif /* CONFIG_NUMA */
|
|
|
|
|
|
+/*
|
|
|
+ * The process p may have detached its own ->mm while exiting or through
|
|
|
+ * use_mm(), but one or more of its subthreads may still have a valid
|
|
|
+ * pointer. Return p, or any of its subthreads with a valid ->mm, with
|
|
|
+ * task_lock() held.
|
|
|
+ */
|
|
|
static struct task_struct *find_lock_task_mm(struct task_struct *p)
|
|
|
{
|
|
|
struct task_struct *t = p;
|
|
@@ -106,10 +141,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
|
|
|
* The memory size of the process is the basis for the badness.
|
|
|
*/
|
|
|
points = p->mm->total_vm;
|
|
|
-
|
|
|
- /*
|
|
|
- * After this unlock we can no longer dereference local variable `mm'
|
|
|
- */
|
|
|
task_unlock(p);
|
|
|
|
|
|
/*
|
|
@@ -253,7 +284,8 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
|
|
|
* (not docbooked, we don't want this one cluttering up the manual)
|
|
|
*/
|
|
|
static struct task_struct *select_bad_process(unsigned long *ppoints,
|
|
|
- struct mem_cgroup *mem)
|
|
|
+ struct mem_cgroup *mem, enum oom_constraint constraint,
|
|
|
+ const nodemask_t *mask)
|
|
|
{
|
|
|
struct task_struct *p;
|
|
|
struct task_struct *chosen = NULL;
|
|
@@ -269,7 +301,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
|
|
|
continue;
|
|
|
if (mem && !task_in_mem_cgroup(p, mem))
|
|
|
continue;
|
|
|
- if (!has_intersects_mems_allowed(p))
|
|
|
+ if (!has_intersects_mems_allowed(p,
|
|
|
+ constraint == CONSTRAINT_MEMORY_POLICY ? mask :
|
|
|
+ NULL))
|
|
|
continue;
|
|
|
|
|
|
/*
|
|
@@ -497,7 +531,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
|
|
|
panic("out of memory(memcg). panic_on_oom is selected.\n");
|
|
|
read_lock(&tasklist_lock);
|
|
|
retry:
|
|
|
- p = select_bad_process(&points, mem);
|
|
|
+ p = select_bad_process(&points, mem, CONSTRAINT_NONE, NULL);
|
|
|
if (!p || PTR_ERR(p) == -1UL)
|
|
|
goto out;
|
|
|
|
|
@@ -576,7 +610,8 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
|
|
|
/*
|
|
|
* Must be called with tasklist_lock held for read.
|
|
|
*/
|
|
|
-static void __out_of_memory(gfp_t gfp_mask, int order)
|
|
|
+static void __out_of_memory(gfp_t gfp_mask, int order,
|
|
|
+ enum oom_constraint constraint, const nodemask_t *mask)
|
|
|
{
|
|
|
struct task_struct *p;
|
|
|
unsigned long points;
|
|
@@ -590,7 +625,7 @@ retry:
|
|
|
* Rambo mode: Shoot down a process and hope it solves whatever
|
|
|
* issues we may have.
|
|
|
*/
|
|
|
- p = select_bad_process(&points, NULL);
|
|
|
+ p = select_bad_process(&points, NULL, constraint, mask);
|
|
|
|
|
|
if (PTR_ERR(p) == -1UL)
|
|
|
return;
|
|
@@ -624,7 +659,8 @@ void pagefault_out_of_memory(void)
|
|
|
panic("out of memory from page fault. panic_on_oom is selected.\n");
|
|
|
|
|
|
read_lock(&tasklist_lock);
|
|
|
- __out_of_memory(0, 0); /* unknown gfp_mask and order */
|
|
|
+ /* unknown gfp_mask and order */
|
|
|
+ __out_of_memory(0, 0, CONSTRAINT_NONE, NULL);
|
|
|
read_unlock(&tasklist_lock);
|
|
|
|
|
|
/*
|
|
@@ -640,6 +676,7 @@ void pagefault_out_of_memory(void)
|
|
|
* @zonelist: zonelist pointer
|
|
|
* @gfp_mask: memory allocation flags
|
|
|
* @order: amount of memory being requested as a power of 2
|
|
|
+ * @nodemask: nodemask passed to page allocator
|
|
|
*
|
|
|
* If we run out of memory, we have the choice between either
|
|
|
* killing a random task (bad), letting the system crash (worse)
|
|
@@ -678,24 +715,19 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
|
|
|
*/
|
|
|
constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
|
|
|
read_lock(&tasklist_lock);
|
|
|
-
|
|
|
- switch (constraint) {
|
|
|
- case CONSTRAINT_MEMORY_POLICY:
|
|
|
- oom_kill_process(current, gfp_mask, order, 0, NULL,
|
|
|
- "No available memory (MPOL_BIND)");
|
|
|
- break;
|
|
|
-
|
|
|
- case CONSTRAINT_NONE:
|
|
|
- if (sysctl_panic_on_oom) {
|
|
|
+ if (unlikely(sysctl_panic_on_oom)) {
|
|
|
+ /*
|
|
|
+ * panic_on_oom only affects CONSTRAINT_NONE, the kernel
|
|
|
+ * should not panic for cpuset or mempolicy induced memory
|
|
|
+ * failures.
|
|
|
+ */
|
|
|
+ if (constraint == CONSTRAINT_NONE) {
|
|
|
dump_header(NULL, gfp_mask, order, NULL);
|
|
|
- panic("out of memory. panic_on_oom is selected\n");
|
|
|
+ read_unlock(&tasklist_lock);
|
|
|
+ panic("Out of memory: panic_on_oom is enabled\n");
|
|
|
}
|
|
|
- /* Fall-through */
|
|
|
- case CONSTRAINT_CPUSET:
|
|
|
- __out_of_memory(gfp_mask, order);
|
|
|
- break;
|
|
|
}
|
|
|
-
|
|
|
+ __out_of_memory(gfp_mask, order, constraint, nodemask);
|
|
|
read_unlock(&tasklist_lock);
|
|
|
|
|
|
/*
|