|
@@ -149,6 +149,7 @@ struct mem_cgroup_threshold {
|
|
|
u64 threshold;
|
|
|
};
|
|
|
|
|
|
+/* For threshold */
|
|
|
struct mem_cgroup_threshold_ary {
|
|
|
/* An array index points to threshold just below usage. */
|
|
|
atomic_t current_threshold;
|
|
@@ -157,8 +158,14 @@ struct mem_cgroup_threshold_ary {
|
|
|
/* Array of thresholds */
|
|
|
struct mem_cgroup_threshold entries[0];
|
|
|
};
|
|
|
+/* for OOM: one entry per eventfd registered for OOM notification */
|
|
|
+struct mem_cgroup_eventfd_list {
|
|
|
+ struct list_head list; /* linked into mem_cgroup->oom_notify */
|
|
|
+ struct eventfd_ctx *eventfd; /* signalled by mem_cgroup_oom_notify_cb() */
|
|
|
+};
|
|
|
|
|
|
static void mem_cgroup_threshold(struct mem_cgroup *mem);
|
|
|
+static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
|
|
|
|
|
|
/*
|
|
|
* The memory controller data structure. The memory controller controls both
|
|
@@ -220,6 +227,9 @@ struct mem_cgroup {
|
|
|
/* thresholds for mem+swap usage. RCU-protected */
|
|
|
struct mem_cgroup_threshold_ary *memsw_thresholds;
|
|
|
|
|
|
+ /* For oom notifier event fd */
|
|
|
+ struct list_head oom_notify;
|
|
|
+
|
|
|
/*
|
|
|
* Should we move charges of a task when a task is moved into this
|
|
|
* mem_cgroup ? And what type of charges should we move ?
|
|
@@ -282,9 +292,12 @@ enum charge_type {
|
|
|
/* for encoding cft->private value on file */
|
|
|
#define _MEM (0)
|
|
|
#define _MEMSWAP (1)
|
|
|
+#define _OOM_TYPE (2)
|
|
|
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
|
|
|
#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
|
|
|
#define MEMFILE_ATTR(val) ((val) & 0xffff)
|
|
|
+/* Used for OOM notifier */
|
|
|
+#define OOM_CONTROL (0)
|
|
|
|
|
|
/*
|
|
|
* Reclaim flags for mem_cgroup_hierarchical_reclaim
|
|
@@ -1353,6 +1366,8 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
|
|
|
*/
|
|
|
if (!locked)
|
|
|
prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
|
|
|
+ else
|
|
|
+ mem_cgroup_oom_notify(mem);
|
|
|
mutex_unlock(&memcg_oom_mutex);
|
|
|
|
|
|
if (locked)
|
|
@@ -3398,8 +3413,22 @@ static int compare_thresholds(const void *a, const void *b)
|
|
|
return _a->threshold - _b->threshold;
|
|
|
}
|
|
|
|
|
|
-static int mem_cgroup_register_event(struct cgroup *cgrp, struct cftype *cft,
|
|
|
- struct eventfd_ctx *eventfd, const char *args)
|
|
|
+static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
|
|
|
+{
|
|
|
+ struct mem_cgroup_eventfd_list *ev;
|
|
|
+ /* Signal every eventfd registered on mem->oom_notify; 'data' is unused. */
|
|
|
+ list_for_each_entry(ev, &mem->oom_notify, list)
|
|
|
+ eventfd_signal(ev->eventfd, 1);
|
|
|
+ return 0; /* always 0; presumably lets mem_cgroup_walk_tree() keep going - confirm */
|
|
|
+}
|
|
|
+/* Notify OOM listeners: called from mem_cgroup_handle_oom() when 'locked' is set, before memcg_oom_mutex is dropped. */
|
|
|
+static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
|
|
|
+{
|
|
|
+ mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb); /* NULL: the callback takes no extra data */
|
|
|
+}
|
|
|
+
|
|
|
+static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
|
|
|
+ struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
|
|
|
struct mem_cgroup_threshold_ary *thresholds, *thresholds_new;
|
|
@@ -3483,8 +3512,8 @@ unlock:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static int mem_cgroup_unregister_event(struct cgroup *cgrp, struct cftype *cft,
|
|
|
- struct eventfd_ctx *eventfd)
|
|
|
+static int mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
|
|
|
+ struct cftype *cft, struct eventfd_ctx *eventfd)
|
|
|
{
|
|
|
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
|
|
|
struct mem_cgroup_threshold_ary *thresholds, *thresholds_new;
|
|
@@ -3568,13 +3597,61 @@ unlock:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
|
|
|
+ struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
|
|
|
+{
|
|
|
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
|
|
|
+ struct mem_cgroup_eventfd_list *event;
|
|
|
+ int type = MEMFILE_TYPE(cft->private);
|
|
|
+ /* Register 'eventfd' for OOM notification on this memcg; 'args' is unused. Returns 0 or -ENOMEM. */
|
|
|
+ BUG_ON(type != _OOM_TYPE); /* handler is only wired to the "oom_control" file */
|
|
|
+ event = kmalloc(sizeof(*event), GFP_KERNEL); /* freed in mem_cgroup_oom_unregister_event() */
|
|
|
+ if (!event)
|
|
|
+ return -ENOMEM;
|
|
|
+ /* The oom_notify list is modified, and oom_lock is checked, under memcg_oom_mutex. */
|
|
|
+ mutex_lock(&memcg_oom_mutex);
|
|
|
+
|
|
|
+ event->eventfd = eventfd;
|
|
|
+ list_add(&event->list, &memcg->oom_notify);
|
|
|
+
|
|
|
+ /* Already in OOM (oom_lock non-zero)? Then signal the new eventfd right away. */
|
|
|
+ if (atomic_read(&memcg->oom_lock))
|
|
|
+ eventfd_signal(eventfd, 1);
|
|
|
+ mutex_unlock(&memcg_oom_mutex);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+/* Drop and free every oom_notify entry of this memcg that refers to 'eventfd'; always returns 0. */
|
|
|
+static int mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
|
|
|
+ struct cftype *cft, struct eventfd_ctx *eventfd)
|
|
|
+{
|
|
|
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
|
|
|
+ struct mem_cgroup_eventfd_list *ev, *tmp;
|
|
|
+ int type = MEMFILE_TYPE(cft->private);
|
|
|
+
|
|
|
+ BUG_ON(type != _OOM_TYPE); /* handler is only wired to the "oom_control" file */
|
|
|
+ /* Same mutex as registration, so list updates do not interleave. */
|
|
|
+ mutex_lock(&memcg_oom_mutex);
|
|
|
+ /* _safe variant: entries are unlinked and freed while walking the list. */
|
|
|
+ list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) {
|
|
|
+ if (ev->eventfd == eventfd) {
|
|
|
+ list_del(&ev->list);
|
|
|
+ kfree(ev);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ mutex_unlock(&memcg_oom_mutex);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
static struct cftype mem_cgroup_files[] = {
|
|
|
{
|
|
|
.name = "usage_in_bytes",
|
|
|
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
|
|
|
.read_u64 = mem_cgroup_read,
|
|
|
- .register_event = mem_cgroup_register_event,
|
|
|
- .unregister_event = mem_cgroup_unregister_event,
|
|
|
+ .register_event = mem_cgroup_usage_register_event,
|
|
|
+ .unregister_event = mem_cgroup_usage_unregister_event,
|
|
|
},
|
|
|
{
|
|
|
.name = "max_usage_in_bytes",
|
|
@@ -3623,6 +3700,12 @@ static struct cftype mem_cgroup_files[] = {
|
|
|
.read_u64 = mem_cgroup_move_charge_read,
|
|
|
.write_u64 = mem_cgroup_move_charge_write,
|
|
|
},
|
|
|
+ {
|
|
|
+ .name = "oom_control",
|
|
|
+ .register_event = mem_cgroup_oom_register_event,
|
|
|
+ .unregister_event = mem_cgroup_oom_unregister_event,
|
|
|
+ .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
|
|
|
+ },
|
|
|
};
|
|
|
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
|
|
@@ -3631,8 +3714,8 @@ static struct cftype memsw_cgroup_files[] = {
|
|
|
.name = "memsw.usage_in_bytes",
|
|
|
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
|
|
|
.read_u64 = mem_cgroup_read,
|
|
|
- .register_event = mem_cgroup_register_event,
|
|
|
- .unregister_event = mem_cgroup_unregister_event,
|
|
|
+ .register_event = mem_cgroup_usage_register_event,
|
|
|
+ .unregister_event = mem_cgroup_usage_unregister_event,
|
|
|
},
|
|
|
{
|
|
|
.name = "memsw.max_usage_in_bytes",
|
|
@@ -3878,6 +3961,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
|
|
|
}
|
|
|
mem->last_scanned_child = 0;
|
|
|
spin_lock_init(&mem->reclaim_param_lock);
|
|
|
+ INIT_LIST_HEAD(&mem->oom_notify);
|
|
|
|
|
|
if (parent)
|
|
|
mem->swappiness = get_swappiness(parent);
|