@@ -346,10 +346,13 @@ struct mem_cgroup {
 /* internal only representation about the status of kmem accounting. */
 enum {
 	KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
+	KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */
 	KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
 };
 
-#define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE)
+/* We account when limit is on, but only after call sites are patched */
+#define KMEM_ACCOUNTED_MASK \
+		((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED))
 
 #ifdef CONFIG_MEMCG_KMEM
 static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
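
With two bits in play, the mask now means "limit set *and* static key already patched in". As a rough sketch (the helper name and the exact test are assumptions, not shown in this hunk), a charge path that wants to honour that rule would compare against the full mask rather than the single ACTIVE bit:

/*
 * Hypothetical helper, for illustration only: account kmem for this
 * memcg only when the limit is set (ACTIVE) and the static-key call
 * sites have already been patched (ACTIVATED).
 */
static bool memcg_can_account_kmem(struct mem_cgroup *memcg)
{
	return (memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK) ==
						KMEM_ACCOUNTED_MASK;
}
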
@@ -362,6 +365,11 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 	return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
 }
 
+static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
+{
+	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
 static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
 {
 	if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
@@ -532,6 +540,26 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+struct static_key memcg_kmem_enabled_key;
+
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+	if (memcg_kmem_is_active(memcg))
+		static_key_slow_dec(&memcg_kmem_enabled_key);
+}
+#else
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
+static void disarm_static_keys(struct mem_cgroup *memcg)
+{
+	disarm_sock_keys(memcg);
+	disarm_kmem_keys(memcg);
+}
+
 static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
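
memcg_kmem_enabled_key is the global switch the charge paths are expected to test. The header-side wrapper is not part of this hunk; a minimal sketch of what it presumably looks like, using the jump-label API, is:

/*
 * Sketch only - assumed to live in include/linux/memcontrol.h in the
 * rest of the series. static_key_false() compiles to a no-op branch
 * until someone calls static_key_slow_inc() on the key.
 */
#include <linux/jump_label.h>

extern struct static_key memcg_kmem_enabled_key;

static inline bool memcg_kmem_enabled(void)
{
	return static_key_false(&memcg_kmem_enabled_key);
}

While no cgroup has ever set a kmem limit, every caller of such a wrapper pays only a patched-out branch, and disarm_kmem_keys() drops the reference again when an accounted memcg is freed.
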
@@ -4204,6 +4232,8 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 {
 	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
+	bool must_inc_static_branch = false;
+
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 	/*
 	 * For simplicity, we won't allow this to be disabled. It also can't
@@ -4234,7 +4264,15 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 		ret = res_counter_set_limit(&memcg->kmem, val);
 		VM_BUG_ON(ret);
 
-		memcg_kmem_set_active(memcg);
+		/*
+		 * After this point, kmem_accounted (which we test atomically
+		 * at the beginning of this conditional) is no longer 0. This
+		 * guarantees only one process will set the following boolean
+		 * to true. We don't need test_and_set because we're protected
+		 * by the set_limit_mutex anyway.
+		 */
+		memcg_kmem_set_activated(memcg);
+		must_inc_static_branch = true;
 		/*
 		 * kmem charges can outlive the cgroup. In the case of slab
 		 * pages, for instance, a page contain objects from various
@@ -4247,6 +4285,27 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 out:
 	mutex_unlock(&set_limit_mutex);
 	cgroup_unlock();
+
+	/*
+	 * We are by now familiar with the fact that we can't inc the static
+	 * branch inside cgroup_lock. See disarm functions for details. A
+	 * worker here is overkill, but also wrong: after the limit is set, we
+	 * must start accounting right away. Since this operation can't fail,
+	 * we can safely defer it to here - no rollback will be needed.
+	 *
+	 * The boolean used to control this is also safe, because
+	 * KMEM_ACCOUNTED_ACTIVATED guarantees that only one process will be
+	 * able to set it to true.
+	 */
+	if (must_inc_static_branch) {
+		static_key_slow_inc(&memcg_kmem_enabled_key);
+		/*
+		 * Setting the active bit after the inc guarantees no one
+		 * starts accounting before all call sites are patched.
+		 */
+		memcg_kmem_set_active(memcg);
+	}
+
 #endif
 	return ret;
 }
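
The ordering here is the point: ACTIVATED is flipped under set_limit_mutex, the key is incremented only after cgroup_unlock(), and ACTIVE is set last. A charge site that checks the global key first and the per-memcg bit second therefore cannot start accounting before the call sites are patched. A hedged sketch of that check (helper names assumed, building on the memcg_kmem_enabled() wrapper sketched earlier):

/* Illustration only - not part of the patch. */
static bool memcg_should_account_kmem(struct mem_cgroup *memcg)
{
	/* No-op branch until static_key_slow_inc() above has run. */
	if (!memcg_kmem_enabled())
		return false;
	/*
	 * ACTIVE is set only after the key inc, so seeing it set here
	 * implies every call site was already patched.
	 */
	return memcg_kmem_is_active(memcg);
}
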
@@ -4258,8 +4317,20 @@ static void memcg_propagate_kmem(struct mem_cgroup *memcg)
 		return;
 	memcg->kmem_account_flags = parent->kmem_account_flags;
 #ifdef CONFIG_MEMCG_KMEM
-	if (memcg_kmem_is_active(memcg))
+	/*
+	 * When that happens, we need to disable the static branch only on
+	 * those memcgs that enabled it. To achieve this, we would be forced
+	 * to complicate the code by keeping track of which memcgs were the
+	 * ones that actually enabled limits, and which ones got it from
+	 * their parents.
+	 *
+	 * It is a lot simpler just to do static_key_slow_inc() on every child
+	 * that is accounted.
+	 */
+	if (memcg_kmem_is_active(memcg)) {
 		mem_cgroup_get(memcg);
+		static_key_slow_inc(&memcg_kmem_enabled_key);
+	}
 #endif
 }
 
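
Incrementing the key for every accounted child keeps the bookkeeping trivially balanced against the static_key_slow_dec() in disarm_kmem_keys(): one reference per accounted memcg, whether it set the limit itself or inherited the flag. A simplified worked example (hypothetical hierarchy, lifecycle details elided):

/*
 *   mkdir A; echo 1M > A/memory.kmem.limit_in_bytes   -> inc, key count 1
 *   mkdir A/B  (inherits ACTIVE via propagate)        -> inc, key count 2
 *   rmdir A/B  -> free_work() -> disarm_kmem_keys()   -> dec, key count 1
 *   rmdir A    -> free_work() -> disarm_kmem_keys()   -> dec, key count 0
 *
 * Without the inc in memcg_propagate_kmem(), removing A/B would drop
 * the count to zero while A still has a kmem limit in place.
 */
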
@@ -5184,7 +5255,7 @@ static void free_work(struct work_struct *work)
 	 * to move this code around, and make sure it is outside
 	 * the cgroup_lock.
 	 */
-	disarm_sock_keys(memcg);
+	disarm_static_keys(memcg);
 	if (size < PAGE_SIZE)
 		kfree(memcg);
 	else