@@ -588,7 +588,14 @@ static int memcg_limited_groups_array_size;
 #define MEMCG_CACHES_MIN_SIZE 4
 #define MEMCG_CACHES_MAX_SIZE 65535
 
+/*
+ * A lot of the calls to the cache allocation functions are expected to be
+ * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
+ * conditional on this static branch, we'll have to allow modules that do
+ * kmem_cache_alloc and the like to see this symbol as well.
+ */
 struct static_key memcg_kmem_enabled_key;
+EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
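For context, the fast paths that test this key are inlined into every slab
consumer, including modules, which is why the symbol is exported above. A
minimal sketch of how such a caller might look, assuming the companion header
exposes the key through a memcg_kmem_enabled() helper (the helper name is an
assumption, not part of this hunk):

#include <linux/jump_label.h>

extern struct static_key memcg_kmem_enabled_key;

/*
 * Sketch only: this gets inlined into module code, so the key symbol itself
 * must be exported. static_key_false() is patched to a plain fall-through
 * while kmem accounting is disabled, keeping the common case essentially free.
 */
static inline bool memcg_kmem_enabled(void)
{
	return static_key_false(&memcg_kmem_enabled_key);
}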
@@ -2989,9 +2996,219 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 
 void memcg_release_cache(struct kmem_cache *s)
 {
+	struct kmem_cache *root;
+	struct mem_cgroup *memcg;
+	int id;
+
+	/*
+	 * This happens, for instance, when a root cache goes away before we
+	 * add any memcg.
+	 */
+	if (!s->memcg_params)
+		return;
+
+	if (s->memcg_params->is_root_cache)
+		goto out;
+
+	memcg = s->memcg_params->memcg;
+	id = memcg_cache_id(memcg);
+
+	root = s->memcg_params->root_cache;
+	root->memcg_params->memcg_caches[id] = NULL;
+	mem_cgroup_put(memcg);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_del(&s->memcg_params->list);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+out:
 	kfree(s->memcg_params);
 }
 
+static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	char *name;
+	struct dentry *dentry;
+
+	rcu_read_lock();
+	dentry = rcu_dereference(memcg->css.cgroup->dentry);
+	rcu_read_unlock();
+
+	BUG_ON(dentry == NULL);
+
+	name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
+			 memcg_cache_id(memcg), dentry->d_name.name);
+
+	return name;
+}
+
+static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
+					 struct kmem_cache *s)
+{
+	char *name;
+	struct kmem_cache *new;
+
+	name = memcg_cache_name(memcg, s);
+	if (!name)
+		return NULL;
+
+	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+				      (s->flags & ~SLAB_PANIC), s->ctor);
+
+	kfree(name);
+	return new;
+}
+
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allows them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple concurrent allocations to the same cache
+ * may span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+						  struct kmem_cache *cachep)
+{
+	struct kmem_cache *new_cachep;
+	int idx;
+
+	BUG_ON(!memcg_can_account_kmem(memcg));
+
+	idx = memcg_cache_id(memcg);
+
+	mutex_lock(&memcg_cache_mutex);
+	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	if (new_cachep)
+		goto out;
+
+	new_cachep = kmem_cache_dup(memcg, cachep);
+
+	if (new_cachep == NULL) {
+		new_cachep = cachep;
+		goto out;
+	}
+
+	mem_cgroup_get(memcg);
+	new_cachep->memcg_params->root_cache = cachep;
+
+	cachep->memcg_params->memcg_caches[idx] = new_cachep;
+	/*
+	 * The readers won't lock; make sure everybody sees the updated value,
+	 * so they won't queue the cache creation again for no reason.
+	 */
+	wmb();
+out:
+	mutex_unlock(&memcg_cache_mutex);
+	return new_cachep;
+}
+
+struct create_work {
+	struct mem_cgroup *memcg;
+	struct kmem_cache *cachep;
+	struct work_struct work;
+};
+
+static void memcg_create_cache_work_func(struct work_struct *w)
+{
+	struct create_work *cw;
+
+	cw = container_of(w, struct create_work, work);
+	memcg_create_kmem_cache(cw->memcg, cw->cachep);
+	/* Drop the reference gotten when we enqueued. */
+	css_put(&cw->memcg->css);
+	kfree(cw);
+}
+
+/*
+ * Enqueue the creation of a per-memcg kmem_cache.
+ * Called with rcu_read_lock held.
+ */
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	struct create_work *cw;
+
+	cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+	if (cw == NULL)
+		return;
+
+	/* The corresponding put will be done in the workqueue. */
+	if (!css_tryget(&memcg->css)) {
+		kfree(cw);
+		return;
+	}
+
+	cw->memcg = memcg;
+	cw->cachep = cachep;
+
+	INIT_WORK(&cw->work, memcg_create_cache_work_func);
+	schedule_work(&cw->work);
+}
+
+/*
+ * Return the kmem_cache we're supposed to use for a slab allocation.
+ * We try to use the current memcg's version of the cache.
+ *
+ * If the cache does not exist yet, that is, if we are its first user, we
+ * either create it immediately, if possible, or create it asynchronously
+ * in a workqueue.
+ * In the latter case, we will let the current allocation go through with
+ * the original cache.
+ *
+ * Can't be called in interrupt context or from kernel threads.
+ * This function needs to be called with rcu_read_lock() held.
+ */
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
+					  gfp_t gfp)
+{
+	struct mem_cgroup *memcg;
+	int idx;
+
+	VM_BUG_ON(!cachep->memcg_params);
+	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
+	rcu_read_unlock();
+
+	if (!memcg_can_account_kmem(memcg))
+		return cachep;
+
+	idx = memcg_cache_id(memcg);
+
+	/*
+	 * Barrier to make sure we're always seeing the up-to-date value. The
+	 * code updating memcg_caches will issue a write barrier to match this.
+	 */
+	read_barrier_depends();
+	if (unlikely(cachep->memcg_params->memcg_caches[idx] == NULL)) {
+		/*
+		 * If we are in a safe context (can wait, and not in interrupt
+		 * context), we could be predictable and return right away.
+		 * This would guarantee that the allocation being performed
+		 * already belongs in the new cache.
+		 *
+		 * However, there are some clashes that can arise from locking.
+		 * For instance, because we acquire the slab_mutex while doing
+		 * kmem_cache_dup, no further allocation could happen with the
+		 * slab_mutex held.
+		 *
+		 * Also, because cache creation issues get_online_cpus(), this
+		 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
+		 * which ends up reversed during cpu hotplug. (cpuset allocates
+		 * a bunch of GFP_KERNEL memory during cpu up.) Due to all that,
+		 * it is better to defer everything.
+		 */
+		memcg_create_cache_enqueue(memcg, cachep);
+		return cachep;
+	}
+
+	return cachep->memcg_params->memcg_caches[idx];
+}
+EXPORT_SYMBOL(__memcg_kmem_get_cache);
+
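The intended caller of __memcg_kmem_get_cache() is the slab allocation fast
path. A minimal sketch of such a wrapper, assuming the memcg_kmem_enabled()
helper from above and the usual context checks (the exact guard conditions are
not part of this hunk and are assumptions here):

static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	/* Patched-out static branch: no overhead when accounting is off. */
	if (!memcg_kmem_enabled())
		return cachep;
	/* __memcg_kmem_get_cache() can't serve interrupts or kthreads. */
	if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
		return cachep;

	return __memcg_kmem_get_cache(cachep, gfp);
}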
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll