@@ -31,6 +31,7 @@
 #include <linux/fault-inject.h>
 #include <linux/stacktrace.h>
 #include <linux/prefetch.h>
+#include <linux/memcontrol.h>
 
 #include <trace/events/kmem.h>
 
@@ -200,13 +201,14 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
-
+static void memcg_propagate_slab_attrs(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
                                                         { return 0; }
 static inline void sysfs_slab_remove(struct kmem_cache *s) { }
 
+static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
 #endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -1343,6 +1345,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         void *start;
         void *last;
         void *p;
+        int order;
 
         BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
@@ -1351,7 +1354,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         if (!page)
                 goto out;
 
+        order = compound_order(page);
         inc_slabs_node(s, page_to_nid(page), page->objects);
+        memcg_bind_pages(s, order);
         page->slab_cache = s;
         __SetPageSlab(page);
         if (page->pfmemalloc)
@@ -1360,7 +1365,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         start = page_address(page);
 
         if (unlikely(s->flags & SLAB_POISON))
-                memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
+                memset(start, POISON_INUSE, PAGE_SIZE << order);
 
         last = start;
         for_each_object(p, s, start, page->objects) {
@@ -1401,10 +1406,12 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
         __ClearPageSlabPfmemalloc(page);
         __ClearPageSlab(page);
+
+        memcg_release_pages(s, order);
         reset_page_mapcount(page);
         if (current->reclaim_state)
                 current->reclaim_state->reclaimed_slab += pages;
-        __free_pages(page, order);
+        __free_memcg_kmem_pages(page, order);
 }
 
 #define need_reserve_slab_rcu                                           \
@@ -2322,6 +2329,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
         if (slab_pre_alloc_hook(s, gfpflags))
                 return NULL;
 
+        s = memcg_kmem_get_cache(s, gfpflags);
 redo:
 
         /*
@@ -2610,19 +2618,10 @@ redo:
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
-        struct page *page;
-
-        page = virt_to_head_page(x);
-
-        if (kmem_cache_debug(s) && page->slab_cache != s) {
-                pr_err("kmem_cache_free: Wrong slab cache. %s but object"
-                        " is from %s\n", page->slab_cache->name, s->name);
-                WARN_ON_ONCE(1);
+        s = cache_from_obj(s, x);
+        if (!s)
                 return;
-        }
-
-        slab_free(s, page, x, _RET_IP_);
-
+        slab_free(s, virt_to_head_page(x), x, _RET_IP_);
         trace_kmem_cache_free(_RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -3154,8 +3153,19 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 {
         int rc = kmem_cache_close(s);
 
-        if (!rc)
+        if (!rc) {
+                /*
+                 * We do the same lock strategy around sysfs_slab_add, see
+                 * __kmem_cache_create. Because this is pretty much the last
+                 * operation we do and the lock will be released shortly after
+                 * that in slab_common.c, we could just move sysfs_slab_remove
+                 * to a later point in common code. We should do that when we
+                 * have a common sysfs framework for all allocators.
+                 */
+                mutex_unlock(&slab_mutex);
                 sysfs_slab_remove(s);
+                mutex_lock(&slab_mutex);
+        }
 
         return rc;
 }
@@ -3292,7 +3302,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
         struct page *page;
         void *ptr = NULL;
 
-        flags |= __GFP_COMP | __GFP_NOTRACK;
+        flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
         page = alloc_pages_node(node, flags, get_order(size));
         if (page)
                 ptr = page_address(page);
@@ -3398,7 +3408,7 @@ void kfree(const void *x)
         if (unlikely(!PageSlab(page))) {
                 BUG_ON(!PageCompound(page));
                 kmemleak_free(x);
-                __free_pages(page, compound_order(page));
+                __free_memcg_kmem_pages(page, compound_order(page));
                 return;
         }
         slab_free(page->slab_cache, page, object, _RET_IP_);
@@ -3786,7 +3796,7 @@ static int slab_unmergeable(struct kmem_cache *s)
         return 0;
 }
 
-static struct kmem_cache *find_mergeable(size_t size,
+static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
                 size_t align, unsigned long flags, const char *name,
                 void (*ctor)(void *))
 {
@@ -3822,17 +3832,21 @@ static struct kmem_cache *find_mergeable(size_t size,
                 if (s->size - size >= sizeof(void *))
                         continue;
 
+                if (!cache_match_memcg(s, memcg))
+                        continue;
+
                 return s;
         }
         return NULL;
 }
 
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-                size_t align, unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+                   size_t align, unsigned long flags, void (*ctor)(void *))
 {
         struct kmem_cache *s;
 
-        s = find_mergeable(size, align, flags, name, ctor);
+        s = find_mergeable(memcg, size, align, flags, name, ctor);
         if (s) {
                 s->refcount++;
                 /*
@@ -3863,6 +3877,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
         if (slab_state <= UP)
                 return 0;
 
+        memcg_propagate_slab_attrs(s);
         mutex_unlock(&slab_mutex);
         err = sysfs_slab_add(s);
         mutex_lock(&slab_mutex);
@@ -5096,10 +5111,95 @@ static ssize_t slab_attr_store(struct kobject *kobj,
                 return -EIO;
 
         err = attribute->store(s, buf, len);
+#ifdef CONFIG_MEMCG_KMEM
+        if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
+                int i;
 
+                mutex_lock(&slab_mutex);
+                if (s->max_attr_size < len)
+                        s->max_attr_size = len;
+
+                /*
+                 * This is a best effort propagation, so this function's return
+                 * value will be determined by the parent cache only. This is
+                 * basically because not all attributes will have a well
+                 * defined semantics for rollbacks - most of the actions will
+                 * have permanent effects.
+                 *
+                 * Returning the error value of any of the children that fail
+                 * is not 100 % defined, in the sense that users seeing the
+                 * error code won't be able to know anything about the state of
+                 * the cache.
+                 *
+                 * Only returning the error code for the parent cache at least
+                 * has well defined semantics. The cache being written to
+                 * directly either failed or succeeded, in which case we loop
+                 * through the descendants with best-effort propagation.
+                 */
+                for_each_memcg_cache_index(i) {
+                        struct kmem_cache *c = cache_from_memcg(s, i);
+                        if (c)
+                                attribute->store(c, buf, len);
+                }
+                mutex_unlock(&slab_mutex);
+        }
+#endif
         return err;
 }
 
+static void memcg_propagate_slab_attrs(struct kmem_cache *s)
+{
+#ifdef CONFIG_MEMCG_KMEM
+        int i;
+        char *buffer = NULL;
+
+        if (!is_root_cache(s))
+                return;
+
+        /*
+         * This mean this cache had no attribute written. Therefore, no point
+         * in copying default values around
+         */
+        if (!s->max_attr_size)
+                return;
+
+        for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
+                char mbuf[64];
+                char *buf;
+                struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+
+                if (!attr || !attr->store || !attr->show)
+                        continue;
+
+                /*
+                 * It is really bad that we have to allocate here, so we will
+                 * do it only as a fallback. If we actually allocate, though,
+                 * we can just use the allocated buffer until the end.
+                 *
+                 * Most of the slub attributes will tend to be very small in
+                 * size, but sysfs allows buffers up to a page, so they can
+                 * theoretically happen.
+                 */
+                if (buffer)
+                        buf = buffer;
+                else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+                        buf = mbuf;
+                else {
+                        buffer = (char *) get_zeroed_page(GFP_KERNEL);
+                        if (WARN_ON(!buffer))
+                                continue;
+                        buf = buffer;
+                }
+
+                attr->show(s->memcg_params->root_cache, buf);
+                attr->store(s, buf, strlen(buf));
+        }
+
+        if (buffer)
+                free_page((unsigned long)buffer);
+#endif
+}
+
 static const struct sysfs_ops slab_sysfs_ops = {
         .show = slab_attr_show,
         .store = slab_attr_store,
@@ -5156,6 +5256,12 @@ static char *create_unique_id(struct kmem_cache *s)
         if (p != name + 1)
                 *p++ = '-';
         p += sprintf(p, "%07d", s->size);
+
+#ifdef CONFIG_MEMCG_KMEM
+        if (!is_root_cache(s))
+                p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
+#endif
+
         BUG_ON(p > name + ID_STR_LENGTH - 1);
         return name;
 }