@@ -124,6 +124,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *	   0 for faster, smaller code (especially in the critical paths).
@@ -152,6 +154,12 @@
 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
 #endif
 
+/*
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
+ */
+static bool pfmemalloc_active __read_mostly;
+
 /* Legal flag mask for kmem_cache_create(). */
 #if DEBUG
 # define CREATE_MASK	(SLAB_RED_ZONE | \
@@ -257,9 +265,30 @@ struct array_cache {
 			 * Must have this definition in here for the proper
 			 * alignment of array_cache. Also simplifies accessing
 			 * the entries.
+			 *
+			 * Entries should not be directly dereferenced as
+			 * entries belonging to slabs marked pfmemalloc will
+			 * have the lower bits set SLAB_OBJ_PFMEMALLOC
 			 */
 };
 
+#define SLAB_OBJ_PFMEMALLOC	1
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+	return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
+}
+
+static inline void set_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
+	return;
+}
+
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
+}
+
 /*
  * bootstrap: The caches do not work without cpuarrays anymore, but the
  * cpuarrays are allocated from the generic caches...
@@ -900,6 +929,102 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	return nc;
 }
 
+static inline bool is_slab_pfmemalloc(struct slab *slabp)
+{
+	struct page *page = virt_to_page(slabp->s_mem);
+
+	return PageSlabPfmemalloc(page);
+}
+
+/* Clears pfmemalloc_active if no slabs have pfmemalloc set */
+static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
+						struct array_cache *ac)
+{
+	struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
+	struct slab *slabp;
+	unsigned long flags;
+
+	if (!pfmemalloc_active)
+		return;
+
+	spin_lock_irqsave(&l3->list_lock, flags);
+	list_for_each_entry(slabp, &l3->slabs_full, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_partial, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_free, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	pfmemalloc_active = false;
+out:
+	spin_unlock_irqrestore(&l3->list_lock, flags);
+}
+
+static void *ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+						gfp_t flags, bool force_refill)
+{
+	int i;
+	void *objp = ac->entry[--ac->avail];
+
+	/* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
+	if (unlikely(is_obj_pfmemalloc(objp))) {
+		struct kmem_list3 *l3;
+
+		if (gfp_pfmemalloc_allowed(flags)) {
+			clear_obj_pfmemalloc(&objp);
+			return objp;
+		}
+
+		/* The caller cannot use PFMEMALLOC objects, find another one */
+		for (i = 1; i < ac->avail; i++) {
+			/* If a !PFMEMALLOC object is found, swap them */
+			if (!is_obj_pfmemalloc(ac->entry[i])) {
+				objp = ac->entry[i];
+				ac->entry[i] = ac->entry[ac->avail];
+				ac->entry[ac->avail] = objp;
+				return objp;
+			}
+		}
+
+		/*
+		 * If there are empty slabs on the slabs_free list and we are
+		 * being forced to refill the cache, mark this one !pfmemalloc.
+		 */
+		l3 = cachep->nodelists[numa_mem_id()];
+		if (!list_empty(&l3->slabs_free) && force_refill) {
+			struct slab *slabp = virt_to_slab(objp);
+			ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+			clear_obj_pfmemalloc(&objp);
+			recheck_pfmemalloc_active(cachep, ac);
+			return objp;
+		}
+
+		/* No !PFMEMALLOC objects available */
+		ac->avail++;
+		objp = NULL;
+	}
+
+	return objp;
+}
+
+static void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+								void *objp)
+{
+	if (unlikely(pfmemalloc_active)) {
+		/* Some pfmemalloc slabs exist, check if this is one */
+		struct page *page = virt_to_page(objp);
+		if (PageSlabPfmemalloc(page))
+			set_obj_pfmemalloc(&objp);
+	}
+
+	ac->entry[ac->avail++] = objp;
+}
+
 /*
  * Transfer objects in one arraycache to another.
  * Locking must be handled by the caller.
@@ -1076,7 +1201,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 			STATS_INC_ACOVERFLOW(cachep);
 			__drain_alien_cache(cachep, alien, nodeid);
 		}
-		alien->entry[alien->avail++] = objp;
+		ac_put_obj(cachep, alien, objp);
 		spin_unlock(&alien->lock);
 	} else {
 		spin_lock(&(cachep->nodelists[nodeid])->list_lock);
@@ -1759,6 +1884,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		return NULL;
 	}
 
+	/* Record if ALLOC_PFMEMALLOC was set when allocating the slab */
+	if (unlikely(page->pfmemalloc))
+		pfmemalloc_active = true;
+
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -1766,9 +1895,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	else
 		add_zone_page_state(page_zone(page),
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
-	for (i = 0; i < nr_pages; i++)
+	for (i = 0; i < nr_pages; i++) {
 		__SetPageSlab(page + i);
 
+		if (page->pfmemalloc)
+			SetPageSlabPfmemalloc(page + i);
+	}
+
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
 
@@ -1800,6 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 				NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
+		__ClearPageSlabPfmemalloc(page);
 		__ClearPageSlab(page);
 		page++;
 	}
@@ -3015,16 +3149,19 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+							bool force_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
 	struct array_cache *ac;
 	int node;
 
-retry:
 	check_irq_off();
 	node = numa_mem_id();
+	if (unlikely(force_refill))
+		goto force_grow;
+retry:
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3074,8 +3211,8 @@ retry:
 			STATS_INC_ACTIVE(cachep);
 			STATS_SET_HIGH(cachep);
 
-			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
-							    node);
+			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+									node));
 		}
 		check_slabp(cachep, slabp);
 
@@ -3094,18 +3231,22 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || force_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
 			goto retry;
 	}
 	ac->touched = 1;
-	return ac->entry[--ac->avail];
+
+	return ac_get_obj(cachep, ac, flags, force_refill);
 }
 
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3187,23 +3328,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
 	struct array_cache *ac;
+	bool force_refill = false;
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
-		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
-		objp = ac->entry[--ac->avail];
-	} else {
-		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = ac_get_obj(cachep, ac, flags, false);
+
 		/*
-		 * the 'ac' may be updated by cache_alloc_refill(),
-		 * and kmemleak_erase() requires its correct value.
+		 * Allow for the possibility all avail objects are not allowed
+		 * by the current flags
 		 */
-		ac = cpu_cache_get(cachep);
+		if (objp) {
+			STATS_INC_ALLOCHIT(cachep);
+			goto out;
+		}
+		force_refill = true;
 	}
+
+	STATS_INC_ALLOCMISS(cachep);
+	objp = cache_alloc_refill(cachep, flags, force_refill);
+	/*
+	 * the 'ac' may be updated by cache_alloc_refill(),
+	 * and kmemleak_erase() requires its correct value.
+	 */
+	ac = cpu_cache_get(cachep);
+
+out:
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
@@ -3525,9 +3678,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 	struct kmem_list3 *l3;
 
 	for (i = 0; i < nr_objects; i++) {
-		void *objp = objpp[i];
+		void *objp;
 		struct slab *slabp;
 
+		clear_obj_pfmemalloc(&objpp[i]);
+		objp = objpp[i];
+
 		slabp = virt_to_slab(objp);
 		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
@@ -3645,7 +3801,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
 		cache_flusharray(cachep, ac);
 	}
 
-	ac->entry[ac->avail++] = objp;
+	ac_put_obj(cachep, ac, objp);
 }
 
 /**