@@ -115,6 +115,7 @@
 #include <linux/reciprocal_div.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemcheck.h>
+#include <linux/memory.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -144,30 +145,6 @@
 #define	BYTES_PER_WORD		sizeof(void *)
 #define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))
 
-#ifndef ARCH_KMALLOC_MINALIGN
-/*
- * Enforce a minimum alignment for the kmalloc caches.
- * Usually, the kmalloc caches are cache_line_size() aligned, except when
- * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
- * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than the alignment of a 64-bit integer.
- * ARCH_KMALLOC_MINALIGN allows that.
- * Note that increasing this value may disable some debug features.
- */
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-/*
- * Enforce a minimum alignment for all caches.
- * Intended for archs that get misalignment faults even for BYTES_PER_WORD
- * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
- * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
- * some debug features.
- */
-#define ARCH_SLAB_MINALIGN 0
-#endif
-
 #ifndef ARCH_KMALLOC_FLAGS
 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
 #endif
@@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 }
 #endif
 
+/*
+ * Allocates and initializes nodelists for a node on each slab cache, used for
+ * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3
+ * will be allocated off-node since memory is not yet online for the new node.
+ * When hotplugging memory or a cpu, existing nodelists are not replaced if
+ * already in use.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int init_cache_nodelists_node(int node)
+{
+	struct kmem_cache *cachep;
+	struct kmem_list3 *l3;
+	const int memsize = sizeof(struct kmem_list3);
+
+	list_for_each_entry(cachep, &cache_chain, next) {
+		/*
+		 * Set up the size64 kmemlist for cpu before we can
+		 * begin anything. Make sure some other cpu on this
+		 * node has not already allocated this
+		 */
+		if (!cachep->nodelists[node]) {
+			l3 = kmalloc_node(memsize, GFP_KERNEL, node);
+			if (!l3)
+				return -ENOMEM;
+			kmem_list3_init(l3);
+			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
+			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+
+			/*
+			 * The l3s don't come and go as CPUs come and
+			 * go. cache_chain_mutex is sufficient
+			 * protection here.
+			 */
+			cachep->nodelists[node] = l3;
+		}
+
+		spin_lock_irq(&cachep->nodelists[node]->list_lock);
+		cachep->nodelists[node]->free_limit =
+			(1 + nr_cpus_node(node)) *
+			cachep->batchcount + cachep->num;
+		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+	}
+	return 0;
+}
+
 static void __cpuinit cpuup_canceled(long cpu)
 {
 	struct kmem_cache *cachep;
@@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu)
 	struct kmem_cache *cachep;
 	struct kmem_list3 *l3 = NULL;
 	int node = cpu_to_node(cpu);
-	const int memsize = sizeof(struct kmem_list3);
+	int err;
 
 	/*
 	 * We need to do this right in the beginning since
@@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu)
 	 * kmalloc_node allows us to add the slab to the right
 	 * kmem_list3 and not this cpu's kmem_list3
 	 */
-
-	list_for_each_entry(cachep, &cache_chain, next) {
-		/*
-		 * Set up the size64 kmemlist for cpu before we can
-		 * begin anything. Make sure some other cpu on this
-		 * node has not already allocated this
-		 */
-		if (!cachep->nodelists[node]) {
-			l3 = kmalloc_node(memsize, GFP_KERNEL, node);
-			if (!l3)
-				goto bad;
-			kmem_list3_init(l3);
-			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-
-			/*
-			 * The l3s don't come and go as CPUs come and
-			 * go. cache_chain_mutex is sufficient
-			 * protection here.
-			 */
-			cachep->nodelists[node] = l3;
-		}
-
-		spin_lock_irq(&cachep->nodelists[node]->list_lock);
-		cachep->nodelists[node]->free_limit =
-			(1 + nr_cpus_node(node)) *
-			cachep->batchcount + cachep->num;
-		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
-	}
-
+	err = init_cache_nodelists_node(node);
+	if (err < 0)
+		goto bad;
 
 	/*
 	 * Now we can go ahead with allocating the shared arrays and
@@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
 	&cpuup_callback, NULL, 0
 };
 
+#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * Drains freelist for a node on each slab cache, used for memory hot-remove.
+ * Returns -EBUSY if all objects cannot be drained so that the node is not
+ * removed.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int __meminit drain_cache_nodelists_node(int node)
+{
+	struct kmem_cache *cachep;
+	int ret = 0;
+
+	list_for_each_entry(cachep, &cache_chain, next) {
+		struct kmem_list3 *l3;
+
+		l3 = cachep->nodelists[node];
+		if (!l3)
+			continue;
+
+		drain_freelist(cachep, l3, l3->free_objects);
+
+		if (!list_empty(&l3->slabs_full) ||
+		    !list_empty(&l3->slabs_partial)) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+	return ret;
+}
+
+static int __meminit slab_memory_callback(struct notifier_block *self,
+					unsigned long action, void *arg)
+{
+	struct memory_notify *mnb = arg;
+	int ret = 0;
+	int nid;
+
+	nid = mnb->status_change_nid;
+	if (nid < 0)
+		goto out;
+
+	switch (action) {
+	case MEM_GOING_ONLINE:
+		mutex_lock(&cache_chain_mutex);
+		ret = init_cache_nodelists_node(nid);
+		mutex_unlock(&cache_chain_mutex);
+		break;
+	case MEM_GOING_OFFLINE:
+		mutex_lock(&cache_chain_mutex);
+		ret = drain_cache_nodelists_node(nid);
+		mutex_unlock(&cache_chain_mutex);
+		break;
+	case MEM_ONLINE:
+	case MEM_OFFLINE:
+	case MEM_CANCEL_ONLINE:
+	case MEM_CANCEL_OFFLINE:
+		break;
+	}
+out:
+	return ret ? notifier_from_errno(ret) : NOTIFY_OK;
+}
+#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+
 /*
  * swap the static kmem_list3 with kmalloced memory
  */
-static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
-		      int nodeid)
+static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
+			     int nodeid)
 {
 	struct kmem_list3 *ptr;
 
@@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void)
 	 */
 	register_cpu_notifier(&cpucache_notifier);
 
+#ifdef CONFIG_NUMA
+	/*
+	 * Register a memory hotplug callback that initializes and frees
+	 * nodelists.
+	 */
+	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
+#endif
+
 	/*
 	 * The reap timers are started later, with a module init call: That part
 	 * of the kernel is not yet operational.
@@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (ralign < align) {
 		ralign = align;
 	}
-	/* disable debug if necessary */
-	if (ralign > __alignof__(unsigned long long))
+	/* disable debug if not aligning with REDZONE_ALIGN */
+	if (ralign & (__alignof__(unsigned long long) - 1))
 		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	/*
 	 * 4) Store it.
@@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 */
 	if (flags & SLAB_RED_ZONE) {
 		/* add space for red zone words */
-		cachep->obj_offset += sizeof(unsigned long long);
-		size += 2 * sizeof(unsigned long long);
+		cachep->obj_offset += align;
+		size += align + sizeof(unsigned long long);
 	}
 	if (flags & SLAB_STORE_USER) {
 		/* user store requires one word storage behind the end of
@@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p)
 		unsigned long node_frees = cachep->node_frees;
 		unsigned long overflows = cachep->node_overflow;
 
-		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-				%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
-				reaped, errors, max_freeable, node_allocs,
-				node_frees, overflows);
+		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
+			   "%4lu %4lu %4lu %4lu %4lu",
+			   allocs, high, grown,
+			   reaped, errors, max_freeable, node_allocs,
+			   node_frees, overflows);
 	}
 	/* cpu stats */
 	{