
Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  slub: disallow changing cpu_partial from userspace for debug caches
  slub: add missed accounting
  slub: Extract get_freelist from __slab_alloc
  slub: Switch per cpu partial page support off for debugging
  slub: fix a possible memleak in __slab_alloc()
  slub: fix slub_max_order Documentation
  slub: add missed accounting
  slab: add taint flag outputting to debug paths.
  slub: add taint flag outputting to debug paths
  slab: introduce slab_max_order kernel parameter
  slab: rename slab_break_gfp_order to slab_max_order
Linus Torvalds, 13 years ago
parent
commit 6296e5d3c0
4 changed files with 82 additions and 42 deletions
  1. Documentation/kernel-parameters.txt   +6 -0
  2. Documentation/vm/slub.txt             +1 -1
  3. mm/slab.c                             +27 -12
  4. mm/slub.c                             +48 -29

+ 6 - 0
Documentation/kernel-parameters.txt

@@ -2395,6 +2395,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	slram=		[HW,MTD]
 
+	slab_max_order=	[MM, SLAB]
+			Determines the maximum allowed order for slabs.
+			A high setting may cause OOMs due to memory
+			fragmentation.  Defaults to 1 for systems with
+			more than 32MB of RAM, 0 otherwise.
+
 	slub_debug[=options[,slabs]]	[MM, SLUB]
 			Enabling slub_debug allows one to determine the
 			culprit if slab objects become corrupted. Enabling
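
As an illustration (the value is arbitrary, not a recommendation), the new parameter is passed like any other boot option, e.g.

	slab_max_order=2

which would let SLAB use slabs of up to order 2 (four contiguous pages). As the mm/slab.c hunk below shows, negative values are treated as 0 and anything above MAX_ORDER - 1 is capped.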

+ 1 - 1
Documentation/vm/slub.txt

@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
 
 slub_min_objects=x		(default 4)
 slub_min_order=x		(default 0)
-slub_max_order=x		(default 1)
+slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))
 
 slub_min_objects allows to specify how many objects must at least fit
 into one slab in order for the allocation order to be acceptable.
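
This hunk only corrects stale documentation: the in-code default is already PAGE_ALLOC_COSTLY_ORDER (3). As a sketch of usage, booting with

	slub_max_order=1

would restrict SLUB to order-1 (two-page) slabs, trading object density for lower fragmentation pressure.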

+ 27 - 12
mm/slab.c

@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
 #endif
 
 /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
  */
-#define	BREAK_GFP_ORDER_HI	1
-#define	BREAK_GFP_ORDER_LO	0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define	SLAB_MAX_ORDER_HI	1
+#define	SLAB_MAX_ORDER_LO	0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
 
 /*
  * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
 }
 __setup("noaliencache", noaliencache_setup);
 
+static int __init slab_max_order_setup(char *str)
+{
+	get_option(&str, &slab_max_order);
+	slab_max_order = slab_max_order < 0 ? 0 :
+				min(slab_max_order, MAX_ORDER - 1);
+	slab_max_order_set = true;
+
+	return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
-	 * page orders on machines with more than 32MB of memory.
+	 * page orders on machines with more than 32MB of memory if
+	 * not overridden on the command line.
 	 */
-	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+		slab_max_order = SLAB_MAX_ORDER_HI;
 
 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Print header */
 			if (lines == 0) {
 				printk(KERN_ERR
-					"Slab corruption: %s start=%p, len=%d\n",
-					cachep->name, realobj, size);
+					"Slab corruption (%s): %s start=%p, len=%d\n",
+					print_tainted(), cachep->name, realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_max_order)
 			break;
 
 		/*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 	if (entries != cachep->num - slabp->inuse) {
 bad:
 		printk(KERN_ERR "slab: Internal list corruption detected in "
-				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-			cachep->name, cachep->num, slabp, slabp->inuse);
+			"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+			cachep->name, cachep->num, slabp, slabp->inuse,
+			print_tainted());
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
 			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
 			1);
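
A minimal user-space sketch of the clamping that slab_max_order_setup() performs (the helper name and the MAX_ORDER value of 11 are assumptions for illustration; the real MAX_ORDER is a build-time constant):

	#include <stdio.h>

	#define MAX_ORDER 11	/* assumed; common kernel default */

	/* Mirrors: slab_max_order < 0 ? 0 : min(slab_max_order, MAX_ORDER - 1) */
	static int clamp_slab_max_order(int requested)
	{
		if (requested < 0)
			return 0;
		return requested < MAX_ORDER - 1 ? requested : MAX_ORDER - 1;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       clamp_slab_max_order(-5),	/* 0  */
		       clamp_slab_max_order(4),		/* 4  */
		       clamp_slab_max_order(99));	/* 10 */
		return 0;
	}

The 32MB threshold in kmem_cache_init() works out to (32 << 20) >> PAGE_SHIFT = 8192 pages with 4 KiB pages; only systems above that get the order-1 default, and an explicit slab_max_order= setting now takes precedence over the heuristic.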

+ 48 - 29
mm/slub.c

@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}
 
 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page,
 						DEACTIVATE_TO_TAIL);
+					stat(s, FREE_ADD_PARTIAL);
+				}
 
 				l = m;
 			}
@@ -2123,6 +2126,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 	return object;
 }
 
+/*
+ * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
+ * or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ redo:
 		goto new_slab;
 	}
 
-	stat(s, ALLOC_SLOWPATH);
-
-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
 
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
+	stat(s, ALLOC_SLOWPATH);
 
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	object = get_freelist(s, c->page);
 
 	if (!object) {
 		c->page = NULL;
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 *    per node list when we run out of per cpu objects. We only fetch 50%
 	 *    to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;
 
 	s->cpu_partial = objects;
 	flush_all(s);
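
Taken together with the kmem_cache_open() change above, debug caches now start with cpu_partial = 0, and the sysfs knob only accepts 0 for them. As an illustration (cache name and debug setup are assumed, not taken from this commit):

	echo 0 > /sys/kernel/slab/kmalloc-64/cpu_partial	# still accepted
	echo 8 > /sys/kernel/slab/kmalloc-64/cpu_partial	# fails with EINVAL if slub_debug is active for this cache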