
mm: convert mm->cpu_vm_cpumask into cpumask_var_t

cpumask_t is a very big struct and cpu_vm_mask is placed at a poor
position in mm_struct, which can reduce the cache hit ratio.

This patch makes two changes:
1) Move the cpumask to the end of mm_struct, because usually only the
   front bits of the cpumask are accessed when the system has cpu-hotplug
   capability.
2) Convert cpu_vm_mask into cpumask_var_t.  This may help reduce the memory
   footprint if cpumask_size() uses nr_cpumask_bits properly in the future
   (a sketch of the cpumask_var_t pattern follows this list).
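
(A minimal sketch of the cpumask_var_t API this conversion relies on, for
reference.  alloc_cpumask_var(), free_cpumask_var() and cpumask_clear() are
the real helpers from include/linux/cpumask.h; struct example and its
init/destroy functions are hypothetical and only illustrate the pattern,
they are not part of this patch.)

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct example {				/* hypothetical container, like mm_struct */
	/* ... frequently accessed fields first ... */
	cpumask_var_t mask;			/* placed last, like cpu_vm_mask_var */
};

static int example_init(struct example *e)
{
	/* With CONFIG_CPUMASK_OFFSTACK=n this is a no-op that returns true;
	 * with CONFIG_CPUMASK_OFFSTACK=y it allocates cpumask_size() bytes. */
	if (!alloc_cpumask_var(&e->mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_clear(e->mask);			/* valid for both representations */
	return 0;
}

static void example_destroy(struct example *e)
{
	free_cpumask_var(e->mask);		/* no-op when the mask is embedded */
}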

In addition, this patch renames cpu_vm_mask to cpu_vm_mask_var.  This may
help to detect out-of-tree cpu_vm_mask users.

This patch has no functional change.

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit de03c72cfc (KOSAKI Motohiro)
7 changed files with 44 additions and 9 deletions:

 Documentation/cachetlb.txt |  1 +,  1 -
 arch/x86/kernel/tboot.c    |  0 +,  1 -
 include/linux/mm_types.h   |  6 +,  3 -
 include/linux/sched.h      |  1 +,  0 -
 init/main.c                |  2 +,  0 -
 kernel/fork.c              | 34 +,  3 -
 mm/init-mm.c               |  0 +,  1 -

+ 1 - 1
Documentation/cachetlb.txt

@@ -16,7 +16,7 @@ on all processors in the system.  Don't let this scare you into
 thinking SMP cache/tlb flushing must be so inefficient, this is in
 fact an area where many optimizations are possible.  For example,
 if it can be proven that a user address space has never executed
-on a cpu (see vma->cpu_vm_mask), one need not perform a flush
+on a cpu (see mm_cpumask()), one need not perform a flush
 for this address space on that cpu.

 First, the TLB flushing interfaces, since they are the simplest.  The
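
(The hunk above points readers at mm_cpumask(); a minimal sketch of the
optimization the documentation describes is below.  cpumask_test_cpu(),
for_each_online_cpu() and mm_cpumask() are real kernel APIs, while
flush_one_cpu() is a hypothetical per-CPU flush helper used only for
illustration.)

#include <linux/cpumask.h>
#include <linux/mm_types.h>

/* Only CPUs recorded in mm_cpumask(mm) have ever run this address space, */
/* so every other CPU can skip the TLB flush for it entirely.             */
static void flush_mm_sketch(struct mm_struct *mm)
{
	int cpu;

	for_each_online_cpu(cpu) {
		if (!cpumask_test_cpu(cpu, mm_cpumask(mm)))
			continue;			/* mm never ran here */
		flush_one_cpu(cpu, mm);			/* hypothetical helper */
	}
}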

+ 0 - 1
arch/x86/kernel/tboot.c

@@ -110,7 +110,6 @@ static struct mm_struct tboot_mm = {
 	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
-	.cpu_vm_mask    = CPU_MASK_ALL,
 };

 static inline void switch_to_tboot_pt(void)

+ 6 - 3
include/linux/mm_types.h

@@ -265,8 +265,6 @@ struct mm_struct {

 	struct linux_binfmt *binfmt;

-	cpumask_t cpu_vm_mask;
-
 	/* Architecture-specific MM context */
 	mm_context_t context;

@@ -316,9 +314,14 @@ struct mm_struct {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
+
+	cpumask_var_t cpu_vm_mask_var;
 };

 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
-#define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
+static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
+{
+	return mm->cpu_vm_mask_var;
+}

 #endif /* _LINUX_MM_TYPES_H */
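
(Why the new inline accessor can simply return cpu_vm_mask_var: cpumask_var_t
is defined so that the same expression yields a struct cpumask * under both
configurations, roughly as sketched below; the authoritative definitions live
in include/linux/cpumask.h.)

#ifdef CONFIG_CPUMASK_OFFSTACK
typedef struct cpumask *cpumask_var_t;		/* separately allocated mask */
#else
typedef struct cpumask cpumask_var_t[1];	/* mask embedded in the enclosing struct */
#endif

/*
 * In the embedded case the one-element array decays to a pointer, so
 * "return mm->cpu_vm_mask_var;" produces a struct cpumask * either way and
 * existing mm_cpumask(mm) callers need no changes.
 */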

+ 1 - 0
include/linux/sched.h

@@ -2176,6 +2176,7 @@ static inline void mmdrop(struct mm_struct * mm)
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
 		__mmdrop(mm);
 }
+extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm);

 /* mmput gets rid of the mappings and all user-space */
 extern void mmput(struct mm_struct *);

+ 2 - 0
init/main.c

@@ -509,6 +509,8 @@ asmlinkage void __init start_kernel(void)
 	sort_main_extable();
 	trap_init();
 	mm_init();
+	BUG_ON(mm_init_cpumask(&init_mm, 0));
+
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()

+ 34 - 3
kernel/fork.c

@@ -485,6 +485,20 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }

+int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (oldmm)
+		cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
+	else
+		memset(mm_cpumask(mm), 0, cpumask_size());
+#endif
+	return 0;
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
@@ -521,10 +535,20 @@ struct mm_struct * mm_alloc(void)
 	struct mm_struct * mm;

 	mm = allocate_mm();
-	if (mm) {
-		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	memset(mm, 0, sizeof(*mm));
+	mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	if (mm_init_cpumask(mm, NULL)) {
+		mm_free_pgd(mm);
+		free_mm(mm);
+		return NULL;
 	}
+
 	return mm;
 }

@@ -536,6 +560,7 @@ struct mm_struct * mm_alloc(void)
 void __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
+	free_cpumask_var(mm->cpu_vm_mask_var);
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
@@ -690,6 +715,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;

+	if (mm_init_cpumask(mm, oldmm))
+		goto fail_nocpumask;
+
 	if (init_new_context(tsk, mm))
 		goto fail_nocontext;

@@ -716,6 +744,9 @@ fail_nomem:
 	return NULL;

 fail_nocontext:
+	free_cpumask_var(mm->cpu_vm_mask_var);
+
+fail_nocpumask:
 	/*
 	 * If init_new_context() failed, we cannot use mmput() to free the mm
 	 * because it calls destroy_context()

+ 0 - 1
mm/init-mm.c

@@ -21,6 +21,5 @@ struct mm_struct init_mm = {
 	.mmap_sem	= __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
-	.cpu_vm_mask	= CPU_MASK_ALL,
 	INIT_MM_CONTEXT(init_mm)
 };