Browse Source

Merge branch 'master'

Jeff Garzik 19 years ago
parent
commit
81cfb8864c
100 changed files with 1113 additions and 1899 deletions
  1. 0 9
      Documentation/cachetlb.txt
  2. 0 2
      Documentation/kernel-parameters.txt
  3. 1 23
      Documentation/m68k/kernel-options.txt
  4. 3 0
      arch/alpha/mm/numa.c
  5. 1 5
      arch/alpha/mm/remap.c
  6. 18 78
      arch/arm/kernel/signal.c
  7. 9 5
      arch/arm/kernel/traps.c
  8. 1 5
      arch/arm/mm/consistent.c
  9. 6 1
      arch/arm/mm/fault-armv.c
  10. 1 3
      arch/arm/mm/ioremap.c
  11. 1 14
      arch/arm/mm/mm-armv.c
  12. 9 37
      arch/arm/oprofile/backtrace.c
  13. 2 16
      arch/arm26/mm/memc.c
  14. 4 2
      arch/cris/arch-v32/mm/tlb.c
  15. 1 3
      arch/cris/mm/ioremap.c
  16. 1 4
      arch/frv/mm/dma-alloc.c
  17. 2 2
      arch/frv/mm/pgalloc.c
  18. 7 10
      arch/i386/kernel/vm86.c
  19. 2 2
      arch/i386/mm/discontig.c
  20. 57 5
      arch/i386/mm/init.c
  21. 1 3
      arch/i386/mm/ioremap.c
  22. 7 4
      arch/i386/mm/pgtable.c
  23. 13 25
      arch/i386/oprofile/backtrace.c
  24. 2 1
      arch/ia64/kernel/perfmon.c
  25. 6 1
      arch/ia64/mm/discontig.c
  26. 7 27
      arch/ia64/mm/fault.c
  27. 4 9
      arch/ia64/mm/init.c
  28. 2 0
      arch/ia64/mm/tlb.c
  29. 8 1
      arch/m32r/mm/init.c
  30. 1 3
      arch/m32r/mm/ioremap.c
  31. 1 23
      arch/m68k/Kconfig
  32. 15 901
      arch/m68k/atari/stram.c
  33. 1 1
      arch/m68k/mm/kmap.c
  34. 1 1
      arch/m68k/sun3x/dvma.c
  35. 0 1
      arch/mips/kernel/irixelf.c
  36. 1 3
      arch/mips/mm/ioremap.c
  37. 9 15
      arch/parisc/kernel/cache.c
  38. 1 1
      arch/parisc/kernel/pci-dma.c
  39. 3 0
      arch/parisc/mm/init.c
  40. 2 4
      arch/parisc/mm/ioremap.c
  41. 1 5
      arch/ppc/kernel/dma-mapping.c
  42. 0 4
      arch/ppc/mm/4xx_mmu.c
  43. 1 3
      arch/ppc/mm/pgtable.c
  44. 7 5
      arch/ppc64/kernel/vdso.c
  45. 0 5
      arch/ppc64/mm/imalloc.c
  46. 84 3
      arch/ppc64/mm/init.c
  47. 1 3
      arch/s390/mm/ioremap.c
  48. 23 17
      arch/sh/mm/fault.c
  49. 0 2
      arch/sh/mm/hugetlbpage.c
  50. 1 3
      arch/sh/mm/ioremap.c
  51. 30 38
      arch/sh64/mm/cache.c
  52. 12 176
      arch/sh64/mm/hugetlbpage.c
  53. 1 3
      arch/sh64/mm/ioremap.c
  54. 4 3
      arch/sparc/mm/generic.c
  55. 0 1
      arch/sparc64/kernel/binfmt_aout32.c
  56. 5 4
      arch/sparc64/mm/generic.c
  57. 3 4
      arch/sparc64/mm/tlb.c
  58. 0 1
      arch/um/include/tlb.h
  59. 5 3
      arch/um/kernel/process_kern.c
  60. 1 3
      arch/um/kernel/skas/mmu.c
  61. 0 36
      arch/um/kernel/tt/tlb.c
  62. 0 1
      arch/x86_64/ia32/ia32_aout.c
  63. 1 3
      arch/x86_64/mm/ioremap.c
  64. 4 1
      crypto/api.c
  65. 5 14
      crypto/hmac.c
  66. 17 39
      crypto/tcrypt.c
  67. 2 3
      drivers/acpi/acpi_memhotplug.c
  68. 1 0
      drivers/base/Makefile
  69. 2 0
      drivers/base/init.c
  70. 452 0
      drivers/base/memory.c
  71. 4 8
      drivers/md/dm-crypt.c
  72. 3 4
      drivers/net/wireless/airo.c
  73. 3 3
      drivers/scsi/arm/scsi.h
  74. 2 8
      drivers/scsi/libata-core.c
  75. 10 7
      drivers/scsi/sg.c
  76. 7 3
      drivers/scsi/st.c
  77. 2 5
      drivers/usb/misc/usbtest.c
  78. 2 2
      fs/afs/file.c
  79. 0 1
      fs/binfmt_aout.c
  80. 0 1
      fs/binfmt_elf.c
  81. 0 7
      fs/binfmt_elf_fdpic.c
  82. 0 1
      fs/binfmt_flat.c
  83. 0 1
      fs/binfmt_som.c
  84. 1 1
      fs/buffer.c
  85. 0 1
      fs/compat.c
  86. 3 1
      fs/direct-io.c
  87. 6 11
      fs/exec.c
  88. 120 86
      fs/hugetlbfs/inode.c
  89. 6 6
      fs/jfs/jfs_metapage.c
  90. 1 1
      fs/proc/array.c
  91. 31 20
      fs/proc/task_mmu.c
  92. 4 3
      fs/xfs/linux-2.6/xfs_buf.c
  93. 2 0
      include/asm-alpha/barrier.h
  94. 5 0
      include/asm-alpha/rwsem.h
  95. 3 20
      include/asm-arm/tlb.h
  96. 16 31
      include/asm-arm26/tlb.h
  97. 3 8
      include/asm-generic/4level-fixup.h
  98. 1 1
      include/asm-generic/pgtable.h
  99. 4 19
      include/asm-generic/tlb.h
  100. 0 6
      include/asm-i386/mmzone.h

+ 0 - 9
Documentation/cachetlb.txt

@@ -49,9 +49,6 @@ changes occur:
 	page table operations such as what happens during
 	fork, and exec.
 
-	Platform developers note that generic code will always
-	invoke this interface without mm->page_table_lock held.
-
 3) void flush_tlb_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 
@@ -72,9 +69,6 @@ changes occur:
 	call flush_tlb_page (see below) for each entry which may be
 	modified.
 
-	Platform developers note that generic code will always
-	invoke this interface with mm->page_table_lock held.
-
 4) void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 
 	This time we need to remove the PAGE_SIZE sized translation
@@ -93,9 +87,6 @@ changes occur:
 
 	This is used primarily during fault processing.
 
-	Platform developers note that generic code will always
-	invoke this interface with mm->page_table_lock held.
-
 5) void flush_tlb_pgtables(struct mm_struct *mm,
 			   unsigned long start, unsigned long end)
 

+ 0 - 2
Documentation/kernel-parameters.txt

@@ -1460,8 +1460,6 @@ running once the system is up.
 	stifb=		[HW]
 			Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
 
-	stram_swap=	[HW,M68k]
-
 	swiotlb=	[IA-64] Number of I/O TLB slabs
 
 	switches=	[HW,M68k]

+ 1 - 23
Documentation/m68k/kernel-options.txt

@@ -626,7 +626,7 @@ ignored (others aren't affected).
     can be performed in optimal order. Not all SCSI devices support
     tagged queuing (:-().
 
-4.6 switches=
+4.5 switches=
 -------------
 
 Syntax: switches=<list of switches>
@@ -661,28 +661,6 @@ correctly.
 earlier initialization ("ov_"-less) takes precedence. But the
 switching-off on reset still happens in this case.
 
-4.5) stram_swap=
-----------------
-
-Syntax: stram_swap=<do_swap>[,<max_swap>]
-
-  This option is available only if the kernel has been compiled with
-CONFIG_STRAM_SWAP enabled. Normally, the kernel then determines
-dynamically whether to actually use ST-RAM as swap space. (Currently,
-the fraction of ST-RAM must be less or equal 1/3 of total memory to
-enable this swapping.) You can override the kernel's decision by
-specifying this option. 1 for <do_swap> means always enable the swap,
-even if you have less alternate RAM. 0 stands for never swap to
-ST-RAM, even if it's small enough compared to the rest of memory.
-
-  If ST-RAM swapping is enabled, the kernel usually uses all free
-ST-RAM as swap "device". If the kernel resides in ST-RAM, the region
-allocated by it is obviously never used for swapping :-) You can also
-limit this amount by specifying the second parameter, <max_swap>, if
-you want to use parts of ST-RAM as normal system memory. <max_swap> is
-in kBytes and the number should be a multiple of 4 (otherwise: rounded
-down).
-
 5) Options for Amiga Only:
 ==========================
 

+ 3 - 0
arch/alpha/mm/numa.c

@@ -371,6 +371,8 @@ show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		i = node_spanned_pages(nid);
 		while (i-- > 0) {
 			struct page *page = nid_page_nr(nid, i);
@@ -384,6 +386,7 @@ show_mem(void)
 			else
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
 	}
 	printk("%ld pages of RAM\n",total);
 	printk("%ld free pages\n",free);

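The hunk above is the first of several identical conversions in this merge (i386, ia64, m32r, and parisc follow): loops that walk a node's spanned pages now take the pgdat resize lock so memory hotplug cannot grow or shrink the node mid-walk. A minimal sketch of the pattern, with walk_node_pages() as a hypothetical caller, not from this commit:

#include <linux/mmzone.h>
#include <linux/memory_hotplug.h>

static void walk_node_pages(int nid)
{
	unsigned long flags;
	unsigned long i;

	/* stabilize node_spanned_pages against a concurrent hotplug resize */
	pgdat_resize_lock(NODE_DATA(nid), &flags);
	for (i = 0; i < NODE_DATA(nid)->node_spanned_pages; i++) {
		struct page *page = nid_page_nr(nid, i);
		/* ... inspect page state here, as show_mem() does ... */
	}
	pgdat_resize_unlock(NODE_DATA(nid), &flags);
}
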
+ 1 - 5
arch/alpha/mm/remap.c

@@ -2,7 +2,6 @@
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 
-/* called with the page_table_lock held */
 static inline void 
 remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, 
 	       unsigned long phys_addr, unsigned long flags)
@@ -31,7 +30,6 @@ remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
 	} while (address && (address < end));
 }
 
-/* called with the page_table_lock held */
 static inline int 
 remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, 
 	       unsigned long phys_addr, unsigned long flags)
@@ -46,7 +44,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, 
@@ -70,7 +68,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -84,7 +81,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	return error;
 }
 

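This file shows the two-part conversion repeated across nearly every ioremap/remap implementation below: pte_alloc_kernel() drops its mm argument, and the spin_lock(&init_mm.page_table_lock) bracketing disappears because kernel page-table allocation now serializes internally. A hedged sketch of a mapping loop in the new style (map_kernel_pmd() is a hypothetical name, not from this commit):

#include <linux/mm.h>
#include <asm/pgalloc.h>

static int map_kernel_pmd(pmd_t *pmd, unsigned long addr,
			  unsigned long end, unsigned long pfn, pgprot_t prot)
{
	/* no &init_mm argument, and no page_table_lock needed any more */
	pte_t *pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		set_pte(pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}
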
+ 18 - 78
arch/arm/kernel/signal.c

@@ -139,93 +139,33 @@ struct iwmmxt_sigframe {
 	unsigned long	storage[0x98/4];
 };
 
-static int page_present(struct mm_struct *mm, void __user *uptr, int wr)
-{
-	unsigned long addr = (unsigned long)uptr;
-	pgd_t *pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pmd_t *pmd = pmd_offset(pgd, addr);
-		if (pmd_present(*pmd)) {
-			pte_t *pte = pte_offset_map(pmd, addr);
-			return (pte_present(*pte) && (!wr || pte_write(*pte)));
-		}
-	}
-	return 0;
-}
-
-static int copy_locked(void __user *uptr, void *kptr, size_t size, int write,
-		       void (*copyfn)(void *, void __user *))
-{
-	unsigned char v, __user *userptr = uptr;
-	int err = 0;
-
-	do {
-		struct mm_struct *mm;
-
-		if (write) {
-			__put_user_error(0, userptr, err);
-			__put_user_error(0, userptr + size - 1, err);
-		} else {
-			__get_user_error(v, userptr, err);
-			__get_user_error(v, userptr + size - 1, err);
-		}
-
-		if (err)
-			break;
-
-		mm = current->mm;
-		spin_lock(&mm->page_table_lock);
-		if (page_present(mm, userptr, write) &&
-		    page_present(mm, userptr + size - 1, write)) {
-		    	copyfn(kptr, uptr);
-		} else
-			err = 1;
-		spin_unlock(&mm->page_table_lock);
-	} while (err);
-
-	return err;
-}
-
 static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
 	/* the iWMMXt context must be 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	__put_user_error(IWMMXT_MAGIC0, &frame->magic0, err);
-	__put_user_error(IWMMXT_MAGIC1, &frame->magic1, err);
-
-	/*
-	 * iwmmxt_task_copy() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	err |= copy_locked(&frame->storage, current_thread_info(),
-			   sizeof(frame->storage), 1, iwmmxt_task_copy);
-	return err;
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	kframe->magic0 = IWMMXT_MAGIC0;
+	kframe->magic1 = IWMMXT_MAGIC1;
+	iwmmxt_task_copy(current_thread_info(), &kframe->storage);
+	return __copy_to_user(frame, kframe, sizeof(*frame));
 }
 
 static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	unsigned long magic0, magic1;
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
-	/* the iWMMXt context is 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	/*
-	 * Validate iWMMXt context signature.
-	 * Also, iwmmxt_task_restore() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	__get_user_error(magic0, &frame->magic0, err);
-	__get_user_error(magic1, &frame->magic1, err);
-	if (!err && magic0 == IWMMXT_MAGIC0 && magic1 == IWMMXT_MAGIC1)
-		err = copy_locked(&frame->storage, current_thread_info(),
-				  sizeof(frame->storage), 0, iwmmxt_task_restore);
-	return err;
+	/* the iWMMXt context must be 64 bit aligned */
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	if (__copy_from_user(kframe, frame, sizeof(*frame)))
+		return -1;
+	if (kframe->magic0 != IWMMXT_MAGIC0 ||
+	    kframe->magic1 != IWMMXT_MAGIC1)
+		return -1;
+	iwmmxt_task_restore(current_thread_info(), &kframe->storage);
+	return 0;
 }
 
 #endif

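The rewrite above replaces the page_present()/copy_locked() machinery with a bounce buffer: the frame is staged in an 8-byte-aligned kernel buffer and moved with a single __copy_from_user()/__copy_to_user(), so ordinary fault fixups stand in for manual page-table checks. The alignment idiom is the only subtle part; restated as a hedged sketch (stage_frame() is a hypothetical name):

static int stage_frame(struct iwmmxt_sigframe __user *frame)
{
	char kbuf[sizeof(*frame) + 8];
	struct iwmmxt_sigframe *kframe;

	/* round (kbuf + 8) down so kframe is 64-bit aligned inside kbuf */
	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
	if (__copy_from_user(kframe, frame, sizeof(*frame)))
		return -1;	/* fault fixups replace pte inspection */
	/* ... validate magics, then iwmmxt_task_restore(), as above ... */
	return 0;
}
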
+ 9 - 5
arch/arm/kernel/traps.c

@@ -483,29 +483,33 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		unsigned long addr = regs->ARM_r2;
 		struct mm_struct *mm = current->mm;
 		pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+		spinlock_t *ptl;
 
 		regs->ARM_cpsr &= ~PSR_C_BIT;
-		spin_lock(&mm->page_table_lock);
+		down_read(&mm->mmap_sem);
 		pgd = pgd_offset(mm, addr);
 		if (!pgd_present(*pgd))
 			goto bad_access;
 		pmd = pmd_offset(pgd, addr);
 		if (!pmd_present(*pmd))
 			goto bad_access;
-		pte = pte_offset_map(pmd, addr);
-		if (!pte_present(*pte) || !pte_write(*pte))
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte_present(*pte) || !pte_write(*pte)) {
+			pte_unmap_unlock(pte, ptl);
 			goto bad_access;
+		}
 		val = *(unsigned long *)addr;
 		val -= regs->ARM_r0;
 		if (val == 0) {
 			*(unsigned long *)addr = regs->ARM_r1;
 			regs->ARM_cpsr |= PSR_C_BIT;
 		}
-		spin_unlock(&mm->page_table_lock);
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
 		return val;
 
 		bad_access:
-		spin_unlock(&mm->page_table_lock);
+		up_read(&mm->mmap_sem);
 		/* simulate a write access fault */
 		do_DataAbort(addr, 15 + (1 << 11), regs);
 		return -1;

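This hunk is the template for the new locking discipline that also appears in arch/i386/kernel/vm86.c below: mmap_sem (read) guards the vma and page-table structure, while the split pte lock, taken via pte_offset_map_lock(), guards the pte contents. A hedged sketch of the pattern (pte_is_writable() is a hypothetical helper; the two-level pgd/pmd walk is assumed, as on ARM):

#include <linux/mm.h>

static int pte_is_writable(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;
	int ret = 0;

	down_read(&mm->mmap_sem);	/* structure can't change under us */
	pgd = pgd_offset(mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		goto out;
	pmd = pmd_offset(pgd, addr);
	if (pmd_none_or_clear_bad(pmd))
		goto out;
	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);	/* pte lock, not page_table_lock */
	ret = pte_present(*pte) && pte_write(*pte);
	pte_unmap_unlock(pte, ptl);
out:
	up_read(&mm->mmap_sem);
	return ret;
}
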
+ 1 - 5
arch/arm/mm/consistent.c

@@ -397,8 +397,6 @@ static int __init consistent_init(void)
 	pte_t *pte;
 	int ret = 0;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	do {
 		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
 		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -409,7 +407,7 @@ static int __init consistent_init(void)
 		}
 		WARN_ON(!pmd_none(*pmd));
 
-		pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
 		if (!pte) {
 			printk(KERN_ERR "%s: no pte tables\n", __func__);
 			ret = -ENOMEM;
@@ -419,8 +417,6 @@ static int __init consistent_init(void)
 		consistent_pte = pte;
 	} while (0);
 
-	spin_unlock(&init_mm.page_table_lock);
-
 	return ret;
 }
 

+ 6 - 1
arch/arm/mm/fault-armv.c

@@ -26,6 +26,11 @@ static unsigned long shared_pte_mask = L_PTE_CACHEABLE;
 /*
  * We take the easy way out of this problem - we make the
  * PTE uncacheable.  However, we leave the write buffer on.
+ *
+ * Note that the pte lock held when calling update_mmu_cache must also
+ * guard the pte (somewhere else in the same mm) that we modify here.
+ * Therefore those configurations which might call adjust_pte (those
+ * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock.
  */
 static int adjust_pte(struct vm_area_struct *vma, unsigned long address)
 {
@@ -127,7 +132,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page);
  *  2. If we have multiple shared mappings of the same space in
  *     an object, we need to deal with the cache aliasing issues.
  *
- * Note that the page_table_lock will be held.
+ * Note that the pte lock will be held.
  */
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {

+ 1 - 3
arch/arm/mm/ioremap.c

@@ -75,7 +75,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 
 	pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags);
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, pgprot);
@@ -97,7 +97,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr,
 	phys_addr -= address;
 	dir = pgd_offset(&init_mm, address);
 	BUG_ON(address >= end);
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
 		if (!pmd) {
@@ -114,7 +113,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr,
 		dir++;
 	} while (address && (address < end));
 
-	spin_unlock(&init_mm.page_table_lock);
 	flush_cache_vmap(start, end);
 	return err;
 }

+ 1 - 14
arch/arm/mm/mm-armv.c

@@ -179,11 +179,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
 	if (!vectors_high()) {
-		/*
-		 * This lock is here just to satisfy pmd_alloc and pte_lock
-		 */
-		spin_lock(&mm->page_table_lock);
-
 		/*
 		 * On ARM, first page must always be allocated since it
 		 * contains the machine vectors.
@@ -201,23 +196,14 @@
 		set_pte(new_pte, *init_pte);
 		pte_unmap_nested(init_pte);
 		pte_unmap(new_pte);
-
-		spin_unlock(&mm->page_table_lock);
 	}
 
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }
@@ -243,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd)
 	pte = pmd_page(*pmd);
 	pmd_clear(pmd);
 	dec_page_state(nr_page_table_pages);
+	pte_lock_deinit(pte);
 	pte_free(pte);
 	pmd_free(pmd);
 free:

+ 9 - 37
arch/arm/oprofile/backtrace.c

@@ -49,42 +49,22 @@ static struct frame_tail* kernel_backtrace(struct frame_tail *tail)
 
 static struct frame_tail* user_backtrace(struct frame_tail *tail)
 {
-	struct frame_tail buftail;
+	struct frame_tail buftail[2];
 
-	/* hardware pte might not be valid due to dirty/accessed bit emulation
-	 * so we use copy_from_user and benefit from exception fixups */
-	if (copy_from_user(&buftail, tail, sizeof(struct frame_tail)))
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+	if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail)))
 		return NULL;
 
-	oprofile_add_trace(buftail.lr);
+	oprofile_add_trace(buftail[0].lr);
 
 	/* frame pointers should strictly progress back up the stack
 	 * (towards higher addresses) */
-	if (tail >= buftail.fp)
+	if (tail >= buftail[0].fp)
 		return NULL;
 
-	return buftail.fp-1;
-}
-
-/* Compare two addresses and see if they're on the same page */
-#define CMP_ADDR_EQUAL(x,y,offset) ((((unsigned long) x) >> PAGE_SHIFT) \
-	== ((((unsigned long) y) + offset) >> PAGE_SHIFT))
-
-/* check that the page(s) containing the frame tail are present */
-static int pages_present(struct frame_tail *tail)
-{
-	struct mm_struct * mm = current->mm;
-
-	if (!check_user_page_readable(mm, (unsigned long)tail))
-		return 0;
-
-	if (CMP_ADDR_EQUAL(tail, tail, 8))
-		return 1;
-
-	if (!check_user_page_readable(mm, ((unsigned long)tail) + 8))
-		return 0;
-
-	return 1;
+	return buftail[0].fp-1;
 }
 
 /*
@@ -118,7 +98,6 @@ static int valid_kernel_stack(struct frame_tail *tail, struct pt_regs *regs)
 void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	struct frame_tail *tail;
-	unsigned long last_address = 0;
 
 	tail = ((struct frame_tail *) regs->ARM_fp) - 1;
 
@@ -132,13 +111,6 @@ void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 		return;
 	}
 
-	while (depth-- && tail && !((unsigned long) tail & 3)) {
-		if ((!CMP_ADDR_EQUAL(last_address, tail, 0)
-			|| !CMP_ADDR_EQUAL(last_address, tail, 8))
-				&& !pages_present(tail))
-			return;
-		last_address = (unsigned long) tail;
+	while (depth-- && tail && !((unsigned long) tail & 3))
 		tail = user_backtrace(tail);
-	}
 }
-

+ 2 - 16
arch/arm26/mm/memc.c

@@ -78,12 +78,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	if (!new_pgd)
 		goto no_pgd;
 
-	/*
-	 * This lock is here just to satisfy pmd_alloc and pte_lock
-         * FIXME: I bet we could avoid taking it pretty much altogether
-	 */
-	spin_lock(&mm->page_table_lock);
-
 	/*
 	 * On ARM, first page must always be allocated since it contains
 	 * the machine vectors.
@@ -92,7 +86,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	if (!new_pmd)
 		goto no_pmd;
 
-	new_pte = pte_alloc_kernel(mm, new_pmd, 0);
+	new_pte = pte_alloc_map(mm, new_pmd, 0);
 	if (!new_pte)
 		goto no_pte;
 
@@ -101,6 +95,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	init_pte = pte_offset(init_pmd, 0);
 
 	set_pte(new_pte, *init_pte);
+	pte_unmap(new_pte);
 
 	/*
 	 * the page table entries are zeroed
@@ -112,23 +107,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
 		(PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
 
-	spin_unlock(&mm->page_table_lock);
-
 	/* update MEMC tables */
 	cpu_memc_update_all(new_pgd);
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }

+ 4 - 2
arch/cris/arch-v32/mm/tlb.c

@@ -175,6 +175,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	return 0;
 }
 
+static DEFINE_SPINLOCK(mmu_context_lock);
+
 /* Called in schedule() just before actually doing the switch_to. */
 void
 switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -183,10 +185,10 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	int cpu = smp_processor_id();
 
 	/* Make sure there is a MMU context. */
-	spin_lock(&next->page_table_lock);
+	spin_lock(&mmu_context_lock);
 	get_mmu_context(next);
 	cpu_set(cpu, next->cpu_vm_mask);
-	spin_unlock(&next->page_table_lock);
+	spin_unlock(&mmu_context_lock);
 
 	/*
 	 * Remember the pgd for the fault handlers. Keep a seperate copy of it

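The point of this hunk: mm->page_table_lock was being borrowed here to serialize MMU-context allocation, which the split-ptlock work no longer permits, so a private global spinlock takes over. A minimal sketch of the replacement (alloc_context() is a hypothetical wrapper around the arch-specific allocator):

#include <linux/spinlock.h>

/* dedicated lock for the short context-allocation critical section */
static DEFINE_SPINLOCK(mmu_context_lock);

static void alloc_context(struct mm_struct *next, int cpu)
{
	spin_lock(&mmu_context_lock);
	get_mmu_context(next);		/* arch-specific context allocator */
	cpu_set(cpu, next->cpu_vm_mask);
	spin_unlock(&mmu_context_lock);
}
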
+ 1 - 3
arch/cris/mm/ioremap.c

@@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, prot);
@@ -74,7 +74,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pud_t *pud;
 		pmd_t *pmd;
@@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }

+ 1 - 4
arch/frv/mm/dma-alloc.c

@@ -55,21 +55,18 @@ static int map_page(unsigned long va, unsigned long pa, pgprot_t prot)
 	pte_t *pte;
 	int err = -ENOMEM;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	/* Use upper 10 bits of VA to index the first level map */
 	pge = pgd_offset_k(va);
 	pue = pud_offset(pge, va);
 	pme = pmd_offset(pue, va);
 
 	/* Use middle 10 bits of VA to index the second-level map */
-	pte = pte_alloc_kernel(&init_mm, pme, va);
+	pte = pte_alloc_kernel(pme, va);
 	if (pte != 0) {
 		err = 0;
 		set_pte(pte, mk_pte_phys(pa & PAGE_MASK, prot));
 	}
 
-	spin_unlock(&init_mm.page_table_lock);
 	return err;
 }
 

+ 2 - 2
arch/frv/mm/pgalloc.c

@@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd)
 	if (pgd_list)
 		pgd_list->private = (unsigned long) &page->index;
 	pgd_list = page;
-	page->private = (unsigned long) &pgd_list;
+	set_page_private(page, (unsigned long)&pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *next, **pprev, *page = virt_to_page(pgd);
 	next = (struct page *) page->index;
-	pprev = (struct page **) page->private;
+	pprev = (struct page **)page_private(page);
 	*pprev = next;
 	if (next)
 		next->private = (unsigned long) pprev;

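page->private is about to be overloaded (the split pte lock lives in that word on some configurations), so direct field access is replaced by the page_private()/set_page_private() accessors; the same substitution appears in arch/i386/mm/pgtable.c below. A tiny sketch of the accessor pair (stash_cookie()/fetch_cookie() are hypothetical names):

#include <linux/mm.h>

static void stash_cookie(struct page *page, unsigned long cookie)
{
	set_page_private(page, cookie);	/* was: page->private = cookie */
}

static unsigned long fetch_cookie(struct page *page)
{
	return page_private(page);	/* was: page->private */
}
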
+ 7 - 10
arch/i386/kernel/vm86.c

@@ -134,17 +134,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 	return ret;
 }
 
-static void mark_screen_rdonly(struct task_struct * tsk)
+static void mark_screen_rdonly(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, *mapped;
+	pte_t *pte;
+	spinlock_t *ptl;
 	int i;
 
-	preempt_disable();
-	spin_lock(&tsk->mm->page_table_lock);
-	pgd = pgd_offset(tsk->mm, 0xA0000);
+	pgd = pgd_offset(mm, 0xA0000);
 	if (pgd_none_or_clear_bad(pgd))
 		goto out;
 	pud = pud_offset(pgd, 0xA0000);
@@ -153,16 +152,14 @@ static void mark_screen_rdonly(struct task_struct * tsk)
 	pmd = pmd_offset(pud, 0xA0000);
 	if (pmd_none_or_clear_bad(pmd))
 		goto out;
-	pte = mapped = pte_offset_map(pmd, 0xA0000);
+	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
 	for (i = 0; i < 32; i++) {
 		if (pte_present(*pte))
 			set_pte(pte, pte_wrprotect(*pte));
 		pte++;
 	}
-	pte_unmap(mapped);
+	pte_unmap_unlock(pte, ptl);
 out:
-	spin_unlock(&tsk->mm->page_table_lock);
-	preempt_enable();
 	flush_tlb();
 }
 
@@ -306,7 +303,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
-		mark_screen_rdonly(tsk);
+		mark_screen_rdonly(tsk->mm);
 	__asm__ __volatile__(
 		"xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
 		"movl %0,%%esp\n\t"

+ 2 - 2
arch/i386/mm/discontig.c

@@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 
 extern unsigned long find_max_low_pfn(void);
 extern void find_max_pfn(void);
-extern void one_highpage_init(struct page *, int, int);
+extern void add_one_highpage_init(struct page *, int, int);
 
 extern struct e820map e820;
 extern unsigned long init_pg_tables_end;
@@ -427,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro)
 			if (!pfn_valid(node_pfn))
 				continue;
 			page = pfn_to_page(node_pfn);
-			one_highpage_init(page, node_pfn, bad_ppro);
+			add_one_highpage_init(page, node_pfn, bad_ppro);
 		}
 	}
 	totalram_pages += totalhigh_pages;

+ 57 - 5
arch/i386/mm/init.c

@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -266,17 +267,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pkmap_page_table = pte;	
 }
 
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+void __devinit free_new_highpage(struct page *page)
+{
+	set_page_count(page, 1);
+	__free_page(page);
+	totalhigh_pages++;
+}
+
+void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
 	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
 		ClearPageReserved(page);
-		set_page_count(page, 1);
-		__free_page(page);
-		totalhigh_pages++;
+		free_new_highpage(page);
 	} else
 		SetPageReserved(page);
 }
 
+static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+{
+	free_new_highpage(page);
+	totalram_pages++;
+#ifdef CONFIG_FLATMEM
+	max_mapnr = max(pfn, max_mapnr);
+#endif
+	num_physpages++;
+	return 0;
+}
+
+/*
+ * Not currently handling the NUMA case.
+ * Assuming single node and all memory that
+ * has been added dynamically that would be
+ * onlined here is in HIGHMEM
+ */
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	add_one_highpage_hotplug(page, page_to_pfn(page));
+}
+
+
 #ifdef CONFIG_NUMA
 extern void set_highmem_pages_init(int);
 #else
@@ -284,7 +314,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
 {
 	int pfn;
 	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
-		one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+		add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
 	totalram_pages += totalhigh_pages;
 }
 #endif /* CONFIG_FLATMEM */
@@ -615,6 +645,28 @@ void __init mem_init(void)
 #endif
 }
 
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int add_memory(u64 start, u64 size)
+{
+	struct pglist_data *pgdata = &contig_page_data;
+	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+	return -EINVAL;
+}
+#endif
+
 kmem_cache_t *pgd_cache;
 kmem_cache_t *pmd_cache;
 

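add_memory()/remove_memory() are the arch-side hooks consumed by the memory-hotplug core introduced elsewhere in this merge (drivers/base/memory.c, drivers/acpi/acpi_memhotplug.c). A hedged sketch of how a hot-add path would use them; hotadd_range() and the two-step add/online split are described per the comments above, and the caller name is illustrative only:

#include <linux/memory_hotplug.h>

static int hotadd_range(u64 start, u64 size)
{
	int ret;

	ret = add_memory(start, size);	/* create struct pages for the range */
	if (ret)
		return ret;
	/*
	 * The pages are still offline; the ACPI driver or userspace onlines
	 * them through the new sysfs memory class, which eventually calls
	 * online_page() above to hand each page to the allocator.
	 */
	return 0;
}
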
+ 1 - 3
arch/i386/mm/ioremap.c

@@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 	unsigned long pfn;
 
 	pfn = phys_addr >> PAGE_SHIFT;
-	pte = pte_alloc_kernel(&init_mm, pmd, addr);
+	pte = pte_alloc_kernel(pmd, addr);
 	if (!pte)
 		return -ENOMEM;
 	do {
@@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr,
 	flush_cache_all();
 	phys_addr -= addr;
 	pgd = pgd_offset_k(addr);
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		next = pgd_addr_end(addr, end);
 		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return err;
 }

+ 7 - 4
arch/i386/mm/pgtable.c

@@ -31,11 +31,13 @@ void show_mem(void)
 	pg_data_t *pgdat;
 	unsigned long i;
 	struct page_state ps;
+	unsigned long flags;
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
 	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -48,6 +50,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk(KERN_INFO "%d pages of RAM\n", total);
 	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
@@ -188,19 +191,19 @@ static inline void pgd_list_add(pgd_t *pgd)
 	struct page *page = virt_to_page(pgd);
 	page->index = (unsigned long)pgd_list;
 	if (pgd_list)
-		pgd_list->private = (unsigned long)&page->index;
+		set_page_private(pgd_list, (unsigned long)&page->index);
 	pgd_list = page;
-	page->private = (unsigned long)&pgd_list;
+	set_page_private(page, (unsigned long)&pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *next, **pprev, *page = virt_to_page(pgd);
 	next = (struct page *)page->index;
-	pprev = (struct page **)page->private;
+	pprev = (struct page **)page_private(page);
 	*pprev = next;
 	if (next)
-		next->private = (unsigned long)pprev;
+		set_page_private(next, (unsigned long)pprev);
}
 
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)

+ 13 - 25
arch/i386/oprofile/backtrace.c

@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <asm/ptrace.h>
+#include <asm/uaccess.h>
 
 struct frame_head {
 	struct frame_head * ebp;
@@ -21,26 +22,22 @@ struct frame_head {
 static struct frame_head *
 dump_backtrace(struct frame_head * head)
 {
-	oprofile_add_trace(head->ret);
+	struct frame_head bufhead[2];
 
-	/* frame pointers should strictly progress back up the stack
-	 * (towards higher addresses) */
-	if (head >= head->ebp)
+	/* Also check accessibility of one struct frame_head beyond */
+	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
+		return NULL;
+	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
 		return NULL;
 
-	return head->ebp;
-}
-
-/* check that the page(s) containing the frame head are present */
-static int pages_present(struct frame_head * head)
-{
-	struct mm_struct * mm = current->mm;
+	oprofile_add_trace(bufhead[0].ret);
 
-	/* FIXME: only necessary once per page */
-	if (!check_user_page_readable(mm, (unsigned long)head))
-		return 0;
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (head >= bufhead[0].ebp)
+		return NULL;
 
-	return check_user_page_readable(mm, (unsigned long)(head + 1));
+	return bufhead[0].ebp;
 }
 
 /*
@@ -97,15 +94,6 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 		return;
 	}
 
-#ifdef CONFIG_SMP
-	if (!spin_trylock(&current->mm->page_table_lock))
-		return;
-#endif
-
-	while (depth-- && head && pages_present(head))
+	while (depth-- && head)
 		head = dump_backtrace(head);
-
-#ifdef CONFIG_SMP
-	spin_unlock(&current->mm->page_table_lock);
-#endif
 }

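Both oprofile backtracers (this one and the ARM one above) converge on the same interrupt-safe recipe for peeking at user stack frames: access_ok() plus __copy_from_user_inatomic(), which fails rather than sleeps when the page is not resident, instead of walking page tables under page_table_lock. Distilled into a hedged sketch (read_user_frame() is a hypothetical name; the struct layout mirrors the i386 frame_head above):

#include <asm/uaccess.h>

struct frame_head {
	struct frame_head *ebp;
	unsigned long ret;
} __attribute__((packed));

static int read_user_frame(struct frame_head *dst,
			   struct frame_head __user *src)
{
	if (!access_ok(VERIFY_READ, src, sizeof(*dst)))
		return -EFAULT;
	/* fails instead of faulting in the page: safe in profiling IRQs */
	if (__copy_from_user_inatomic(dst, src, sizeof(*dst)))
		return -EFAULT;
	return 0;
}
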
+ 2 - 1
arch/ia64/kernel/perfmon.c

@@ -2352,7 +2352,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	insert_vm_struct(mm, vma);
 
 	mm->total_vm  += size >> PAGE_SHIFT;
-	vm_stat_account(vma);
+	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+							vma_pages(vma));
 	up_write(&task->mm->mmap_sem);
 
 	/*

+ 6 - 1
arch/ia64/mm/discontig.c

@@ -555,9 +555,13 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
-		unsigned long present = pgdat->node_present_pages;
+		unsigned long present;
+		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+
 		printk("Node ID: %d\n", pgdat->node_id);
+		pgdat_resize_lock(pgdat, &flags);
+		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
 			struct page *page;
 			if (pfn_valid(pgdat->node_start_pfn + i))
@@ -571,6 +575,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page)-1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 		total_present += present;
 		total_reserved += reserved;
 		total_cached += cached;

+ 7 - 27
arch/ia64/mm/fault.c

@@ -19,32 +19,6 @@
 
 extern void die (char *, struct pt_regs *, long);
 
-/*
- * This routine is analogous to expand_stack() but instead grows the
- * register backing store (which grows towards higher addresses).
- * Since the register backing store is access sequentially, we
- * disallow growing the RBS by more than a page at a time.  Note that
- * the VM_GROWSUP flag can be set on any VM area but that's fine
- * because the total process size is still limited by RLIMIT_STACK and
- * RLIMIT_AS.
- */
-static inline long
-expand_backing_store (struct vm_area_struct *vma, unsigned long address)
-{
-	unsigned long grow;
-
-	grow = PAGE_SIZE >> PAGE_SHIFT;
-	if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
-	    || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
-		return -ENOMEM;
-	vma->vm_end += PAGE_SIZE;
-	vma->vm_mm->total_vm += grow;
-	if (vma->vm_flags & VM_LOCKED)
-		vma->vm_mm->locked_vm += grow;
-	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
-	return 0;
-}
-
 /*
  * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
  * (inside region 5, on ia64) and that page is present.
@@ -185,7 +159,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
 		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
 			goto bad_area;
-		if (expand_backing_store(vma, address))
+		/*
+		 * Since the register backing store is accessed sequentially,
+		 * we disallow growing it by more than a page at a time.
+		 */
+		if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
+			goto bad_area;
+		if (expand_upwards(vma, address))
 			goto bad_area;
 	}
 	goto good_area;

+ 4 - 9
arch/ia64/mm/init.c

@@ -158,7 +158,7 @@ ia64_init_addr_space (void)
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
 		vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
-		vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
+		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
@@ -275,26 +275,21 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
 
 	pgd = pgd_offset_k(address);		/* note: this is NOT pgd_offset()! */
 
-	spin_lock(&init_mm.page_table_lock);
 	{
 		pud = pud_alloc(&init_mm, pgd, address);
 		if (!pud)
 			goto out;
-
 		pmd = pmd_alloc(&init_mm, pud, address);
 		if (!pmd)
 			goto out;
-		pte = pte_alloc_map(&init_mm, pmd, address);
+		pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			goto out;
-		if (!pte_none(*pte)) {
-			pte_unmap(pte);
+		if (!pte_none(*pte))
 			goto out;
-		}
 		set_pte(pte, mk_pte(page, pgprot));
-		pte_unmap(pte);
 	}
-  out:	spin_unlock(&init_mm.page_table_lock);
+  out:
 	/* no need for flush_tlb */
 	return page;
 }

+ 2 - 0
arch/ia64/mm/tlb.c

@@ -158,10 +158,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 # ifdef CONFIG_SMP
 	platform_global_tlb_purge(mm, start, end, nbits);
 # else
+	preempt_disable();
 	do {
 		ia64_ptcl(start, (nbits<<2));
 		start += (1UL << nbits);
 	} while (start < end);
+	preempt_enable();
 # endif
 
 	ia64_srlz_i();			/* srlz.i implies srlz.d */

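ia64_ptcl() purges only the local CPU's TLB, so the loop must not migrate between CPUs halfway through; with page_table_lock no longer held here, preemption has to be disabled explicitly. Restated as a small helper (purge_range_local() is a hypothetical name wrapping the exact loop above):

static void purge_range_local(unsigned long start, unsigned long end,
			      unsigned long nbits)
{
	preempt_disable();	/* pin to this CPU: ptc.l is CPU-local */
	do {
		ia64_ptcl(start, (nbits << 2));
		start += (1UL << nbits);
	} while (start < end);
	preempt_enable();
}
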
+ 8 - 1
arch/m32r/mm/init.c

@@ -48,6 +48,8 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -60,6 +62,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk("%d pages of RAM\n", total);
 	printk("%d pages of HIGHMEM\n",highmem);
@@ -150,10 +153,14 @@ int __init reservedpages_count(void)
 	int reservedpages, nid, i;
 
 	reservedpages = 0;
-	for_each_online_node(nid)
+	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++)
 			if (PageReserved(nid_page_nr(nid, i)))
 				reservedpages++;
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
+	}
 
 	return reservedpages;
 }

+ 1 - 3
arch/m32r/mm/ioremap.c

@@ -67,7 +67,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -90,7 +90,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -104,7 +103,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }

+ 1 - 23
arch/m68k/Kconfig

@@ -388,33 +388,11 @@ config AMIGA_PCMCIA
 	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
 	  600. If you intend to use pcmcia cards say Y; otherwise say N.
 
-config STRAM_SWAP
-	bool "Support for ST-RAM as swap space"
-	depends on ATARI && BROKEN
-	---help---
-	  Some Atari 68k machines (including the 520STF and 1020STE) divide
-	  their addressable memory into ST and TT sections.  The TT section
-	  (up to 512MB) is the main memory; the ST section (up to 4MB) is
-	  accessible to the built-in graphics board, runs slower, and is
-	  present mainly for backward compatibility with older machines.
-
-	  This enables support for using (parts of) ST-RAM as swap space,
-	  instead of as normal system memory. This can first enhance system
-	  performance if you have lots of alternate RAM (compared to the size
-	  of ST-RAM), because executable code always will reside in faster
-	  memory. ST-RAM will remain as ultra-fast swap space. On the other
-	  hand, it allows much improved dynamic allocations of ST-RAM buffers
-	  for device driver modules (e.g. floppy, ACSI, SLM printer, DMA
-	  sound). The probability that such allocations at module load time
-	  fail is drastically reduced.
-
 config STRAM_PROC
 	bool "ST-RAM statistics in /proc"
 	depends on ATARI
 	help
-	  Say Y here to report ST-RAM usage statistics in /proc/stram.  See
-	  the help for CONFIG_STRAM_SWAP for discussion of ST-RAM and its
-	  uses.
+	  Say Y here to report ST-RAM usage statistics in /proc/stram.
 
 config HEARTBEAT
 	bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40

File diff suppressed because it is too large
+ 15 - 901
arch/m68k/atari/stram.c


+ 1 - 1
arch/m68k/mm/kmap.c

@@ -201,7 +201,7 @@ void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag)
 			virtaddr += PTRTREESIZE;
 			size -= PTRTREESIZE;
 		} else {
-			pte_dir = pte_alloc_kernel(&init_mm, pmd_dir, virtaddr);
+			pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
 			if (!pte_dir) {
 				printk("ioremap: no mem for pte_dir\n");
 				return NULL;

+ 1 - 1
arch/m68k/sun3x/dvma.c

@@ -116,7 +116,7 @@ inline int dvma_map_cpu(unsigned long kaddr,
 			pte_t *pte;
 			unsigned long end3;
 
-			if((pte = pte_alloc_kernel(&init_mm, pmd, vaddr)) == NULL) {
+			if((pte = pte_alloc_kernel(pmd, vaddr)) == NULL) {
 				ret = -ENOMEM;
 				goto out;
 			}

+ 0 - 1
arch/mips/kernel/irixelf.c

@@ -697,7 +697,6 @@ static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* Do this so that we can load the interpreter, if need be.  We will
 	 * change some of these later.
 	 */
-	set_mm_counter(current->mm, rss, 0);
 	setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
 	current->mm->start_stack = bprm->p;
 

+ 1 - 3
arch/mips/mm/ioremap.c

@@ -55,7 +55,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -77,7 +77,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pud_t *pud;
 		pmd_t *pmd;
@@ -96,7 +95,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }

+ 9 - 15
arch/parisc/kernel/cache.c

@@ -270,7 +270,6 @@ void flush_dcache_page(struct page *page)
 	unsigned long offset;
 	unsigned long offset;
 	unsigned long addr;
 	unsigned long addr;
 	pgoff_t pgoff;
 	pgoff_t pgoff;
-	pte_t *pte;
 	unsigned long pfn = page_to_pfn(page);
 	unsigned long pfn = page_to_pfn(page);
 
 
 
 
@@ -301,21 +300,16 @@ void flush_dcache_page(struct page *page)
 		 * taking a page fault if the pte doesn't exist.
 		 * This is just for speed.  If the page translation
 		 * isn't there, there's no point exciting the
-		 * nadtlb handler into a nullification frenzy */
-
-
-  		if(!(pte = translation_exists(mpnt, addr)))
-			continue;
-
-		/* make sure we really have this page: the private
+		 * nadtlb handler into a nullification frenzy.
+		 *
+		 * Make sure we really have this page: the private
 		 * mappings may cover this area but have COW'd this
-		 * particular page */
-		if(pte_pfn(*pte) != pfn)
-  			continue;
-
-		__flush_cache_page(mpnt, addr);
-
-		break;
+		 * particular page.
+		 */
+  		if (translation_exists(mpnt, addr, pfn)) {
+			__flush_cache_page(mpnt, addr);
+			break;
+		}
 	}
 	flush_dcache_mmap_unlock(mapping);
 }

+ 1 - 1
arch/parisc/kernel/pci-dma.c

@@ -114,7 +114,7 @@ static inline int map_pmd_uncached(pmd_t * pmd, unsigned long vaddr,
 	if (end > PGDIR_SIZE)
 		end = PGDIR_SIZE;
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, vaddr);
+		pte_t * pte = pte_alloc_kernel(pmd, vaddr);
 		if (!pte)
 			return -ENOMEM;
 		if (map_pte_uncached(pte, orig_vaddr, end - vaddr, paddr_ptr))

+ 3 - 0
arch/parisc/mm/init.c

@@ -505,7 +505,9 @@ void show_mem(void)
 
 		for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
 			struct page *p;
+			unsigned long flags;
 
+			pgdat_resize_lock(NODE_DATA(i), &flags);
 			p = nid_page_nr(i, j) - node_start_pfn(i);
 
 			total++;
@@ -517,6 +519,7 @@ void show_mem(void)
 				free++;
 			else
 				shared += page_count(p) - 1;
+			pgdat_resize_unlock(NODE_DATA(i), &flags);
        	}
 	}
 #endif

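Note: pgdat_resize_lock()/pgdat_resize_unlock() are new with memory hotplug: they pin the node's span while it is walked, so a concurrent hot-add cannot resize the pgdat mid-scan. The idiom, sketched (the counting body is illustrative):

	/* Sketch: scan one node's pages with the span held stable. */
	static unsigned long count_reserved_pages(pg_data_t *pgdat)
	{
		unsigned long i, flags, reserved = 0;

		pgdat_resize_lock(pgdat, &flags);	/* excludes concurrent resize */
		for (i = 0; i < pgdat->node_spanned_pages; i++)
			if (PageReserved(pgdat_page_nr(pgdat, i)))
				reserved++;
		pgdat_resize_unlock(pgdat, &flags);
		return reserved;
	}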
+ 2 - 4
arch/parisc/mm/ioremap.c

@@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(NULL, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -75,10 +75,9 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
-		pmd = pmd_alloc(dir, address);
+		pmd = pmd_alloc(&init_mm, dir, address);
 		error = -ENOMEM;
 		if (!pmd)
 			break;
@@ -89,7 +88,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }

+ 1 - 5
arch/ppc/kernel/dma-mapping.c

@@ -335,8 +335,6 @@ static int __init dma_alloc_init(void)
 	pte_t *pte;
 	int ret = 0;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	do {
 		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
 		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -347,7 +345,7 @@ static int __init dma_alloc_init(void)
 		}
 		WARN_ON(!pmd_none(*pmd));
 
-		pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
 		if (!pte) {
 			printk(KERN_ERR "%s: no pte tables\n", __func__);
 			ret = -ENOMEM;
@@ -357,8 +355,6 @@ static int __init dma_alloc_init(void)
 		consistent_pte = pte;
 	} while (0);
 
-	spin_unlock(&init_mm.page_table_lock);
-
 	return ret;
 }
 

+ 0 - 4
arch/ppc/mm/4xx_mmu.c

@@ -110,13 +110,11 @@ unsigned long __init mmu_mapin_ram(void)
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
-		spin_lock(&init_mm.page_table_lock);
 		pmdp = pmd_offset(pgd_offset_k(v), v);
 		pmd_val(*pmdp++) = val;
 		pmd_val(*pmdp++) = val;
 		pmd_val(*pmdp++) = val;
 		pmd_val(*pmdp++) = val;
-		spin_unlock(&init_mm.page_table_lock);
 
 		v += LARGE_PAGE_SIZE_16M;
 		p += LARGE_PAGE_SIZE_16M;
@@ -127,10 +125,8 @@ unsigned long __init mmu_mapin_ram(void)
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
-		spin_lock(&init_mm.page_table_lock);
 		pmdp = pmd_offset(pgd_offset_k(v), v);
 		pmd_val(*pmdp) = val;
-		spin_unlock(&init_mm.page_table_lock);
 
 		v += LARGE_PAGE_SIZE_4M;
 		p += LARGE_PAGE_SIZE_4M;

+ 1 - 3
arch/ppc/mm/pgtable.c

@@ -280,18 +280,16 @@ map_page(unsigned long va, phys_addr_t pa, int flags)
 	pte_t *pg;
 	int err = -ENOMEM;
 
-	spin_lock(&init_mm.page_table_lock);
 	/* Use upper 10 bits of VA to index the first level map */
 	pd = pmd_offset(pgd_offset_k(va), va);
 	/* Use middle 10 bits of VA to index the second-level map */
-	pg = pte_alloc_kernel(&init_mm, pd, va);
+	pg = pte_alloc_kernel(pd, va);
 	if (pg != 0) {
 		err = 0;
 		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
 		if (mem_init_done)
 			flush_HPTE(0, va, pmd_val(*pd));
 	}
-	spin_unlock(&init_mm.page_table_lock);
 	return err;
 }
 

+ 7 - 5
arch/ppc64/kernel/vdso.c

@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
 		return NOPAGE_SIGBUS;
 
 	/*
-	 * Last page is systemcfg, special handling here, no get_page() a
-	 * this is a reserved page
+	 * Last page is systemcfg.
 	 */
 	if ((vma->vm_end - address) <= PAGE_SIZE)
-		return virt_to_page(systemcfg);
+		pg = virt_to_page(systemcfg);
+	else
+		pg = virt_to_page(vbase + offset);
 
-	pg = virt_to_page(vbase + offset);
 	get_page(pg);
 	DBG(" ->page count: %d\n", page_count(pg));
 
@@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
 	 * gettimeofday will be totally dead. It's fine to use that for setting
 	 * breakpoints in the vDSO code pages though
 	 */
-	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
 	vma->vm_ops = &vdso_vmops;
@@ -603,6 +603,8 @@ void __init vdso_init(void)
 		ClearPageReserved(pg);
 		get_page(pg);
 	}
+
+	get_page(virt_to_page(systemcfg));
 }
 
 int in_gate_area_no_task(unsigned long addr)

+ 0 - 5
arch/ppc64/mm/imalloc.c

@@ -300,12 +300,7 @@ void im_free(void * addr)
 	for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
 		if (tmp->addr == addr) {
 			*p = tmp->next;
-
-			/* XXX: do we need the lock? */
-			spin_lock(&init_mm.page_table_lock);
 			unmap_vm_area(tmp);
-			spin_unlock(&init_mm.page_table_lock);
-
 			kfree(tmp);
 			up(&imlist_sem);
 			return;

+ 84 - 3
arch/ppc64/mm/init.c

@@ -104,6 +104,8 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -114,6 +116,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk("%ld pages of RAM\n", total);
 	printk("%ld reserved pages\n", reserved);
@@ -155,7 +158,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 	unsigned long vsid;
 
 	if (mem_init_done) {
-		spin_lock(&init_mm.page_table_lock);
 		pgdp = pgd_offset_k(ea);
 		pudp = pud_alloc(&init_mm, pgdp, ea);
 		if (!pudp)
@@ -163,12 +165,11 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 		pmdp = pmd_alloc(&init_mm, pudp, ea);
 		if (!pmdp)
 			return -ENOMEM;
-		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
+		ptep = pte_alloc_kernel(pmdp, ea);
 		if (!ptep)
 			return -ENOMEM;
 		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
 							  __pgprot(flags)));
-		spin_unlock(&init_mm.page_table_lock);
 	} else {
 		unsigned long va, vpn, hash, hpteg;
 
@@ -649,11 +650,14 @@ void __init mem_init(void)
 #endif
 
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat_page_nr(pgdat, i);
 			if (PageReserved(page))
 				reservedpages++;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 
 	codesize = (unsigned long)&_etext - (unsigned long)&_stext;
@@ -867,3 +871,80 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
 	return vma_prot;
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	free_cold_page(page);
+	totalram_pages++;
+	num_physpages++;
+}
+
+/*
+ * This works only for the non-NUMA case.  Later, we'll need a lookup
+ * to convert from real physical addresses to nid, that doesn't use
+ * pfn_to_nid().
+ */
+int __devinit add_memory(u64 start, u64 size)
+{
+	struct pglist_data *pgdata = NODE_DATA(0);
+	struct zone *zone;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	/* this should work for most non-highmem platforms */
+	zone = pgdata->node_zones;
+
+	return __add_pages(zone, start_pfn, nr_pages);
+}
+
+/*
+ * First pass at this code will check to determine if the remove
+ * request is within the RMO.  Do not allow removal within the RMO.
+ */
+int __devinit remove_memory(u64 start, u64 size)
+{
+	struct zone *zone;
+	unsigned long start_pfn, end_pfn, nr_pages;
+
+	start_pfn = start >> PAGE_SHIFT;
+	nr_pages = size >> PAGE_SHIFT;
+	end_pfn = start_pfn + nr_pages;
+
+	printk("%s(): Attempting to remove memoy in range "
+			"%lx to %lx\n", __func__, start, start+size);
+	/*
+	 * check for range within RMO
+	 */
+	zone = page_zone(pfn_to_page(start_pfn));
+
+	printk("%s(): memory will be removed from "
+			"the %s zone\n", __func__, zone->name);
+
+	/*
+	 * not handling removing memory ranges that
+	 * overlap multiple zones yet
+	 */
+	if (end_pfn > (zone->zone_start_pfn + zone->spanned_pages))
+		goto overlap;
+
+	/* make sure it is NOT in RMO */
+	if ((start < lmb.rmo_size) || ((start+size) < lmb.rmo_size)) {
+		printk("%s(): range to be removed must NOT be in RMO!\n",
+			__func__);
+		goto in_rmo;
+	}
+
+	return __remove_pages(zone, start_pfn, nr_pages);
+
+overlap:
+	printk("%s(): memory range to be removed overlaps "
+		"multiple zones!!!\n", __func__);
+in_rmo:
+	return -1;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */

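Note: add_memory()/remove_memory() now take only a physical start and size in bytes (the ACPI driver further down is adjusted to match), and hot-added pages arrive reserved until onlined. A hedged sketch of a caller, with the range values purely illustrative:

	/* Sketch: hot-add one 16MB range through the new two-argument API. */
	static int hotplug_add_example(void)
	{
		u64 start = 0x10000000ULL;		/* illustrative physical base */
		u64 size  = 16 * 1024 * 1024;		/* illustrative length */
		int ret;

		ret = add_memory(start, size);		/* builds struct pages, still offline */
		if (ret)
			return ret;
		/* The pages are put into service later by online_pages(), driven
		 * from the sysfs "state" file in drivers/base/memory.c below. */
		return 0;
	}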
+ 1 - 3
arch/s390/mm/ioremap.c

@@ -58,7 +58,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -80,7 +80,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return 0;
 }

+ 23 - 17
arch/sh/mm/fault.c

@@ -194,10 +194,13 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 			       unsigned long address)
 {
 	unsigned long addrmax = P4SEG;
-	pgd_t *dir;
+	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
+	struct mm_struct *mm;
+	spinlock_t *ptl;
+	int ret = 1;
 
 #ifdef CONFIG_SH_KGDB
 	if (kgdb_nofault && kgdb_bus_err_hook)
@@ -208,28 +211,28 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 	addrmax = P4SEG_STORE_QUE + 0x04000000;
 #endif
 
-	if (address >= P3SEG && address < addrmax)
-		dir = pgd_offset_k(address);
-	else if (address >= TASK_SIZE)
+	if (address >= P3SEG && address < addrmax) {
+		pgd = pgd_offset_k(address);
+		mm = NULL;
+	} else if (address >= TASK_SIZE)
 		return 1;
-	else if (!current->mm)
+	else if (!(mm = current->mm))
 		return 1;
 	else
-		dir = pgd_offset(current->mm, address);
+		pgd = pgd_offset(mm, address);
 
-	pmd = pmd_offset(dir, address);
-	if (pmd_none(*pmd))
-		return 1;
-	if (pmd_bad(*pmd)) {
-		pmd_ERROR(*pmd);
-		pmd_clear(pmd);
+	pmd = pmd_offset(pgd, address);
+	if (pmd_none_or_clear_bad(pmd))
 		return 1;
-	}
-	pte = pte_offset_kernel(pmd, address);
+	if (mm)
+		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+	else
+		pte = pte_offset_kernel(pmd, address);
+
 	entry = *pte;
 	if (pte_none(entry) || pte_not_present(entry)
 	    || (writeaccess && !pte_write(entry)))
-		return 1;
+		goto unlock;
 
 	if (writeaccess)
 		entry = pte_mkdirty(entry);
@@ -251,8 +254,11 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 
 	set_pte(pte, entry);
 	update_mmu_cache(NULL, address, entry);
-
-	return 0;
+	ret = 0;
+unlock:
+	if (mm)
+		pte_unmap_unlock(pte, ptl);
+	return ret;
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)

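Note: this hunk shows the split pte-lock idiom the series introduces for user mappings: pte_offset_map_lock() maps the pte page and returns with its lock held, and every exit path must funnel through pte_unmap_unlock(). The pairing, sketched (the pte update itself is illustrative):

	/* Sketch: modify one user pte under the split pte lock. */
	static void touch_user_pte(struct mm_struct *mm, pmd_t *pmd,
				   unsigned long addr)
	{
		spinlock_t *ptl;
		pte_t *pte, entry;

		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);	/* locked from here */
		entry = *pte;
		if (pte_present(entry))
			set_pte_at(mm, addr, pte, pte_mkdirty(entry));
		pte_unmap_unlock(pte, ptl);	/* single unlock point for all paths */
	}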
+ 0 - 2
arch/sh/mm/hugetlbpage.c

@@ -54,8 +54,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
-#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
-
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {

+ 1 - 3
arch/sh/mm/ioremap.c

@@ -57,7 +57,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -79,7 +79,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -93,7 +92,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }

+ 30 - 38
arch/sh64/mm/cache.c

@@ -584,32 +584,36 @@ static void sh64_dcache_purge_phy_page(unsigned long paddr)
 	}
 }
 
-static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
+static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
+				unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
+	spinlock_t *ptl;
 	unsigned long paddr;
 
-	/* NOTE : all the callers of this have mm->page_table_lock held, so the
-	   following page table traversal is safe even on SMP/pre-emptible. */
-
-	if (!mm) return; /* No way to find physical address of page */
-	pgd = pgd_offset(mm, eaddr);
-	if (pgd_bad(*pgd)) return;
-
-	pmd = pmd_offset(pgd, eaddr);
-	if (pmd_none(*pmd) || pmd_bad(*pmd)) return;
-
-	pte = pte_offset_kernel(pmd, eaddr);
-	entry = *pte;
-	if (pte_none(entry) || !pte_present(entry)) return;
-
-	paddr = pte_val(entry) & PAGE_MASK;
-
-	sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
-
+	if (!mm)
+		return; /* No way to find physical address of page */
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_bad(*pgd))
+		return;
+
+	pmd = pmd_offset(pgd, addr);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		return;
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	do {
+		entry = *pte;
+		if (pte_none(entry) || !pte_present(entry))
+			continue;
+		paddr = pte_val(entry) & PAGE_MASK;
+		sh64_dcache_purge_coloured_phy_page(paddr, addr);
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(pte - 1, ptl);
 }
 /****************************************************************************/
 
@@ -668,7 +672,7 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm,
 	int n_pages;
 
 	n_pages = ((end - start) >> PAGE_SHIFT);
-	if (n_pages >= 64) {
+	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
 #if 1
 		sh64_dcache_purge_all();
 #else
@@ -707,20 +711,10 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm,
 		}
 #endif
 	} else {
-		/* 'Small' range */
-		unsigned long aligned_start;
-		unsigned long eaddr;
-		unsigned long last_page_start;
-
-		aligned_start = start & PAGE_MASK;
-		/* 'end' is 1 byte beyond the end of the range */
-		last_page_start = (end - 1) & PAGE_MASK;
-
-		eaddr = aligned_start;
-		while (eaddr <= last_page_start) {
-			sh64_dcache_purge_user_page(mm, eaddr);
-			eaddr += PAGE_SIZE;
-		}
+		/* Small range, covered by a single page table page */
+		start &= PAGE_MASK;	/* should already be so */
+		end = PAGE_ALIGN(end);	/* should already be so */
+		sh64_dcache_purge_user_pages(mm, start, end);
 	}
 	return;
 }
@@ -880,9 +874,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 	   addresses from the user address space specified by mm, after writing
 	   back any dirty data.
 
-	   Note(1), 'end' is 1 byte beyond the end of the range to flush.
-
-	   Note(2), this is called with mm->page_table_lock held.*/
+	   Note, 'end' is 1 byte beyond the end of the range to flush. */
 
 	sh64_dcache_purge_user_range(mm, start, end);
 	sh64_icache_inv_user_page_range(mm, start, end);
@@ -898,7 +890,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned
 	   the I-cache must be searched too in case the page in question is
 	   both writable and being executed from (e.g. stack trampolines.)
 
-	   Note(1), this is called with mm->page_table_lock held.
+	   Note, this is called with pte lock held.
 	   */
 
 	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

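Note: sh64_dcache_purge_user_pages() now takes pte_offset_map_lock() once for the whole range, which is only valid while the range stays within one page-table page; the new `(start ^ (end - 1)) & PMD_MASK` guard falls back to a full purge when the range crosses a pmd boundary. The bit trick, illustrated in isolation (the mask value below is an assumption for the example, not the sh64 one):

	/* Sketch: XOR exposes any differing bits above the pmd span. */
	#include <stdbool.h>

	#define EXAMPLE_PMD_MASK (~((1UL << 21) - 1))	/* illustrative 2MB span */

	static bool crosses_pmd(unsigned long start, unsigned long end)
	{
		/* 'end' is one byte past the range, hence end - 1 */
		return ((start ^ (end - 1)) & EXAMPLE_PMD_MASK) != 0;
	}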
+ 12 - 176
arch/sh64/mm/hugetlbpage.c

@@ -54,41 +54,31 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
-#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
-
-static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
-			 struct page *page, pte_t * page_table, int write_access)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry)
 {
-	unsigned long i;
-	pte_t entry;
-
-	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
-
-	if (write_access)
-		entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
-						       vma->vm_page_prot)));
-	else
-		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
-	entry = pte_mkyoung(entry);
-	mk_pte_huge(entry);
+	int i;
 
 	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-		set_pte(page_table, entry);
-		page_table++;
-
+		set_pte_at(mm, addr, ptep, entry);
+		ptep++;
+		addr += PAGE_SIZE;
 		pte_val(entry) += PAGE_SIZE;
 	}
 }
 
-pte_t huge_ptep_get_and_clear(pte_t *ptep)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
 {
 	pte_t entry;
+	int i;
 
 	entry = *ptep;
 
 	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-		pte_clear(pte);
-		pte++;
+		pte_clear(mm, addr, ptep);
+		addr += PAGE_SIZE;
+		ptep++;
 	}
 
 	return entry;
@@ -106,79 +96,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
-			    struct vm_area_struct *vma)
-{
-	pte_t *src_pte, *dst_pte, entry;
-	struct page *ptepage;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
-	int i;
-
-	while (addr < end) {
-		dst_pte = huge_pte_alloc(dst, addr);
-		if (!dst_pte)
-			goto nomem;
-		src_pte = huge_pte_offset(src, addr);
-		BUG_ON(!src_pte || pte_none(*src_pte));
-		entry = *src_pte;
-		ptepage = pte_page(entry);
-		get_page(ptepage);
-		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-			set_pte(dst_pte, entry);
-			pte_val(entry) += PAGE_SIZE;
-			dst_pte++;
-		}
-		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
-		addr += HPAGE_SIZE;
-	}
-	return 0;
-
-nomem:
-	return -ENOMEM;
-}
-
-int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			struct page **pages, struct vm_area_struct **vmas,
-			unsigned long *position, int *length, int i)
-{
-	unsigned long vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	while (vaddr < vma->vm_end && remainder) {
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			pte = huge_pte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			BUG_ON(!pte || pte_none(*pte));
-
-			page = pte_page(*pte);
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 struct page *follow_huge_addr(struct mm_struct *mm,
 			      unsigned long address, int write)
 {
@@ -195,84 +112,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 {
 	return NULL;
 }
-
-void unmap_hugepage_range(struct vm_area_struct *vma,
-			  unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	unsigned long address;
-	pte_t *pte;
-	struct page *page;
-	int i;
-
-	BUG_ON(start & (HPAGE_SIZE - 1));
-	BUG_ON(end & (HPAGE_SIZE - 1));
-
-	for (address = start; address < end; address += HPAGE_SIZE) {
-		pte = huge_pte_offset(mm, address);
-		BUG_ON(!pte);
-		if (pte_none(*pte))
-			continue;
-		page = pte_page(*pte);
-		put_page(page);
-		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-			pte_clear(mm, address+(i*PAGE_SIZE), pte);
-			pte++;
-		}
-	}
-	add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
-	flush_tlb_range(vma, start, end);
-}
-
-int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr;
-	int ret = 0;
-
-	BUG_ON(vma->vm_start & ~HPAGE_MASK);
-	BUG_ON(vma->vm_end & ~HPAGE_MASK);
-
-	spin_lock(&mm->page_table_lock);
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
-		unsigned long idx;
-		pte_t *pte = huge_pte_alloc(mm, addr);
-		struct page *page;
-
-		if (!pte) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		if (!pte_none(*pte))
-			continue;
-
-		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
-			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
-		page = find_get_page(mapping, idx);
-		if (!page) {
-			/* charge the fs quota first */
-			if (hugetlb_get_quota(mapping)) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			page = alloc_huge_page();
-			if (!page) {
-				hugetlb_put_quota(mapping);
-				ret = -ENOMEM;
-				goto out;
-			}
-			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
-			if (! ret) {
-				unlock_page(page);
-			} else {
-				hugetlb_put_quota(mapping);
-				free_huge_page(page);
-				goto out;
-			}
-		}
-		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
-	}
-out:
-	spin_unlock(&mm->page_table_lock);
-	return ret;
-}

+ 1 - 3
arch/sh64/mm/ioremap.c

@@ -79,7 +79,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 		BUG();
 
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -101,7 +101,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
 		error = -ENOMEM;
@@ -115,7 +114,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return 0;
 }

+ 4 - 3
arch/sparc/mm/generic.c

@@ -73,14 +73,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	int space = GET_IOSPACE(pfn);
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+	/* See comment in mm/memory.c remap_pfn_range */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
 	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
 	flush_cache_range(vma, beg, end);
 
-	spin_lock(&mm->page_table_lock);
 	while (from < end) {
-		pmd_t *pmd = pmd_alloc(current->mm, dir, from);
+		pmd_t *pmd = pmd_alloc(mm, dir, from);
 		error = -ENOMEM;
 		if (!pmd)
 			break;
@@ -90,7 +92,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 		from = (from + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	}
-	spin_unlock(&mm->page_table_lock);
 
 	flush_tlb_range(vma, beg, end);
 	return error;

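Note: both sparc copies of io_remap_pfn_range() now mark the vma VM_IO | VM_RESERVED before filling it, matching mm/memory.c remap_pfn_range(): the flags keep the core VM from treating the pfn range as ordinary pages (no core dumps, no unmap surprises). A sketch of a driver mmap handler following the same convention (the pfn value is illustrative):

	/* Sketch: remapping MMIO with the flags this diff establishes. */
	static int example_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long pfn = 0xfd000;	/* illustrative MMIO pfn */
		unsigned long size = vma->vm_end - vma->vm_start;

		vma->vm_flags |= VM_IO | VM_RESERVED;	/* core VM: hands off */
		return io_remap_pfn_range(vma, vma->vm_start, pfn, size,
					  vma->vm_page_prot);
	}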
+ 0 - 1
arch/sparc64/kernel/binfmt_aout32.c

@@ -241,7 +241,6 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
 
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;

+ 5 - 4
arch/sparc64/mm/generic.c

@@ -127,14 +127,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	int space = GET_IOSPACE(pfn);
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+	/* See comment in mm/memory.c remap_pfn_range */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
 	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
 	flush_cache_range(vma, beg, end);
 
-	spin_lock(&mm->page_table_lock);
 	while (from < end) {
-		pud_t *pud = pud_alloc(current->mm, dir, from);
+		pud_t *pud = pud_alloc(mm, dir, from);
 		error = -ENOMEM;
 		if (!pud)
 			break;
@@ -144,8 +146,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 		from = (from + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	}
-	flush_tlb_range(vma, beg, end);
-	spin_unlock(&mm->page_table_lock);
 
+	flush_tlb_range(vma, beg, end);
 	return error;
 }

+ 3 - 4
arch/sparc64/mm/tlb.c

@@ -18,8 +18,7 @@
 
 /* Heavily inspired by the ppc64 code.  */
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) =
-	{ NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) = { 0, };
 
 void flush_tlb_pending(void)
 {
@@ -72,7 +71,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t
 
 no_cache_flush:
 
-	if (mp->tlb_frozen)
+	if (mp->fullmm)
 		return;
 
 	nr = mp->tlb_nr;
@@ -97,7 +96,7 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long
 	unsigned long nr = mp->tlb_nr;
 	long s = start, e = end, vpte_base;
 
-	if (mp->tlb_frozen)
+	if (mp->fullmm)
 		return;
 
 	/* If start is greater than end, that is a real problem.  */

+ 0 - 1
arch/um/include/tlb.h

@@ -34,7 +34,6 @@ struct host_vm_op {
 	} u;
 };
 
-extern void mprotect_kernel_vm(int w);
 extern void force_flush_all(void);
 extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                              unsigned long end_addr, int force,

+ 5 - 3
arch/um/kernel/process_kern.c

@@ -222,6 +222,7 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	pte_t ptent;
 
 	if(task->mm == NULL) 
 		return(ERR_PTR(-EINVAL));
@@ -238,12 +239,13 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
 		return(ERR_PTR(-EINVAL));
 
 	pte = pte_offset_kernel(pmd, addr);
-	if(!pte_present(*pte)) 
+	ptent = *pte;
+	if(!pte_present(ptent))
 		return(ERR_PTR(-EINVAL));
 
 	if(pte_out != NULL)
-		*pte_out = *pte;
-	return((void *) (pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK));
+		*pte_out = ptent;
+	return((void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK));
 }
 
 char *current_cmd(void)

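Note: without the page_table_lock over this walk, um_virt_to_phys() now snapshots the pte into a local once, so the presence test and the later uses all see the same value even if the entry changes concurrently. The defensive read, sketched (the zero error convention is illustrative):

	/* Sketch: read a pte once, then use only the local copy. */
	static unsigned long pte_phys_snapshot(pte_t *pte, unsigned long addr)
	{
		pte_t ptent = *pte;	/* single read, consistent thereafter */

		if (!pte_present(ptent))
			return 0;
		return (pte_val(ptent) & PAGE_MASK) | (addr & ~PAGE_MASK);
	}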
+ 1 - 3
arch/um/kernel/skas/mmu.c

@@ -28,7 +28,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	pmd_t *pmd;
 	pte_t *pte;
 
-	spin_lock(&mm->page_table_lock);
 	pgd = pgd_offset(mm, proc);
 	pud = pud_alloc(mm, pgd, proc);
 	if (!pud)
@@ -63,7 +62,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
 	*pte = pte_mkexec(*pte);
 	*pte = pte_wrprotect(*pte);
-	spin_unlock(&mm->page_table_lock);
 	return(0);
 
  out_pmd:
@@ -71,7 +69,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
  out_pte:
 	pmd_free(pmd);
  out:
-	spin_unlock(&mm->page_table_lock);
 	return(-ENOMEM);
 }
 
@@ -147,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm)
 
 	if(!proc_mm || !ptrace_faultinfo){
 		free_page(mmu->id.stack);
+		pte_lock_deinit(virt_to_page(mmu->last_page_table));
 		pte_free_kernel((pte_t *) mmu->last_page_table);
                 dec_page_state(nr_page_table_pages);
 #ifdef CONFIG_3_LEVEL_PGTABLES

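Note: with split pte locks, a page serving as a page table carries an initialised lock, and a table freed outside the usual teardown path (as destroy_context_skas() does here) must call pte_lock_deinit() before pte_free_kernel(). The pairing, sketched under that assumption:

	/* Sketch: symmetric teardown of a manually managed pte page. */
	static void drop_pte_page(unsigned long pte_page_vaddr)
	{
		struct page *page = virt_to_page(pte_page_vaddr);

		pte_lock_deinit(page);			/* undo the pte_lock_init() */
		pte_free_kernel((pte_t *) pte_page_vaddr);
	}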
+ 0 - 36
arch/um/kernel/tt/tlb.c

@@ -74,42 +74,6 @@ void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end)
                 atomic_inc(&vmchange_seq);
 }
 
-static void protect_vm_page(unsigned long addr, int w, int must_succeed)
-{
-	int err;
-
-	err = protect_memory(addr, PAGE_SIZE, 1, w, 1, must_succeed);
-	if(err == 0) return;
-	else if((err == -EFAULT) || (err == -ENOMEM)){
-		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-		protect_vm_page(addr, w, 1);
-	}
-	else panic("protect_vm_page : protect failed, errno = %d\n", err);
-}
-
-void mprotect_kernel_vm(int w)
-{
-	struct mm_struct *mm;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long addr;
-	
-	mm = &init_mm;
-	for(addr = start_vm; addr < end_vm;){
-		pgd = pgd_offset(mm, addr);
-		pud = pud_offset(pgd, addr);
-		pmd = pmd_offset(pud, addr);
-		if(pmd_present(*pmd)){
-			pte = pte_offset_kernel(pmd, addr);
-			if(pte_present(*pte)) protect_vm_page(addr, w, 0);
-			addr += PAGE_SIZE;
-		}
-		else addr += PMD_SIZE;
-	}
-}
-
 void flush_tlb_kernel_vm_tt(void)
 {
         flush_tlb_kernel_range(start_vm, end_vm);

+ 0 - 1
arch/x86_64/ia32/ia32_aout.c

@@ -314,7 +314,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 	current->mm->cached_hole_size = 0;
 
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;

+ 1 - 3
arch/x86_64/mm/ioremap.c

@@ -60,7 +60,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -105,7 +105,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pud_t *pud;
 		pud = pud_alloc(&init_mm, pgd, address);
@@ -119,7 +118,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgd++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }

+ 4 - 1
crypto/api.c

@@ -215,7 +215,10 @@ int crypto_register_alg(struct crypto_alg *alg)
 	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
 		return -EINVAL;
 
-	if (alg->cra_alignmask > PAGE_SIZE)
+	if (alg->cra_alignmask & alg->cra_blocksize)
+		return -EINVAL;
+
+	if (alg->cra_blocksize > PAGE_SIZE)
 		return -EINVAL;
 	
 	down_write(&crypto_alg_sem);

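Note: the two registration checks above are bit tricks: a well-formed alignmask is one less than a power of two, so `mask & (mask + 1)` must be zero; and a blocksize compatible with that alignment has no bits inside the mask, so `mask & blocksize` must be zero too. A small stand-alone illustration:

	/* Sketch: the two validity tests with illustrative values. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int alignmask = 7;	/* 8-byte alignment: 0b0111 */
		unsigned int blocksize = 16;	/* multiple of 8, low bits clear */

		printf("mask ok:  %d\n", (alignmask & (alignmask + 1)) == 0); /* 1 */
		printf("block ok: %d\n", (alignmask & blocksize) == 0);       /* 1 */
		return 0;
	}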
+ 5 - 14
crypto/hmac.c

@@ -18,18 +18,15 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 #include "internal.h"
 #include "internal.h"
 
 
 static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen)
 static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen)
 {
 {
 	struct scatterlist tmp;
 	struct scatterlist tmp;
 	
 	
-	tmp.page = virt_to_page(key);
-	tmp.offset = offset_in_page(key);
-	tmp.length = keylen;
+	sg_set_buf(&tmp, key, keylen);
 	crypto_digest_digest(tfm, &tmp, 1, key);
-		
 }
 
 int crypto_alloc_hmac_block(struct crypto_tfm *tfm)
@@ -69,9 +66,7 @@ void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen)
 	for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++)
 		ipad[i] ^= 0x36;
 
-	tmp.page = virt_to_page(ipad);
-	tmp.offset = offset_in_page(ipad);
-	tmp.length = crypto_tfm_alg_blocksize(tfm);
+	sg_set_buf(&tmp, ipad, crypto_tfm_alg_blocksize(tfm));
 	
 	crypto_digest_init(tfm);
 	crypto_digest_update(tfm, &tmp, 1);
@@ -103,16 +98,12 @@ void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key,
 	for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++)
 		opad[i] ^= 0x5c;
 
-	tmp.page = virt_to_page(opad);
-	tmp.offset = offset_in_page(opad);
-	tmp.length = crypto_tfm_alg_blocksize(tfm);
+	sg_set_buf(&tmp, opad, crypto_tfm_alg_blocksize(tfm));
 
 	crypto_digest_init(tfm);
 	crypto_digest_update(tfm, &tmp, 1);
 	
-	tmp.page = virt_to_page(out);
-	tmp.offset = offset_in_page(out);
-	tmp.length = crypto_tfm_alg_digestsize(tfm);
+	sg_set_buf(&tmp, out, crypto_tfm_alg_digestsize(tfm));
 
 	crypto_digest_update(tfm, &tmp, 1);
 	crypto_digest_final(tfm, out);

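Note: sg_set_buf() from the new <linux/scatterlist.h> is pure convenience: it fills the page/offset/length triple that hmac.c and tcrypt.c previously assigned by hand, exactly as the removed lines show. Its effect, sketched:

	/* Sketch: what sg_set_buf(sg, buf, buflen) does, circa this kernel. */
	static inline void example_sg_set_buf(struct scatterlist *sg,
					      void *buf, unsigned int buflen)
	{
		sg->page   = virt_to_page(buf);		/* page backing the buffer */
		sg->offset = offset_in_page(buf);	/* start within that page */
		sg->length = buflen;
	}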
+ 17 - 39
crypto/tcrypt.c

@@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 #include <linux/string.h>
 #include <linux/crypto.h>
 #include <linux/highmem.h>
@@ -86,7 +86,6 @@ static void hexdump(unsigned char *buf, unsigned int len)
 static void test_hash(char *algo, struct hash_testvec *template,
 		      unsigned int tcount)
 {
-	char *p;
 	unsigned int i, j, k, temp;
 	struct scatterlist sg[8];
 	char result[64];
@@ -116,10 +115,7 @@ static void test_hash(char *algo, struct hash_testvec *template,
 		printk("test %u:\n", i + 1);
 		printk("test %u:\n", i + 1);
 		memset(result, 0, 64);
 		memset(result, 0, 64);
 
 
-		p = hash_tv[i].plaintext;
-		sg[0].page = virt_to_page(p);
-		sg[0].offset = offset_in_page(p);
-		sg[0].length = hash_tv[i].psize;
+		sg_set_buf(&sg[0], hash_tv[i].plaintext, hash_tv[i].psize);
 
 
 		crypto_digest_init(tfm);
 		crypto_digest_init(tfm);
 		if (tfm->crt_u.digest.dit_setkey) {
 		if (tfm->crt_u.digest.dit_setkey) {
@@ -154,10 +150,8 @@ static void test_hash(char *algo, struct hash_testvec *template,
 				       hash_tv[i].plaintext + temp,
 				       hash_tv[i].tap[k]);
 				temp += hash_tv[i].tap[k];
-				p = &xbuf[IDX[k]];
-				sg[k].page = virt_to_page(p);
-				sg[k].offset = offset_in_page(p);
-				sg[k].length = hash_tv[i].tap[k];
+				sg_set_buf(&sg[k], &xbuf[IDX[k]],
+					    hash_tv[i].tap[k]);
 			}
 
 			crypto_digest_digest(tfm, sg, hash_tv[i].np, result);
@@ -179,7 +173,6 @@ static void test_hash(char *algo, struct hash_testvec *template,
 static void test_hmac(char *algo, struct hmac_testvec *template,
 		      unsigned int tcount)
 {
-	char *p;
 	unsigned int i, j, k, temp;
 	struct scatterlist sg[8];
 	char result[64];
@@ -210,11 +203,8 @@ static void test_hmac(char *algo, struct hmac_testvec *template,
 		printk("test %u:\n", i + 1);
 		printk("test %u:\n", i + 1);
 		memset(result, 0, sizeof (result));
 		memset(result, 0, sizeof (result));
 
 
-		p = hmac_tv[i].plaintext;
 		klen = hmac_tv[i].ksize;
 		klen = hmac_tv[i].ksize;
-		sg[0].page = virt_to_page(p);
-		sg[0].offset = offset_in_page(p);
-		sg[0].length = hmac_tv[i].psize;
+		sg_set_buf(&sg[0], hmac_tv[i].plaintext, hmac_tv[i].psize);
 
 
 		crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, 1, result);
 		crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, 1, result);
 
 
@@ -243,10 +233,8 @@ static void test_hmac(char *algo, struct hmac_testvec *template,
 				       hmac_tv[i].plaintext + temp,
 				       hmac_tv[i].tap[k]);
 				temp += hmac_tv[i].tap[k];
-				p = &xbuf[IDX[k]];
-				sg[k].page = virt_to_page(p);
-				sg[k].offset = offset_in_page(p);
-				sg[k].length = hmac_tv[i].tap[k];
+				sg_set_buf(&sg[k], &xbuf[IDX[k]],
+					    hmac_tv[i].tap[k]);
 			}
 
 			crypto_hmac(tfm, hmac_tv[i].key, &klen, sg,
@@ -270,7 +258,7 @@ static void test_cipher(char *algo, int mode, int enc,
 {
 	unsigned int ret, i, j, k, temp;
 	unsigned int tsize;
-	char *p, *q;
+	char *q;
 	struct crypto_tfm *tfm;
 	char *key;
 	struct cipher_testvec *cipher_tv;
@@ -330,10 +318,8 @@ static void test_cipher(char *algo, int mode, int enc,
 					goto out;
 			}
 
-			p = cipher_tv[i].input;
-			sg[0].page = virt_to_page(p);
-			sg[0].offset = offset_in_page(p);
-			sg[0].length = cipher_tv[i].ilen;
+			sg_set_buf(&sg[0], cipher_tv[i].input,
+				   cipher_tv[i].ilen);
 
 			if (!mode) {
 				crypto_cipher_set_iv(tfm, cipher_tv[i].iv,
@@ -389,10 +375,8 @@ static void test_cipher(char *algo, int mode, int enc,
 				       cipher_tv[i].input + temp,
 				       cipher_tv[i].tap[k]);
 				temp += cipher_tv[i].tap[k];
-				p = &xbuf[IDX[k]];
-				sg[k].page = virt_to_page(p);
-				sg[k].offset = offset_in_page(p);
-				sg[k].length = cipher_tv[i].tap[k];
+				sg_set_buf(&sg[k], &xbuf[IDX[k]],
+					   cipher_tv[i].tap[k]);
 			}
 
 			if (!mode) {
@@ -431,14 +415,12 @@ out:
 static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p,
 			       int blen, int sec)
 {
-	struct scatterlist sg[8];
+	struct scatterlist sg[1];
 	unsigned long start, end;
 	int bcount;
 	int ret;
 
-	sg[0].page = virt_to_page(p);
-	sg[0].offset = offset_in_page(p);
-	sg[0].length = blen;
+	sg_set_buf(sg, p, blen);
 
 	for (start = jiffies, end = start + sec * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
@@ -459,14 +441,12 @@ static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p,
 static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p,
 			      int blen)
 {
-	struct scatterlist sg[8];
+	struct scatterlist sg[1];
 	unsigned long cycles = 0;
 	int ret = 0;
 	int i;
 
-	sg[0].page = virt_to_page(p);
-	sg[0].offset = offset_in_page(p);
-	sg[0].length = blen;
+	sg_set_buf(sg, p, blen);
 
 	local_bh_disable();
 	local_irq_disable();
@@ -709,9 +689,7 @@ static void test_crc32c(void)
 	for (i = 0; i < NUMVEC; i++) {
 		for (j = 0; j < VECSIZE; j++)
 			test_vec[i][j] = ++b;
-		sg[i].page = virt_to_page(test_vec[i]);
-		sg[i].offset = offset_in_page(test_vec[i]);
-		sg[i].length = VECSIZE;
+		sg_set_buf(&sg[i], test_vec[i], VECSIZE);
 	}
 
 	seed = SEEDTESTVAL;

+ 2 - 3
drivers/acpi/acpi_memhotplug.c

@@ -200,8 +200,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 	 * Note: Assume that this function returns zero on success
 	 */
 	result = add_memory(mem_device->start_addr,
-			    (mem_device->end_addr - mem_device->start_addr) + 1,
-			    mem_device->read_write_attribute);
+			    (mem_device->end_addr - mem_device->start_addr) + 1);
 	if (result) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "\nadd_memory failed\n"));
 		mem_device->state = MEMORY_INVALID_STATE;
@@ -259,7 +258,7 @@ static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
 	 * Ask the VM to offline this memory range.
 	 * Note: Assume that this function returns zero on success
 	 */
-	result = remove_memory(start, len, attr);
+	result = remove_memory(start, len);
 	if (result) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Hot-Remove failed.\n"));
 		return_VALUE(result);

+ 1 - 0
drivers/base/Makefile

@@ -7,6 +7,7 @@ obj-y			:= core.o sys.o bus.o dd.o \
 obj-y			+= power/
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
+obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o
 
 ifeq ($(CONFIG_DEBUG_DRIVER),y)
 EXTRA_CFLAGS += -DDEBUG

+ 2 - 0
drivers/base/init.c

@@ -9,6 +9,7 @@
 
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/memory.h>
 
 #include "base.h"
 
@@ -33,5 +34,6 @@ void __init driver_init(void)
 	platform_bus_init();
 	system_bus_init();
 	cpu_dev_init();
+	memory_dev_init();
 	attribute_container_init();
 }

+ 452 - 0
drivers/base/memory.c

@@ -0,0 +1,452 @@
+/*
+ * drivers/base/memory.c - basic Memory class support
+ *
+ * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
+ *            Dave Hansen <haveblue@us.ibm.com>
+ *
+ * This file provides the necessary infrastructure to represent
+ * a SPARSEMEM-memory-model system's physical memory in /sysfs.
+ * All arch-independent code that assumes MEMORY_HOTPLUG requires
+ * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
+ */
+
+#include <linux/sysdev.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>	/* capable() */
+#include <linux/topology.h>
+#include <linux/device.h>
+#include <linux/memory.h>
+#include <linux/kobject.h>
+#include <linux/memory_hotplug.h>
+#include <linux/mm.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#define MEMORY_CLASS_NAME	"memory"
+
+static struct sysdev_class memory_sysdev_class = {
+	set_kset_name(MEMORY_CLASS_NAME),
+};
+EXPORT_SYMBOL(memory_sysdev_class);
+
+static char *memory_hotplug_name(struct kset *kset, struct kobject *kobj)
+{
+	return MEMORY_CLASS_NAME;
+}
+
+static int memory_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
+			int num_envp, char *buffer, int buffer_size)
+{
+	int retval = 0;
+
+	return retval;
+}
+
+static struct kset_hotplug_ops memory_hotplug_ops = {
+	.name		= memory_hotplug_name,
+	.hotplug	= memory_hotplug,
+};
+
+static struct notifier_block *memory_chain;
+
+static int register_memory_notifier(struct notifier_block *nb)
+{
+        return notifier_chain_register(&memory_chain, nb);
+}
+
+static void unregister_memory_notifier(struct notifier_block *nb)
+{
+        notifier_chain_unregister(&memory_chain, nb);
+}
+
+/*
+ * register_memory - Setup a sysfs device for a memory block
+ */
+static int
+register_memory(struct memory_block *memory, struct mem_section *section,
+		struct node *root)
+{
+	int error;
+
+	memory->sysdev.cls = &memory_sysdev_class;
+	memory->sysdev.id = __section_nr(section);
+
+	error = sysdev_register(&memory->sysdev);
+
+	if (root && !error)
+		error = sysfs_create_link(&root->sysdev.kobj,
+					  &memory->sysdev.kobj,
+					  kobject_name(&memory->sysdev.kobj));
+
+	return error;
+}
+
+static void
+unregister_memory(struct memory_block *memory, struct mem_section *section,
+		struct node *root)
+{
+	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
+	BUG_ON(memory->sysdev.id != __section_nr(section));
+
+	sysdev_unregister(&memory->sysdev);
+	if (root)
+		sysfs_remove_link(&root->sysdev.kobj,
+				  kobject_name(&memory->sysdev.kobj));
+}
+
+/*
+ * use this as the physical section index that this memsection
+ * uses.
+ */
+
+static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf)
+{
+	struct memory_block *mem =
+		container_of(dev, struct memory_block, sysdev);
+	return sprintf(buf, "%08lx\n", mem->phys_index);
+}
+
+/*
+ * online, offline, going offline, etc.
+ */
+static ssize_t show_mem_state(struct sys_device *dev, char *buf)
+{
+	struct memory_block *mem =
+		container_of(dev, struct memory_block, sysdev);
+	ssize_t len = 0;
+
+	/*
+	 * We can probably put these states in a nice little array
+	 * so that they're not open-coded
+	 */
+	switch (mem->state) {
+		case MEM_ONLINE:
+			len = sprintf(buf, "online\n");
+			break;
+		case MEM_OFFLINE:
+			len = sprintf(buf, "offline\n");
+			break;
+		case MEM_GOING_OFFLINE:
+			len = sprintf(buf, "going-offline\n");
+			break;
+		default:
+			len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
+					mem->state);
+			WARN_ON(1);
+			break;
+	}
+
+	return len;
+}
+
+static inline int memory_notify(unsigned long val, void *v)
+{
+	return notifier_call_chain(&memory_chain, val, v);
+}
+
+/*
+ * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
+ * OK to have direct references to sparsemem variables in here.
+ */
+static int
+memory_block_action(struct memory_block *mem, unsigned long action)
+{
+	int i;
+	unsigned long psection;
+	unsigned long start_pfn, start_paddr;
+	struct page *first_page;
+	int ret;
+	int old_state = mem->state;
+
+	psection = mem->phys_index;
+	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
+
+	/*
+	 * The probe routines leave the pages reserved, just
+	 * as the bootmem code does.  Make sure they're still
+	 * that way.
+	 */
+	if (action == MEM_ONLINE) {
+		for (i = 0; i < PAGES_PER_SECTION; i++) {
+			if (PageReserved(first_page+i))
+				continue;
+
+			printk(KERN_WARNING "section number %ld page number %d "
+				"not reserved, was it already online?\n",
+				psection, i);
+			return -EBUSY;
+		}
+	}
+
+	switch (action) {
+		case MEM_ONLINE:
+			start_pfn = page_to_pfn(first_page);
+			ret = online_pages(start_pfn, PAGES_PER_SECTION);
+			break;
+		case MEM_OFFLINE:
+			mem->state = MEM_GOING_OFFLINE;
+			memory_notify(MEM_GOING_OFFLINE, NULL);
+			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
+			ret = remove_memory(start_paddr,
+					    PAGES_PER_SECTION << PAGE_SHIFT);
+			if (ret) {
+				mem->state = old_state;
+				break;
+			}
+			memory_notify(MEM_MAPPING_INVALID, NULL);
+			break;
+		default:
+			printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
+					__FUNCTION__, mem, action, action);
+			WARN_ON(1);
+			ret = -EINVAL;
+	}
+	/*
+	 * For now, only notify on successful memory operations
+	 */
+	if (!ret)
+		memory_notify(action, NULL);
+
+	return ret;
+}
+
+static int memory_block_change_state(struct memory_block *mem,
+		unsigned long to_state, unsigned long from_state_req)
+{
+	int ret = 0;
+	down(&mem->state_sem);
+
+	if (mem->state != from_state_req) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = memory_block_action(mem, to_state);
+	if (!ret)
+		mem->state = to_state;
+
+out:
+	up(&mem->state_sem);
+	return ret;
+}
+
+static ssize_t
+store_mem_state(struct sys_device *dev, const char *buf, size_t count)
+{
+	struct memory_block *mem;
+	unsigned int phys_section_nr;
+	int ret = -EINVAL;
+
+	mem = container_of(dev, struct memory_block, sysdev);
+	phys_section_nr = mem->phys_index;
+
+	if (!valid_section_nr(phys_section_nr))
+		goto out;
+
+	if (!strncmp(buf, "online", min((int)count, 6)))
+		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
+	else if(!strncmp(buf, "offline", min((int)count, 7)))
+		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
+out:
+	if (ret)
+		return ret;
+	return count;
+}
+
+/*
+ * phys_device is a bad name for this.  What I really want is a way to
+ * differentiate between memory ranges that are part of the same
+ * physical device, i.e. a complete removable unit or FRU (field
+ * replaceable unit): if I offline all of these sections, can I then
+ * remove the physical device?
+ */
+static ssize_t show_phys_device(struct sys_device *dev, char *buf)
+{
+	struct memory_block *mem =
+		container_of(dev, struct memory_block, sysdev);
+	return sprintf(buf, "%d\n", mem->phys_device);
+}
+
+static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
+static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
+static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
+
+#define mem_create_simple_file(mem, attr_name)	\
+	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
+#define mem_remove_simple_file(mem, attr_name)	\
+	sysdev_remove_file(&mem->sysdev, &attr_##attr_name)
+
+/*
+ * Block size attribute stuff
+ */
+static ssize_t
+print_block_size(struct class *class, char *buf)
+{
+	return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
+}
+
+static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
+
+static int block_size_init(void)
+{
+	sysfs_create_file(&memory_sysdev_class.kset.kobj,
+		&class_attr_block_size_bytes.attr);
+	return 0;
+}
+
+/*
+ * Some architectures will have custom drivers to do this, and
+ * will not need to do it from userspace.  The fake hot-add code
+ * as well as ppc64 will do all of their discovery in userspace
+ * and will require this interface.
+ */
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+static ssize_t
+memory_probe_store(struct class *class, const char __user *buf, size_t count)
+{
+	u64 phys_addr;
+	int ret;
+
+	phys_addr = simple_strtoull(buf, NULL, 0);
+
+	ret = add_memory(phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);
+
+	if (ret)
+		count = ret;
+
+	return count;
+}
+static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
+
+static int memory_probe_init(void)
+{
+	sysfs_create_file(&memory_sysdev_class.kset.kobj,
+		&class_attr_probe.attr);
+	return 0;
+}
+#else
+#define memory_probe_init(...)	do {} while (0)
+#endif
+
+/*
+ * Note that phys_device is optional.  It is here to allow for
+ * differentiation between which *physical* devices each
+ * section belongs to...
+ */
+
+static int add_memory_block(unsigned long node_id, struct mem_section *section,
+		     unsigned long state, int phys_device)
+{
+	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	int ret = 0;
+
+	if (!mem)
+		return -ENOMEM;
+
+	mem->phys_index = __section_nr(section);
+	mem->state = state;
+	init_MUTEX(&mem->state_sem);
+	mem->phys_device = phys_device;
+
+	ret = register_memory(mem, section, NULL);
+	if (!ret)
+		ret = mem_create_simple_file(mem, phys_index);
+	if (!ret)
+		ret = mem_create_simple_file(mem, state);
+	if (!ret)
+		ret = mem_create_simple_file(mem, phys_device);
+
+	return ret;
+}
+
+/*
+ * For now, we have a linear search to go find the appropriate
+ * memory_block corresponding to a particular phys_index. If
+ * this gets to be a real problem, we can always use a radix
+ * tree or something here.
+ *
+ * This could be made generic for all sysdev classes.
+ */
+static struct memory_block *find_memory_block(struct mem_section *section)
+{
+	struct kobject *kobj;
+	struct sys_device *sysdev;
+	struct memory_block *mem;
+	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
+
+	/*
+	 * This only works because we know that section == sysdev->id,
+	 * which is slightly redundant with sysdev_register().
+	 */
+	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
+
+	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
+	if (!kobj)
+		return NULL;
+
+	sysdev = container_of(kobj, struct sys_device, kobj);
+	mem = container_of(sysdev, struct memory_block, sysdev);
+
+	return mem;
+}
+
+int remove_memory_block(unsigned long node_id, struct mem_section *section,
+		int phys_device)
+{
+	struct memory_block *mem;
+
+	mem = find_memory_block(section);
+	mem_remove_simple_file(mem, phys_index);
+	mem_remove_simple_file(mem, state);
+	mem_remove_simple_file(mem, phys_device);
+	unregister_memory(mem, section, NULL);
+
+	return 0;
+}
+
+/*
+ * The VM needs an interface to add new memory regions,
+ * but without onlining them.
+ */
+int register_new_memory(struct mem_section *section)
+{
+	return add_memory_block(0, section, MEM_OFFLINE, 0);
+}
+
+int unregister_memory_section(struct mem_section *section)
+{
+	if (!valid_section(section))
+		return -EINVAL;
+
+	return remove_memory_block(0, section, 0);
+}
+
+/*
+ * Initialize the sysfs support for memory devices...
+ */
+int __init memory_dev_init(void)
+{
+	unsigned int i;
+	int ret;
+
+	memory_sysdev_class.kset.hotplug_ops = &memory_hotplug_ops;
+	ret = sysdev_class_register(&memory_sysdev_class);
+
+	/*
+	 * Create entries for memory sections that were found
+	 * during boot and have been initialized
+	 */
+	for (i = 0; i < NR_MEM_SECTIONS; i++) {
+		if (!valid_section_nr(i))
+			continue;
+		add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0);
+	}
+
+	memory_probe_init();
+	block_size_init();
+
+	return ret;
+}

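The memory class added above has two consumer-facing surfaces worth sketching. In-kernel users can track state transitions through the notifier chain; the sketch below is illustrative only (ecc_memory_callback and its registration are hypothetical, and register_memory_notifier() is still static in this file, so a real consumer assumes a later export):

/* assumes <linux/notifier.h> and <linux/memory.h> */
static int ecc_memory_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	switch (action) {
	case MEM_GOING_OFFLINE:
		/* stop referencing the section that is about to go away */
		break;
	case MEM_ONLINE:
	case MEM_OFFLINE:
		/* rescan the set of sections this driver may use */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block ecc_memory_nb = {
	.notifier_call = ecc_memory_callback,
};

/* in driver init: register_memory_notifier(&ecc_memory_nb); */

On CONFIG_ARCH_MEMORY_PROBE kernels (the ppc64/fake hot-add case described in the comment above memory_probe_store), userspace feeds a physical address to the probe attribute. A minimal C caller, with a purely illustrative address:

#include <fcntl.h>
#include <unistd.h>

int probe_section(void)
{
	/* path created by memory_probe_init() above */
	int fd = open("/sys/devices/system/memory/probe", O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	/* one memory section starting at 4 GB (example value only) */
	n = write(fd, "0x100000000", 11);
	return (n < 0 || close(fd) < 0) ? -1 : 0;
}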
+ 4 - 8
drivers/md/dm-crypt.c

@@ -15,7 +15,7 @@
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
 #include <asm/atomic.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 #include <asm/page.h>
 
 #include "dm.h"
@@ -164,9 +164,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 		return -ENOMEM;
 	}
 
-	sg.page = virt_to_page(cc->key);
-	sg.offset = offset_in_page(cc->key);
-	sg.length = cc->key_size;
+	sg_set_buf(&sg, cc->key, cc->key_size);
 	crypto_digest_digest(hash_tfm, &sg, 1, salt);
 	crypto_free_tfm(hash_tfm);
 
@@ -207,14 +205,12 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc)
 
 static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 {
-	struct scatterlist sg = { NULL, };
+	struct scatterlist sg;
 
 	memset(iv, 0, cc->iv_size);
 	*(u64 *)iv = cpu_to_le64(sector);
 
-	sg.page = virt_to_page(iv);
-	sg.offset = offset_in_page(iv);
-	sg.length = cc->iv_size;
+	sg_set_buf(&sg, iv, cc->iv_size);
 	crypto_cipher_encrypt((struct crypto_tfm *)cc->iv_gen_private,
 	                      &sg, &sg, cc->iv_size);
 

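The sg_set_buf() conversions here and in the drivers below replace the open-coded page/offset/length triple with the new helper from <linux/scatterlist.h>. As introduced alongside this merge it is roughly:

static inline void sg_set_buf(struct scatterlist *sg, void *buf,
			      unsigned int buflen)
{
	sg->page   = virt_to_page(buf);
	sg->offset = offset_in_page(buf);
	sg->length = buflen;
}

so each caller keeps its old behavior; as before, buf must live in directly mapped memory (kmalloc, not vmalloc) for virt_to_page() to be valid.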
+ 3 - 4
drivers/net/wireless/airo.c

@@ -35,6 +35,7 @@
 #include <linux/interrupt.h>
 #include <linux/in.h>
 #include <linux/bitops.h>
+#include <linux/scatterlist.h>
 #include <asm/io.h>
 #include <asm/system.h>
 
@@ -1590,11 +1591,9 @@ static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct
 		aes_counter[12] = (u8)(counter >> 24);
 		counter++;
 		memcpy (plain, aes_counter, 16);
-		sg[0].page = virt_to_page(plain);
-		sg[0].offset = ((long) plain & ~PAGE_MASK);
-		sg[0].length = 16;
+		sg_set_buf(sg, plain, 16);
 		crypto_cipher_encrypt(tfm, sg, sg, 16);
-		cipher = kmap(sg[0].page) + sg[0].offset;
+		cipher = kmap(sg->page) + sg->offset;
 		for (j=0; (j<16) && (i< (sizeof(context->coeff)/sizeof(context->coeff[0]))); ) {
 			context->coeff[i++] = ntohl(*(u32 *)&cipher[j]);
 			j += 4;

+ 3 - 3
drivers/scsi/arm/scsi.h

@@ -10,6 +10,8 @@
  *  Commonly used scsi driver functions.
  */
 
+#include <linux/scatterlist.h>
+
 #define BELT_AND_BRACES
 
 /*
@@ -22,9 +24,7 @@ static inline int copy_SCp_to_sg(struct scatterlist *sg, Scsi_Pointer *SCp, int
 
 	BUG_ON(bufs + 1 > max);
 
-	sg->page   = virt_to_page(SCp->ptr);
-	sg->offset = offset_in_page(SCp->ptr);
-	sg->length = SCp->this_residual;
+	sg_set_buf(sg, SCp->ptr, SCp->this_residual);
 
 	if (bufs)
 		memcpy(sg + 1, SCp->buffer + 1,

+ 2 - 8
drivers/scsi/libata-core.c

@@ -49,6 +49,7 @@
 #include <linux/suspend.h>
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
+#include <linux/scatterlist.h>
 #include <scsi/scsi.h>
 #include "scsi.h"
 #include "scsi_priv.h"
@@ -2554,19 +2555,12 @@
 
 void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen)
 {
-	struct scatterlist *sg;
-
 	qc->flags |= ATA_QCFLAG_SINGLE;
 
-	memset(&qc->sgent, 0, sizeof(qc->sgent));
 	qc->sg = &qc->sgent;
 	qc->n_elem = 1;
 	qc->buf_virt = buf;
-
-	sg = qc->sg;
-	sg->page = virt_to_page(buf);
-	sg->offset = (unsigned long) buf & ~PAGE_MASK;
-	sg->length = buflen;
+	sg_init_one(qc->sg, buf, buflen);
 }
 
 /**

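ata_sg_init_one() can drop its memset because sg_init_one() clears the entry before filling it; the helper is essentially the following (same-era <linux/scatterlist.h>, sketched here for reference):

static inline void sg_init_one(struct scatterlist *sg, u8 *buf,
			       unsigned int buflen)
{
	memset(sg, 0, sizeof(*sg));
	sg_set_buf(sg, buf, buflen);
}

The usbtest.c hunk further down relies on the same property when it deletes the explicit memset of the scatterlist entries it initializes this way.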
+ 10 - 7
drivers/scsi/sg.c

@@ -49,6 +49,7 @@ static int sg_version_num = 30533;	/* 2 digits for each component */
 #include <linux/seq_file.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
+#include <linux/scatterlist.h>
 
 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
@@ -1886,13 +1887,17 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
 	int i;
 
 	for (i=0; i < nr_pages; i++) {
-		if (dirtied && !PageReserved(sgl[i].page))
-			SetPageDirty(sgl[i].page);
-		/* unlock_page(sgl[i].page); */
+		struct page *page = sgl[i].page;
+
+		/* XXX: just for debug. Remove when PageReserved is removed */
+		BUG_ON(PageReserved(page));
+		if (dirtied)
+			SetPageDirty(page);
+		/* unlock_page(page); */
 		/* FIXME: cache flush missing for rw==READ
 		 * FIXME: call the correct reference counting function
 		 */
-		page_cache_release(sgl[i].page);
+		page_cache_release(page);
 	}
 
 	return 0;
@@ -1992,9 +1997,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
 				if (!p)
 					break;
 			}
-			sclp->page = virt_to_page(p);
-			sclp->offset = offset_in_page(p);
-			sclp->length = ret_sz;
+			sg_set_buf(sclp, p, ret_sz);
 
 			SCSI_LOG_TIMEOUT(5, printk("sg_build_build: k=%d, a=0x%p, len=%d\n",
 					  k, sg_scatg2virt(sclp), ret_sz));

+ 7 - 3
drivers/scsi/st.c

@@ -4526,12 +4526,16 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p
 	int i;
 
 	for (i=0; i < nr_pages; i++) {
-		if (dirtied && !PageReserved(sgl[i].page))
-			SetPageDirty(sgl[i].page);
+		struct page *page = sgl[i].page;
+
+		/* XXX: just for debug. Remove when PageReserved is removed */
+		BUG_ON(PageReserved(page));
+		if (dirtied)
+			SetPageDirty(page);
 		/* FIXME: cache flush missing for rw==READ
 		 * FIXME: call the correct reference counting function
 		 */
-		page_cache_release(sgl[i].page);
+		page_cache_release(page);
 	}
 
 	return 0;

+ 2 - 5
drivers/usb/misc/usbtest.c

@@ -9,7 +9,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
 #include <linux/usb.h>
 
@@ -381,7 +381,6 @@ alloc_sglist (int nents, int max, int vary)
 	sg = kmalloc (nents * sizeof *sg, SLAB_KERNEL);
 	if (!sg)
 		return NULL;
-	memset (sg, 0, nents * sizeof *sg);
 
 	for (i = 0; i < nents; i++) {
 		char		*buf;
@@ -394,9 +393,7 @@ alloc_sglist (int nents, int max, int vary)
 		memset (buf, 0, size);
 
 		/* kmalloc pages are always physically contiguous! */
-		sg [i].page = virt_to_page (buf);
-		sg [i].offset = offset_in_page (buf);
-		sg [i].length = size;
+		sg_init_one(&sg[i], buf, size);
 
 		if (vary) {
 			size += vary;

+ 2 - 2
fs/afs/file.c

@@ -291,8 +291,8 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
 		cachefs_uncache_page(vnode->cache, page);
 #endif
 
-		pageio = (struct cachefs_page *) page->private;
-		page->private = 0;
+		pageio = (struct cachefs_page *) page_private(page);
+		set_page_private(page, 0);
 		ClearPagePrivate(page);
 
 		if (pageio)

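This hunk, like the fs/buffer.c, JFS and XFS ones below, converts raw page->private accesses to accessors introduced elsewhere in this merge. In <linux/mm.h> they are thin wrappers, approximately:

#define page_private(page)		((page)->private)
#define set_page_private(page, v)	((page)->private = (v))

The point of the indirection is auditability: with the split page table lock, page->private is overloaded to hold a spinlock for page-table pages, so every legitimate use of the field now goes through one greppable pair of macros.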
+ 0 - 1
fs/binfmt_aout.c

@@ -318,7 +318,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
 
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;

+ 0 - 1
fs/binfmt_elf.c

@@ -773,7 +773,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),

+ 0 - 7
fs/binfmt_elf_fdpic.c

@@ -294,14 +294,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 				  &interp_params,
 				  &current->mm->start_stack,
 				  &current->mm->start_brk);
-#endif
-
-	/* do this so that we can load the interpreter, if need be
-	 * - we will change some of these later
-	 */
-	set_mm_counter(current->mm, rss, 0);
 
-#ifdef CONFIG_MMU
 	retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);

+ 0 - 1
fs/binfmt_flat.c

@@ -650,7 +650,6 @@ static int load_flat_file(struct linux_binprm * bprm,
 		current->mm->start_brk = datapos + data_len + bss_len;
 		current->mm->brk = (current->mm->start_brk + 3) & ~3;
 		current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len;
-		set_mm_counter(current->mm, rss, 0);
 	}
 
 	if (flags & FLAT_FLAG_KTRACE)

+ 0 - 1
fs/binfmt_som.c

@@ -259,7 +259,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	create_som_tables(bprm);
 
 	current->mm->start_stack = bprm->p;
-	set_mm_counter(current->mm, rss, 0);
 
 #if 0
 	printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk);

+ 1 - 1
fs/buffer.c

@@ -96,7 +96,7 @@ static void
 __clear_page_buffers(struct page *page)
 {
 	ClearPagePrivate(page);
-	page->private = 0;
+	set_page_private(page, 0);
 	page_cache_release(page);
 }
 

+ 0 - 1
fs/compat.c

@@ -1490,7 +1490,6 @@ int compat_do_execve(char * filename,
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
-		update_mem_hiwater(current);
 		kfree(bprm);
 		return retval;
 	}

+ 3 - 1
fs/direct-io.c

@@ -162,6 +162,7 @@ static int dio_refill_pages(struct dio *dio)
 	up_read(&current->mm->mmap_sem);
 
 	if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
+		struct page *page = ZERO_PAGE(dio->curr_user_address);
 		/*
 		 * A memory fault, but the filesystem has some outstanding
 		 * mapped blocks.  We need to use those blocks up to avoid
@@ -169,7 +170,8 @@ static int dio_refill_pages(struct dio *dio)
 		 */
 		if (dio->page_errors == 0)
 			dio->page_errors = ret;
-		dio->pages[0] = ZERO_PAGE(dio->curr_user_address);
+		page_cache_get(page);
+		dio->pages[0] = page;
 		dio->head = 0;
 		dio->tail = 1;
 		ret = 0;

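The page_cache_get() added above reflects a rule change that runs through this merge: ZERO_PAGE is now reference-counted like any other page once it is handed out, and the dio completion path releases every entry in dio->pages[]. The balanced pattern, as an illustrative sketch (consume_page() is hypothetical):

	struct page *page = ZERO_PAGE(addr);

	page_cache_get(page);		/* reference owned by the pages[] slot */
	consume_page(page);		/* whoever drains the slot ...         */
	page_cache_release(page);	/* ... drops that reference when done  */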
+ 6 - 11
fs/exec.c

@@ -309,40 +309,36 @@ void install_arg_page(struct vm_area_struct *vma,
 	pud_t * pud;
 	pmd_t * pmd;
 	pte_t * pte;
+	spinlock_t *ptl;
 
 	if (unlikely(anon_vma_prepare(vma)))
-		goto out_sig;
+		goto out;
 
 	flush_dcache_page(page);
 	pgd = pgd_offset(mm, address);
-
-	spin_lock(&mm->page_table_lock);
 	pud = pud_alloc(mm, pgd, address);
 	if (!pud)
 		goto out;
 	pmd = pmd_alloc(mm, pud, address);
 	if (!pmd)
 		goto out;
-	pte = pte_alloc_map(mm, pmd, address);
+	pte = pte_alloc_map_lock(mm, pmd, address, &ptl);
 	if (!pte)
 		goto out;
 	if (!pte_none(*pte)) {
-		pte_unmap(pte);
+		pte_unmap_unlock(pte, ptl);
 		goto out;
 	}
-	inc_mm_counter(mm, rss);
+	inc_mm_counter(mm, anon_rss);
 	lru_cache_add_active(page);
 	set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
 					page, vma->vm_page_prot))));
 	page_add_anon_rmap(page, vma, address);
-	pte_unmap(pte);
-	spin_unlock(&mm->page_table_lock);
+	pte_unmap_unlock(pte, ptl);
 
 	/* no need for flush_tlb */
 	return;
 out:
-	spin_unlock(&mm->page_table_lock);
-out_sig:
 	__free_page(page);
 	force_sig(SIGKILL, current);
 }
@@ -1207,7 +1203,6 @@ int do_execve(char * filename,
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
-		update_mem_hiwater(current);
 		kfree(bprm);
 		return retval;
 	}

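install_arg_page() now uses the pte-lock helpers this merge introduces instead of wrapping the whole walk in mm->page_table_lock. Their shape, simplified from <linux/mm.h> (the real macros pick the per-page-table or per-mm lock via pte_lockptr()), is approximately:

#define pte_offset_map_lock(mm, pmd, address, ptlp)	\
({							\
	spinlock_t *__ptl = pte_lockptr(mm, pmd);	\
	pte_t *__pte = pte_offset_map(pmd, address);	\
	*(ptlp) = __ptl;				\
	spin_lock(__ptl);				\
	__pte;						\
})

#define pte_unmap_unlock(pte, ptl)	do {		\
	spin_unlock(ptl);				\
	pte_unmap(pte);					\
} while (0)

pte_alloc_map_lock() is the same idea on top of pte_alloc_map(), returning NULL when the page table cannot be allocated, which is why the failure paths above collapse into a single out: label with no unlock.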
+ 120 - 86
fs/hugetlbfs/inode.c

@@ -45,10 +45,58 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = {
 
 int sysctl_hugetlb_shm_group;
 
+static void huge_pagevec_release(struct pagevec *pvec)
+{
+	int i;
+
+	for (i = 0; i < pagevec_count(pvec); ++i)
+		put_page(pvec->pages[i]);
+
+	pagevec_reinit(pvec);
+}
+
+/*
+ * huge_pages_needed tries to determine the number of new huge pages that
+ * will be required to fully populate this VMA.  This will be equal to
+ * the size of the VMA in huge pages minus the number of huge pages
+ * (covered by this VMA) that are found in the page cache.
+ *
+ * Result is in bytes to be compatible with is_hugepage_mem_enough()
+ */
+unsigned long
+huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
+{
+	int i;
+	struct pagevec pvec;
+	unsigned long start = vma->vm_start;
+	unsigned long end = vma->vm_end;
+	unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
+	pgoff_t next = vma->vm_pgoff;
+	pgoff_t endpg = next + ((end - start) >> PAGE_SHIFT);
+
+	pagevec_init(&pvec, 0);
+	while (next < endpg) {
+		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
+			break;
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			if (page->index > next)
+				next = page->index;
+			if (page->index >= endpg)
+				break;
+			next++;
+			hugepages--;
+		}
+		huge_pagevec_release(&pvec);
+	}
+	return hugepages << HPAGE_SHIFT;
+}
+
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
+	unsigned long bytes;
 	loff_t len, vma_len;
 	int ret;
 
@@ -67,6 +115,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
 		return -EINVAL;
 
+	bytes = huge_pages_needed(mapping, vma);
+	if (!is_hugepage_mem_enough(bytes))
+		return -ENOMEM;
+
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
 	down(&inode->i_sem);
@@ -79,10 +131,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
 		goto out;
 
-	ret = hugetlb_prefault(mapping, vma);
-	if (ret)
-		goto out;
-
+	ret = 0;
+	hugetlb_prefault_arch_hook(vma->vm_mm);
 	if (inode->i_size < len)
 		inode->i_size = len;
 out:
@@ -92,7 +142,7 @@
 }
 
 /*
- * Called under down_write(mmap_sem), page_table_lock is not held
+ * Called under down_write(mmap_sem).
  */
 
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -171,16 +221,6 @@ static int hugetlbfs_commit_write(struct file *file,
 	return -EINVAL;
 }
 
-static void huge_pagevec_release(struct pagevec *pvec)
-{
-	int i;
-
-	for (i = 0; i < pagevec_count(pvec); ++i)
-		put_page(pvec->pages[i]);
-
-	pagevec_reinit(pvec);
-}
-
 static void truncate_huge_page(struct page *page)
 {
 	clear_page_dirty(page);
@@ -224,52 +264,35 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 
 static void hugetlbfs_delete_inode(struct inode *inode)
 {
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);
-
-	hlist_del_init(&inode->i_hash);
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-
 	if (inode->i_data.nrpages)
 		truncate_hugepages(&inode->i_data, 0);
-
-	security_inode_delete(inode);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
 	clear_inode(inode);
-	destroy_inode(inode);
 }
 
 static void hugetlbfs_forget_inode(struct inode *inode)
 {
-	struct super_block *super_block = inode->i_sb;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(super_block);
+	struct super_block *sb = inode->i_sb;
 
-	if (hlist_unhashed(&inode->i_hash))
-		goto out_truncate;
-
-	if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
-		list_del(&inode->i_list);
-		list_add(&inode->i_list, &inode_unused);
-	}
-	inodes_stat.nr_unused++;
-	if (!super_block || (super_block->s_flags & MS_ACTIVE)) {
+	if (!hlist_unhashed(&inode->i_hash)) {
+		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
+			list_move(&inode->i_list, &inode_unused);
+		inodes_stat.nr_unused++;
+		if (!sb || (sb->s_flags & MS_ACTIVE)) {
+			spin_unlock(&inode_lock);
+			return;
+		}
+		inode->i_state |= I_WILL_FREE;
 		spin_unlock(&inode_lock);
-		return;
+		/*
+		 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
+		 * in our backing_dev_info.
+		 */
+		write_inode_now(inode, 1);
+		spin_lock(&inode_lock);
+		inode->i_state &= ~I_WILL_FREE;
+		inodes_stat.nr_unused--;
+		hlist_del_init(&inode->i_hash);
 	}
-
-	/* write_inode_now() ? */
-	inodes_stat.nr_unused--;
-	hlist_del_init(&inode->i_hash);
-out_truncate:
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
 	inode->i_state |= I_FREEING;
@@ -277,13 +300,6 @@ out_truncate:
 	spin_unlock(&inode_lock);
 	if (inode->i_data.nrpages)
 		truncate_hugepages(&inode->i_data, 0);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
 	clear_inode(inode);
 	destroy_inode(inode);
 }
@@ -291,7 +307,7 @@ out_truncate:
 static void hugetlbfs_drop_inode(struct inode *inode)
 {
 	if (!inode->i_nlink)
-		hugetlbfs_delete_inode(inode);
+		generic_delete_inode(inode);
 	else
 		hugetlbfs_forget_inode(inode);
 }
@@ -308,7 +324,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 
 	vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
 		unsigned long h_vm_pgoff;
-		unsigned long v_length;
 		unsigned long v_offset;
 
 		h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
@@ -319,11 +334,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 		if (h_vm_pgoff >= h_pgoff)
 			v_offset = 0;
 
-		v_length = vma->vm_end - vma->vm_start;
-
-		zap_hugepage_range(vma,
-				vma->vm_start + v_offset,
-				v_length - v_offset);
+		unmap_hugepage_range(vma,
+				vma->vm_start + v_offset, vma->vm_end);
 	}
 }
 
@@ -379,17 +391,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 					gid_t gid, int mode, dev_t dev)
 {
 	struct inode *inode;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		if (!sbinfo->free_inodes) {
-			spin_unlock(&sbinfo->stat_lock);
-			return NULL;
-		}
-		sbinfo->free_inodes--;
-		spin_unlock(&sbinfo->stat_lock);
-	}
 
 	inode = new_inode(sb);
 	if (inode) {
@@ -531,29 +532,51 @@ static void hugetlbfs_put_super(struct super_block *sb)
 	}
 }
 
+static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
+{
+	if (sbinfo->free_inodes >= 0) {
+		spin_lock(&sbinfo->stat_lock);
+		if (unlikely(!sbinfo->free_inodes)) {
+			spin_unlock(&sbinfo->stat_lock);
+			return 0;
+		}
+		sbinfo->free_inodes--;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+
+	return 1;
+}
+
+static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
+{
+	if (sbinfo->free_inodes >= 0) {
+		spin_lock(&sbinfo->stat_lock);
+		sbinfo->free_inodes++;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+}
+
+
 static kmem_cache_t *hugetlbfs_inode_cachep;
 
 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 {
+	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
 	struct hugetlbfs_inode_info *p;
 
+	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
+		return NULL;
 	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
-	if (!p)
+	if (unlikely(!p)) {
+		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
+	}
 	return &p->vfs_inode;
 }
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
-{
-	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
-
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
-		inode_init_once(&ei->vfs_inode);
-}
-
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
+	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
 	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
 	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
@@ -565,6 +588,16 @@ static struct address_space_operations hugetlbfs_aops = {
 	.set_page_dirty	= hugetlbfs_set_page_dirty,
 };
 
+
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(&ei->vfs_inode);
+}
+
 struct file_operations hugetlbfs_file_operations = {
 	.mmap			= hugetlbfs_file_mmap,
 	.fsync			= simple_sync_file,
@@ -592,6 +625,7 @@ static struct super_operations hugetlbfs_ops = {
 	.alloc_inode    = hugetlbfs_alloc_inode,
 	.destroy_inode  = hugetlbfs_destroy_inode,
 	.statfs		= hugetlbfs_statfs,
+	.delete_inode	= hugetlbfs_delete_inode,
 	.drop_inode	= hugetlbfs_drop_inode,
 	.put_super	= hugetlbfs_put_super,
 };

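A worked example of the huge_pages_needed() arithmetic, with illustrative numbers: on a configuration with 2 MB huge pages (HPAGE_SHIFT = 21), a 10 MB VMA starts with hugepages = 5; if pagevec_lookup() finds two of those huge pages already in the page cache, the loop decrements twice and the function returns 3 << 21, i.e. 6 MB. hugetlbfs_file_mmap() then refuses the mapping with -ENOMEM unless is_hugepage_mem_enough() reports that 6 MB worth of free huge pages exist.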
+ 6 - 6
fs/jfs/jfs_metapage.c

@@ -86,7 +86,7 @@ struct meta_anchor {
 	atomic_t io_count;
 	struct metapage *mp[MPS_PER_PAGE];
 };
-#define mp_anchor(page) ((struct meta_anchor *)page->private)
+#define mp_anchor(page) ((struct meta_anchor *)page_private(page))
 
 static inline struct metapage *page_to_mp(struct page *page, uint offset)
 {
@@ -108,7 +108,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
 		if (!a)
 			return -ENOMEM;
 		memset(a, 0, sizeof(struct meta_anchor));
-		page->private = (unsigned long)a;
+		set_page_private(page, (unsigned long)a);
 		SetPagePrivate(page);
 		kmap(page);
 	}
@@ -136,7 +136,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 	a->mp[index] = NULL;
 	if (--a->mp_count == 0) {
 		kfree(a);
-		page->private = 0;
+		set_page_private(page, 0);
 		ClearPagePrivate(page);
 		kunmap(page);
 	}
@@ -156,13 +156,13 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *))
 #else
 static inline struct metapage *page_to_mp(struct page *page, uint offset)
 {
-	return PagePrivate(page) ? (struct metapage *)page->private : NULL;
+	return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
 }
 
 static inline int insert_metapage(struct page *page, struct metapage *mp)
 {
 	if (mp) {
-		page->private = (unsigned long)mp;
+		set_page_private(page, (unsigned long)mp);
 		SetPagePrivate(page);
 		kmap(page);
 	}
@@ -171,7 +171,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
 
 static inline void remove_metapage(struct page *page, struct metapage *mp)
 {
-	page->private = 0;
+	set_page_private(page, 0);
 	ClearPagePrivate(page);
 	kunmap(page);
 }

+ 1 - 1
fs/proc/array.c

@@ -438,7 +438,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		jiffies_to_clock_t(it_real_value),
 		start_time,
 		vsize,
-		mm ? get_mm_counter(mm, rss) : 0, /* you might want to shift this left 3 */
+		mm ? get_mm_rss(mm) : 0,
 	        rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,

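get_mm_rss() wraps the rss split that this merge performs: the old single rss counter becomes file_rss plus anon_rss. In <linux/sched.h> it is simply:

#define get_mm_rss(mm)	\
	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))

so /proc/<pid>/stat reports the same total as before, while task_statm() in the next file can read shared (file_rss) and resident (file_rss + anon_rss) directly, without the old subtraction.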
+ 31 - 20
fs/proc/task_mmu.c

@@ -14,22 +14,41 @@
 char *task_mem(struct mm_struct *mm, char *buffer)
 {
 	unsigned long data, text, lib;
+	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
+
+	/*
+	 * Note: to minimize their overhead, mm maintains hiwater_vm and
+	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
+	 * collector of these hiwater stats must therefore get total_vm
+	 * and rss too, which will usually be the higher.  Barriers? not
+	 * worth the effort, such snapshots can always be inconsistent.
+	 */
+	hiwater_vm = total_vm = mm->total_vm;
+	if (hiwater_vm < mm->hiwater_vm)
+		hiwater_vm = mm->hiwater_vm;
+	hiwater_rss = total_rss = get_mm_rss(mm);
+	if (hiwater_rss < mm->hiwater_rss)
+		hiwater_rss = mm->hiwater_rss;
 
 	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	buffer += sprintf(buffer,
+		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
 		"VmLck:\t%8lu kB\n"
+		"VmHWM:\t%8lu kB\n"
 		"VmRSS:\t%8lu kB\n"
 		"VmData:\t%8lu kB\n"
 		"VmStk:\t%8lu kB\n"
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
 		"VmPTE:\t%8lu kB\n",
-		(mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
+		hiwater_vm << (PAGE_SHIFT-10),
+		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
-		get_mm_counter(mm, rss) << (PAGE_SHIFT-10),
+		hiwater_rss << (PAGE_SHIFT-10),
+		total_rss << (PAGE_SHIFT-10),
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
 		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
@@ -44,13 +63,11 @@ unsigned long task_vsize(struct mm_struct *mm)
 int task_statm(struct mm_struct *mm, int *shared, int *text,
 	       int *data, int *resident)
 {
-	int rss = get_mm_counter(mm, rss);
-
-	*shared = rss - get_mm_counter(mm, anon_rss);
+	*shared = get_mm_counter(mm, file_rss);
 	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
 								>> PAGE_SHIFT;
 	*data = mm->total_vm - mm->shared_vm;
-	*resident = rss;
+	*resident = *shared + get_mm_counter(mm, anon_rss);
 	return mm->total_vm;
 }
 
@@ -186,13 +203,14 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				struct mem_size_stats *mss)
 {
 	pte_t *pte, ptent;
+	spinlock_t *ptl;
 	unsigned long pfn;
 	struct page *page;
 
-	pte = pte_offset_map(pmd, addr);
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	do {
 		ptent = *pte;
-		if (pte_none(ptent) || !pte_present(ptent))
+		if (!pte_present(ptent))
 			continue;
 
 		mss->resident += PAGE_SIZE;
@@ -213,8 +231,8 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				mss->private_clean += PAGE_SIZE;
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap(pte - 1);
-	cond_resched_lock(&vma->vm_mm->page_table_lock);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
 }
 
 static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
@@ -268,17 +286,11 @@ static inline void smaps_pgd_range(struct vm_area_struct *vma,
 static int show_smap(struct seq_file *m, void *v)
 {
 	struct vm_area_struct *vma = v;
-	struct mm_struct *mm = vma->vm_mm;
 	struct mem_size_stats mss;
 
 	memset(&mss, 0, sizeof mss);
-
-	if (mm) {
-		spin_lock(&mm->page_table_lock);
+	if (vma->vm_mm)
 		smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss);
-		spin_unlock(&mm->page_table_lock);
-	}
-
 	return show_map_internal(m, v, &mss);
 }
 
@@ -407,7 +419,6 @@ static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
 	for_each_node(i)
 		md->node[i] =0;
 
-	spin_lock(&mm->page_table_lock);
 	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
 		page = follow_page(mm, vaddr, 0);
 		if (page) {
@@ -422,8 +433,8 @@ static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
 				md->anon++;
 			md->node[page_to_nid(page)]++;
 		}
+		cond_resched();
 	}
-	spin_unlock(&mm->page_table_lock);
 	return md;
 }
 
@@ -469,7 +480,7 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " interleave={");
 		first = 1;
 		for_each_node(n) {
-			if (test_bit(n, pol->v.nodes)) {
+			if (node_isset(n, pol->v.nodes)) {
 				if (!first)
 					seq_putc(m,',');
 				else

+ 4 - 3
fs/xfs/linux-2.6/xfs_buf.c

@@ -181,8 +181,9 @@ set_page_region(
 	size_t		offset,
 	size_t		length)
 {
-	page->private |= page_region_mask(offset, length);
-	if (page->private == ~0UL)
+	set_page_private(page,
+		page_private(page) | page_region_mask(offset, length));
+	if (page_private(page) == ~0UL)
 		SetPageUptodate(page);
 }
 
@@ -194,7 +195,7 @@ test_page_region(
 {
 	unsigned long	mask = page_region_mask(offset, length);
 
-	return (mask && (page->private & mask) == mask);
+	return (mask && (page_private(page) & mask) == mask);
 }
 
 /*

+ 2 - 0
include/asm-alpha/barrier.h

@@ -1,6 +1,8 @@
 #ifndef __BARRIER_H
 #define __BARRIER_H
 
+#include <asm/compiler.h>
+
 #define mb() \
 __asm__ __volatile__("mb": : :"memory")
 

+ 5 - 0
include/asm-alpha/rwsem.h

@@ -262,5 +262,10 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
 #endif
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _ALPHA_RWSEM_H */

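rwsem_is_locked() gives alpha the predicate the other rwsem implementations already provide; on this counter-based design any non-zero count (readers or a writer) means the semaphore is held. Its intended use is debug assertions, for example (illustrative):

	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));	/* caller must hold mmap_sem */

Note that it cannot tell read-held from write-held, so it is suitable for sanity checks rather than for locking decisions.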
+ 3 - 20
include/asm-arm/tlb.h

@@ -27,11 +27,7 @@
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
-	unsigned int		freed;
 	unsigned int		fullmm;
-
-	unsigned int		flushes;
-	unsigned int		avoided_flushes;
 };
 
 DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -39,11 +35,9 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	int cpu = smp_processor_id();
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, cpu);
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
-	tlb->freed = 0;
 	tlb->fullmm = full_mm_flush;
 
 	return tlb;
@@ -52,24 +46,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	struct mm_struct *mm = tlb->mm;
-	unsigned long freed = tlb->freed;
-	int rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
-
 	if (tlb->fullmm)
-		flush_tlb_mm(mm);
+		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-}
 
-static inline unsigned int tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
+	put_cpu_var(mmu_gathers);
 }
 
 #define tlb_remove_tlb_entry(tlb,ptep,address)	do { } while (0)

+ 16 - 31
include/asm-arm26/tlb.h

@@ -10,24 +10,20 @@
  */
 struct mmu_gather {
         struct mm_struct        *mm;
-        unsigned int            freed;
-	unsigned int            fullmm;
-
-        unsigned int            flushes;
-        unsigned int            avoided_flushes;
+        unsigned int            need_flush;
+        unsigned int            fullmm;
 };
 
-extern struct mmu_gather mmu_gathers[NR_CPUS];
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-        int cpu = smp_processor_id();
-        struct mmu_gather *tlb = &mmu_gathers[cpu];
+        struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
         tlb->mm = mm;
-        tlb->freed = 0;
-	tlb->fullmm = full_mm_flush;
+        tlb->need_flush = 0;
+        tlb->fullmm = full_mm_flush;
 
         return tlb;
 }
@@ -35,30 +31,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-        struct mm_struct *mm = tlb->mm;
-        unsigned long freed = tlb->freed;
-        int rss = get_mm_counter(mm, rss);
-
-        if (rss < freed)
-                freed = rss;
-        add_mm_counter(mm, rss, -freed);
-
-        if (freed) {
-                flush_tlb_mm(mm);
-                tlb->flushes++;
-        } else {
-                tlb->avoided_flushes++;
-        }
+        if (tlb->need_flush)
+                flush_tlb_mm(tlb->mm);
 
         /* keep the page table cache within bounds */
         check_pgt_cache();
-}
-
 
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-     return tlb->fullmm;
+        put_cpu_var(mmu_gathers);
 }
 
 #define tlb_remove_tlb_entry(tlb,ptep,address)  do { } while (0)
@@ -71,7 +50,13 @@ tlb_is_full_mm(struct mmu_gather *tlb)
         } while (0)
 #define tlb_end_vma(tlb,vma)                    do { } while (0)
 
-#define tlb_remove_page(tlb,page)       free_page_and_swap_cache(page)
+static inline void
+tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+        tlb->need_flush = 1;
+        free_page_and_swap_cache(page);
+}
+
 #define pte_free_tlb(tlb,ptep)          pte_free(ptep)
 #define pmd_free_tlb(tlb,pmdp)          pmd_free(pmdp)
 

+ 3 - 8
include/asm-generic/4level-fixup.h

@@ -10,14 +10,9 @@
 
 #define pud_t				pgd_t
 
-#define pmd_alloc(mm, pud, address)			\
-({	pmd_t *ret;					\
-	if (pgd_none(*pud))				\
- 		ret = __pmd_alloc(mm, pud, address);	\
- 	else						\
-		ret = pmd_offset(pud, address);		\
- 	ret;						\
-})
+#define pmd_alloc(mm, pud, address) \
+	((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
+ 		NULL: pmd_offset(pud, address))
 
 #define pud_alloc(mm, pgd, address)	(pgd)
 #define pud_offset(pgd, start)		(pgd)

+ 1 - 1
include/asm-generic/pgtable.h

@@ -8,7 +8,7 @@
  *  - update the page tables
  *  - inform the TLB about the new one
  *
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock.
+ * We hold the mm semaphore for reading, and the pte lock.
  *
  * Note: the old pte is known to not be writable, so we don't need to
  * worry about dirty bits etc getting lost.

+ 4 - 19
include/asm-generic/tlb.h

@@ -35,16 +35,13 @@
 #endif
 
 /* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.  This structure
- * can be per-CPU or per-MM as the page table lock is held for the duration of
- * TLB shootdown.
+ * any data needed by arch specific code for tlb_remove_page.
 */
 struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		nr;	/* set to ~0U means fast mode */
 	unsigned int		need_flush;/* Really unmapped some ptes? */
 	unsigned int		fullmm; /* non-zero means full mm flush */
-	unsigned long		freed;
 	struct page *		pages[FREE_PTE_NR];
 };
 
@@ -57,7 +54,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 
@@ -65,7 +62,6 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 	tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
 
 	tlb->fullmm = full_mm_flush;
-	tlb->freed = 0;
 
 	return tlb;
 }
@@ -85,28 +81,17 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
 /* tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
- *	that were required.  The page table lock is still held at this point.
+ *	that were required.
 */
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	int freed = tlb->freed;
-	struct mm_struct *mm = tlb->mm;
-	int rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
 	tlb_flush_mmu(tlb, start, end);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-}
 
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
+	put_cpu_var(mmu_gathers);
 }
 
 /* tlb_remove_page

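Taken together, the tlb.h changes (here and in the arm/arm26 headers above) move mmu_gather from raw per_cpu indexing to get_cpu_var()/put_cpu_var(), which disable and re-enable preemption around the shootdown now that the page table lock no longer pins the CPU, and they drop the freed accounting that has moved into the unmap path itself. The caller pattern is unchanged; a sketch of the usual sequence:

	struct mmu_gather *tlb;

	tlb = tlb_gather_mmu(mm, 1);		/* pins this CPU's gather */
	/* ... zap ptes, calling tlb_remove_page() on each page ... */
	tlb_finish_mmu(tlb, start, end);	/* flush, then put_cpu_var() */

The invariant to preserve is that nothing between gather and finish may sleep or migrate CPUs, since the gather is per-CPU state.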
+ 0 - 6
include/asm-i386/mmzone.h

@@ -88,12 +88,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
 })
 
-#define local_mapnr(kvaddr)						\
-({									\
-	unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT;		\
-	(__pfn - node_start_pfn(pfn_to_nid(__pfn)));			\
-})
-
 /* XXX: FIXME -- wli */
 #define kern_addr_valid(kaddr)	(0)
 

Some files were not shown because too many files changed in this diff.