
Merge x86-64 update from Andi

Linus Torvalds, 19 years ago
parent
commit
4060994c3e
82 files changed, 1481 insertions(+), 1097 deletions(-)
  1. Documentation/x86_64/boot-options.txt (+10, -2)
  2. Documentation/x86_64/mm.txt (+5, -1)
  3. arch/i386/kernel/acpi/boot.c (+8, -9)
  4. arch/i386/kernel/cpu/amd.c (+6, -6)
  5. arch/i386/kernel/cpu/common.c (+16, -24)
  6. arch/i386/kernel/cpu/intel.c (+1, -1)
  7. arch/i386/kernel/cpu/intel_cacheinfo.c (+32, -14)
  8. arch/i386/kernel/cpu/mtrr/main.c (+8, -0)
  9. arch/i386/kernel/cpu/proc.c (+3, -4)
  10. arch/i386/kernel/smpboot.c (+53, -20)
  11. arch/i386/kernel/srat.c (+2, -2)
  12. arch/ia64/Kconfig (+4, -0)
  13. arch/x86_64/Kconfig (+41, -16)
  14. arch/x86_64/Kconfig.debug (+0, -9)
  15. arch/x86_64/defconfig (+83, -15)
  16. arch/x86_64/ia32/ia32_aout.c (+0, -3)
  17. arch/x86_64/ia32/ia32_binfmt.c (+3, -1)
  18. arch/x86_64/kernel/Makefile (+1, -0)
  19. arch/x86_64/kernel/aperture.c (+1, -1)
  20. arch/x86_64/kernel/apic.c (+10, -0)
  21. arch/x86_64/kernel/e820.c (+1, -2)
  22. arch/x86_64/kernel/entry.S (+3, -0)
  23. arch/x86_64/kernel/head.S (+23, -14)
  24. arch/x86_64/kernel/head64.c (+10, -4)
  25. arch/x86_64/kernel/i8259.c (+2, -0)
  26. arch/x86_64/kernel/io_apic.c (+73, -7)
  27. arch/x86_64/kernel/mce.c (+12, -5)
  28. arch/x86_64/kernel/mce_amd.c (+538, -0)
  29. arch/x86_64/kernel/mpparse.c (+15, -8)
  30. arch/x86_64/kernel/pci-gart.c (+7, -1)
  31. arch/x86_64/kernel/process.c (+27, -20)
  32. arch/x86_64/kernel/reboot.c (+4, -3)
  33. arch/x86_64/kernel/setup.c (+41, -48)
  34. arch/x86_64/kernel/setup64.c (+1, -1)
  35. arch/x86_64/kernel/signal.c (+17, -0)
  36. arch/x86_64/kernel/smp.c (+3, -4)
  37. arch/x86_64/kernel/smpboot.c (+91, -20)
  38. arch/x86_64/kernel/sys_x86_64.c (+0, -14)
  39. arch/x86_64/kernel/traps.c (+4, -40)
  40. arch/x86_64/kernel/vmlinux.lds.S (+1, -1)
  41. arch/x86_64/kernel/x8664_ksyms.c (+3, -0)
  42. arch/x86_64/lib/clear_page.S (+0, -38)
  43. arch/x86_64/lib/copy_page.S (+0, -87)
  44. arch/x86_64/lib/memcpy.S (+2, -91)
  45. arch/x86_64/lib/memset.S (+0, -94)
  46. arch/x86_64/mm/fault.c (+7, -12)
  47. arch/x86_64/mm/init.c (+79, -50)
  48. arch/x86_64/mm/k8topology.c (+1, -0)
  49. arch/x86_64/mm/numa.c (+73, -49)
  50. arch/x86_64/mm/srat.c (+1, -5)
  51. drivers/char/agp/amd64-agp.c (+14, -3)
  52. include/asm-generic/sections.h (+1, -0)
  53. include/asm-i386/mach-default/mach_reboot.h (+1, -1)
  54. include/asm-i386/processor.h (+3, -1)
  55. include/asm-x86_64/apic.h (+2, -0)
  56. include/asm-x86_64/cache.h (+1, -1)
  57. include/asm-x86_64/desc.h (+3, -0)
  58. include/asm-x86_64/dma.h (+9, -2)
  59. include/asm-x86_64/hpet.h (+21, -14)
  60. include/asm-x86_64/hw_irq.h (+1, -1)
  61. include/asm-x86_64/ia32.h (+5, -0)
  62. include/asm-x86_64/mce.h (+10, -0)
  63. include/asm-x86_64/mmzone.h (+3, -6)
  64. include/asm-x86_64/mpspec.h (+4, -3)
  65. include/asm-x86_64/msr.h (+1, -1)
  66. include/asm-x86_64/numa.h (+2, -0)
  67. include/asm-x86_64/page.h (+1, -1)
  68. include/asm-x86_64/pda.h (+1, -0)
  69. include/asm-x86_64/pgtable.h (+3, -2)
  70. include/asm-x86_64/processor.h (+3, -1)
  71. include/asm-x86_64/proto.h (+4, -0)
  72. include/asm-x86_64/rwsem.h (+0, -283)
  73. include/asm-x86_64/smp.h (+2, -1)
  74. include/asm-x86_64/spinlock.h (+6, -6)
  75. include/asm-x86_64/topology.h (+2, -0)
  76. include/asm-x86_64/unistd.h (+2, -1)
  77. include/linux/bitops.h (+10, -0)
  78. include/linux/gfp.h (+11, -0)
  79. include/linux/mm.h (+2, -8)
  80. include/linux/mmzone.h (+12, -8)
  81. mm/filemap.c (+1, -1)
  82. mm/page_alloc.c (+14, -6)

+ 10 - 2
Documentation/x86_64/boot-options.txt

@@ -7,10 +7,12 @@ Machine check
 
    mce=off disable machine check
    mce=bootlog Enable logging of machine checks left over from booting.
-               Disabled by default because some BIOS leave bogus ones.
+               Disabled by default on AMD because some BIOS leave bogus ones.
                If your BIOS doesn't do that it's a good idea to enable though
                to make sure you log even machine check events that result
-               in a reboot.
+               in a reboot. On Intel systems it is enabled by default.
+   mce=nobootlog
+		Disable boot machine check logging.
    mce=tolerancelevel (number)
 		0: always panic, 1: panic if deadlock possible,
 		2: try to avoid panic, 3: never panic or exit (for testing)
@@ -122,6 +124,9 @@ SMP
 
   cpumask=MASK   only use cpus with bits set in mask
 
+  additional_cpus=NUM Allow NUM more CPUs for hotplug
+		 (defaults are specified by the BIOS or half the available CPUs)
+
 NUMA
 
   numa=off	Only set up a single NUMA node spanning all memory.
@@ -188,6 +193,9 @@ Debugging
 
   kstack=N   Print that many words from the kernel stack in oops dumps.
 
+  pagefaulttrace Dump all page faults. Only useful for extreme debugging
+		and will create a lot of output.
+
 Misc
 
   noreplacement  Don't replace instructions with more appropiate ones

+ 5 - 1
Documentation/x86_64/mm.txt

@@ -6,7 +6,7 @@ Virtual memory map with 4 level page tables:
 0000000000000000 - 00007fffffffffff (=47bits) user space, different per mm
 hole caused by [48:63] sign extension
 ffff800000000000 - ffff80ffffffffff (=40bits) guard hole
-ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of phys. memory
+ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of all phys. memory
 ffffc10000000000 - ffffc1ffffffffff (=40bits) hole
 ffffc20000000000 - ffffe1ffffffffff (=45bits) vmalloc/ioremap space
 ... unused hole ...
@@ -14,6 +14,10 @@ ffffffff80000000 - ffffffff82800000 (=40MB)   kernel text mapping, from phys 0
 ... unused hole ...
 ffffffff88000000 - fffffffffff00000 (=1919MB) module mapping space
 
+The direct mapping covers all memory in the system upto the highest
+memory address (this means in some cases it can also include PCI memory
+holes)
+
 vmalloc space is lazily synchronized into the different PML4 pages of
 the processes using the page fault handler, with init_level4_pgt as
 reference.
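
For illustration only (not part of the commit): because the direct mapping is a single fixed offset, physical/virtual conversion inside it is plain arithmetic. A minimal sketch, assuming the ffff810000000000 base from the map above; the kernel's real __va()/__pa() equivalents live in include/asm-x86_64/page.h:

	/* Sketch: translation within the direct mapping. Illustration only. */
	#define DIRECT_MAP_BASE 0xffff810000000000UL

	static inline void *direct_va(unsigned long phys)
	{
		return (void *)(phys + DIRECT_MAP_BASE);
	}

	static inline unsigned long direct_pa(const void *virt)
	{
		return (unsigned long)virt - DIRECT_MAP_BASE;
	}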

+ 8 - 9
arch/i386/kernel/acpi/boot.c

@@ -39,17 +39,14 @@
 
 #ifdef	CONFIG_X86_64
 
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
 extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void)
-{
-	return 0;
-}
 
+extern int gsi_irq_sharing(int gsi);
 #include <asm/proto.h>
 
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+
+
 #else				/* X86 */
 
 #ifdef	CONFIG_X86_LOCAL_APIC
@@ -57,6 +54,8 @@ static inline int ioapic_setup_disabled(void)
 #include <mach_mpparse.h>
 #endif				/* CONFIG_X86_LOCAL_APIC */
 
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
 #endif				/* X86 */
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
@@ -459,7 +458,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 		*irq = IO_APIC_VECTOR(gsi);
 	else
 #endif
-		*irq = gsi;
+		*irq = gsi_irq_sharing(gsi);
 	return 0;
 }
 
@@ -543,7 +542,7 @@ acpi_scan_rsdp(unsigned long start, unsigned long length)
 	 * RSDP signature.
 	 */
 	for (offset = 0; offset < length; offset += 16) {
-		if (strncmp((char *)(start + offset), "RSD PTR ", sig_len))
+		if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
 			continue;
 		return (start + offset);
 	}

+ 6 - 6
arch/i386/kernel/cpu/amd.c

@@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 	display_cacheinfo(c);
 
 	if (cpuid_eax(0x80000000) >= 0x80000008) {
-		c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-		if (c->x86_num_cores & (c->x86_num_cores - 1))
-			c->x86_num_cores = 1;
+		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+		if (c->x86_max_cores & (c->x86_max_cores - 1))
+			c->x86_max_cores = 1;
 	}
 
 #ifdef CONFIG_X86_HT
@@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 	 * distingush the cores.  Assumes number of cores is a power
 	 * of two.
 	 */
-	if (c->x86_num_cores > 1) {
+	if (c->x86_max_cores > 1) {
 		int cpu = smp_processor_id();
 		unsigned bits = 0;
-		while ((1 << bits) < c->x86_num_cores)
+		while ((1 << bits) < c->x86_max_cores)
 			bits++;
 		cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
 		phys_proc_id[cpu] >>= bits;
 		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
-		       cpu, c->x86_num_cores, cpu_core_id[cpu]);
+		       cpu, c->x86_max_cores, cpu_core_id[cpu]);
 	}
 #endif
 }

+ 16 - 24
arch/i386/kernel/cpu/common.c

@@ -231,10 +231,10 @@ static void __init early_cpu_detect(void)
 		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
 		c->x86 = (tfms >> 8) & 15;
 		c->x86_model = (tfms >> 4) & 15;
-		if (c->x86 == 0xf) {
+		if (c->x86 == 0xf)
 			c->x86 += (tfms >> 20) & 0xff;
+		if (c->x86 >= 0x6)
 			c->x86_model += ((tfms >> 16) & 0xF) << 4;
-		}
 		c->x86_mask = tfms & 15;
 		if (cap0 & (1<<19))
 			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
@@ -333,7 +333,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
 	c->x86_vendor_id[0] = '\0'; /* Unset */
 	c->x86_model_id[0] = '\0';  /* Unset */
-	c->x86_num_cores = 1;
+	c->x86_max_cores = 1;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
 	if (!have_cpuid_p()) {
@@ -443,52 +443,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 void __devinit detect_ht(struct cpuinfo_x86 *c)
 {
 	u32 	eax, ebx, ecx, edx;
-	int 	index_msb, tmp;
+	int 	index_msb, core_bits;
 	int 	cpu = smp_processor_id();
 
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
 
-	cpuid(1, &eax, &ebx, &ecx, &edx);
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
 	} else if (smp_num_siblings > 1 ) {
-		index_msb = 31;
 
 		if (smp_num_siblings > NR_CPUS) {
 			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
 			smp_num_siblings = 1;
 			return;
 		}
-		tmp = smp_num_siblings;
-		while ((tmp & 0x80000000 ) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+
+		index_msb = get_count_order(smp_num_siblings);
 		phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
 		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 		       phys_proc_id[cpu]);
 
-		smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
-		tmp = smp_num_siblings;
-		index_msb = 31;
-		while ((tmp & 0x80000000) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
+		index_msb = get_count_order(smp_num_siblings) ;
 
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+		core_bits = get_count_order(c->x86_max_cores);
 
-		cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+		cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+					       ((1 << core_bits) - 1);
 
-		if (c->x86_num_cores > 1)
+		if (c->x86_max_cores > 1)
 			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
 			       cpu_core_id[cpu]);
 	}
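
The open-coded highest-set-bit loops removed above are replaced by get_count_order(), which this same merge adds to include/linux/bitops.h (file 77 in the list). For reference, a sketch of its logic, assuming the kernel's fls() (1-based position of the highest set bit); it computes ceil(log2(count)):

	/* Sketch of get_count_order(); illustration only. */
	static inline int get_count_order(unsigned int count)
	{
		int order = fls(count) - 1;	/* floor(log2(count)) */
		if (count & (count - 1))	/* not a power of two? */
			order++;		/* round up */
		return order;
	}

So get_count_order(4) == 2 and get_count_order(6) == 3; for power-of-two sibling counts the result matches the old loop exactly.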

+ 1 - 1
arch/i386/kernel/cpu/intel.c

@@ -158,7 +158,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c)
 	if ( p )
 		strcpy(c->x86_model_id, p);
 	
-	c->x86_num_cores = num_cpu_cores(c);
+	c->x86_max_cores = num_cpu_cores(c);
 
 	detect_ht(c);
 

+ 32 - 14
arch/i386/kernel/cpu/intel_cacheinfo.c

@@ -293,29 +293,45 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS];
 #ifdef CONFIG_SMP
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 {
-	struct _cpuid4_info	*this_leaf;
+	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 	unsigned long num_threads_sharing;
-#ifdef CONFIG_X86_HT
-	struct cpuinfo_x86 *c = cpu_data + cpu;
-#endif
+	int index_msb, i;
+	struct cpuinfo_x86 *c = cpu_data;
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
 
 	if (num_threads_sharing == 1)
 		cpu_set(cpu, this_leaf->shared_cpu_map);
-#ifdef CONFIG_X86_HT
-	else if (num_threads_sharing == smp_num_siblings)
-		this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
-	else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
-		this_leaf->shared_cpu_map = cpu_core_map[cpu];
-	else
-		printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
-				"any known set of CPUs\n");
-#endif
+	else {
+		index_msb = get_count_order(num_threads_sharing);
+
+		for_each_online_cpu(i) {
+			if (c[i].apicid >> index_msb ==
+			    c[cpu].apicid >> index_msb) {
+				cpu_set(i, this_leaf->shared_cpu_map);
+				if (i != cpu && cpuid4_info[i])  {
+					sibling_leaf = CPUID4_INFO_IDX(i, index);
+					cpu_set(cpu, sibling_leaf->shared_cpu_map);
+				}
+			}
+		}
+	}
+}
+static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+{
+	struct _cpuid4_info	*this_leaf, *sibling_leaf;
+	int sibling;
+
+	this_leaf = CPUID4_INFO_IDX(cpu, index);
+	for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
+		sibling_leaf = CPUID4_INFO_IDX(sibling, index);	
+		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+	}
 }
 #else
 static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
+static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
 #endif
 
 static void free_cache_attributes(unsigned int cpu)
@@ -574,8 +590,10 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
 	unsigned int cpu = sys_dev->id;
 	unsigned long i;
 
-	for (i = 0; i < num_cache_leaves; i++)
+	for (i = 0; i < num_cache_leaves; i++) {
+		cache_remove_shared_cpu_map(cpu, i);
 		kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
+	}
 	kobject_unregister(cache_kobject[cpu]);
 	cpuid4_cache_sysfs_exit(cpu);
 	return;
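
For illustration (not part of the diff): the new sharing test works because the threads sharing a cache level occupy one aligned block of APIC IDs, so two CPUs share the cache exactly when their APIC IDs agree above the low index_msb bits. A sketch under that assumption:

	/* Sketch: do two CPUs share a cache level? Illustration only. */
	static int shares_cache(unsigned apicid_a, unsigned apicid_b,
				unsigned long num_threads_sharing)
	{
		int index_msb = get_count_order(num_threads_sharing);
		return (apicid_a >> index_msb) == (apicid_b >> index_msb);
	}

With num_threads_sharing == 2 this gives index_msb == 1, so APIC IDs 4 and 5 share the leaf while 4 and 6 do not.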

+ 8 - 0
arch/i386/kernel/cpu/mtrr/main.c

@@ -626,6 +626,14 @@ void __init mtrr_bp_init(void)
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
 			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
+			/* CPUID workaround for Intel 0F33/0F34 CPU */
+			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+			    boot_cpu_data.x86 == 0xF &&
+			    boot_cpu_data.x86_model == 0x3 &&
+			    (boot_cpu_data.x86_mask == 0x3 ||
+			     boot_cpu_data.x86_mask == 0x4))
+				phys_addr = 36;
+
 			size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
 			size_and_mask = ~size_or_mask & 0xfff00000;
 		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
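
Working the masks through for the quirk case (illustration only, assuming PAGE_SHIFT == 12): phys_addr = 36 gives size_or_mask = ~((1 << 24) - 1) = 0xff000000 and size_and_mask = ~0xff000000 & 0xfff00000 = 0x00f00000, i.e. MTRR sizes are treated as 36-bit physical quantities measured in 4 KB page units.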

+ 3 - 4
arch/i386/kernel/cpu/proc.c

@@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	if (c->x86_cache_size >= 0)
 		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
 #ifdef CONFIG_X86_HT
-	if (c->x86_num_cores * smp_num_siblings > 1) {
+	if (c->x86_max_cores * smp_num_siblings > 1) {
 		seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
-		seq_printf(m, "siblings\t: %d\n",
-				c->x86_num_cores * smp_num_siblings);
+		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
 		seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]);
-		seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
 #endif
 	

+ 53 - 20
arch/i386/kernel/smpboot.c

@@ -72,9 +72,11 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 /* Core ID of each logical CPU */
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 
+/* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
 
+/* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
@@ -442,35 +444,60 @@ static void __devinit smp_callin(void)
 
 static int cpucount;
 
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
 static inline void
 set_cpu_sibling_map(int cpu)
 {
 	int i;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
-		for (i = 0; i < NR_CPUS; i++) {
-			if (!cpu_isset(i, cpu_callout_map))
-				continue;
-			if (cpu_core_id[cpu] == cpu_core_id[i]) {
+		for_each_cpu_mask(i, cpu_sibling_setup_map) {
+			if (phys_proc_id[cpu] == phys_proc_id[i] &&
+			    cpu_core_id[cpu] == cpu_core_id[i]) {
 				cpu_set(i, cpu_sibling_map[cpu]);
 				cpu_set(cpu, cpu_sibling_map[i]);
+				cpu_set(i, cpu_core_map[cpu]);
+				cpu_set(cpu, cpu_core_map[i]);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
-	if (current_cpu_data.x86_num_cores > 1) {
-		for (i = 0; i < NR_CPUS; i++) {
-			if (!cpu_isset(i, cpu_callout_map))
-				continue;
-			if (phys_proc_id[cpu] == phys_proc_id[i]) {
-				cpu_set(i, cpu_core_map[cpu]);
-				cpu_set(cpu, cpu_core_map[i]);
-			}
-		}
-	} else {
+	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
+		c[cpu].booted_cores = 1;
+		return;
+	}
+
+	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (phys_proc_id[cpu] == phys_proc_id[i]) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			/*
+			 *  Does this new cpu bringup a new core?
+			 */
+			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+				/*
+				 * for each core in package, increment
+				 * the booted_cores for this new cpu
+				 */
+				if (first_cpu(cpu_sibling_map[i]) == i)
+					c[cpu].booted_cores++;
+				/*
+				 * increment the core count for all
+				 * the other cpus in this package
+				 */
+				if (i != cpu)
+					c[i].booted_cores++;
+			} else if (i != cpu && !c[cpu].booted_cores)
+				c[cpu].booted_cores = c[i].booted_cores;
+		}
 	}
 }
 
@@ -1095,11 +1122,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
 	current_thread_info()->cpu = 0;
 	smp_tune_scheduling();
-	cpus_clear(cpu_sibling_map[0]);
-	cpu_set(0, cpu_sibling_map[0]);
 
-	cpus_clear(cpu_core_map[0]);
-	cpu_set(0, cpu_core_map[0]);
+	set_cpu_sibling_map(0);
 
 	/*
 	 * If we couldn't find an SMP configuration at boot time,
@@ -1278,15 +1302,24 @@ static void
 remove_siblinginfo(int cpu)
 {
 	int sibling;
+	struct cpuinfo_x86 *c = cpu_data;
 
+	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+		cpu_clear(cpu, cpu_core_map[sibling]);
+		/*
+		 * last thread sibling in this cpu core going down
+		 */
+		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+			c[sibling].booted_cores--;
+	}
+			
 	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
 		cpu_clear(cpu, cpu_sibling_map[sibling]);
-	for_each_cpu_mask(sibling, cpu_core_map[cpu])
-		cpu_clear(cpu, cpu_core_map[sibling]);
 	cpus_clear(cpu_sibling_map[cpu]);
 	cpus_clear(cpu_core_map[cpu]);
 	phys_proc_id[cpu] = BAD_APICID;
 	cpu_core_id[cpu] = BAD_APICID;
+	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
 int __cpu_disable(void)
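
A worked example of the booted_cores accounting (illustration only): on one package with two cores of two threads each, where CPUs 0 and 1 are thread siblings on core A and CPU 2 sits on core B, bringing up CPU 0 counts one core (booted_cores = 1); bringing up CPU 2 adds core B, leaving booted_cores = 2 on both; bringing up CPU 1, whose sibling map now has two entries, takes the else branch and simply inherits booted_cores = 2 from CPU 0. The "cpu cores" field in /proc/cpuinfo (see the proc.c hunk above) then reports 2.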

+ 2 - 2
arch/i386/kernel/srat.c

@@ -137,8 +137,8 @@ static void __init parse_memory_affinity_structure (char *sratp)
 		 "enabled and removable" : "enabled" ) );
 }
 
-#if MAX_NR_ZONES != 3
-#error "MAX_NR_ZONES != 3, chunk_to_zone requires review"
+#if MAX_NR_ZONES != 4
+#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
 #endif
 /* Take a chunk of pages from page frame cstart to cend and count the number
  * of pages in each zone, returned via zones[].

+ 4 - 0
arch/ia64/Kconfig

@@ -58,6 +58,10 @@ config IA64_UNCACHED_ALLOCATOR
 	bool
 	select GENERIC_ALLOCATOR
 
+config ZONE_DMA_IS_DMA32
+	bool
+	default y
+
 choice
 	prompt "System type"
 	default IA64_GENERIC

+ 41 - 16
arch/x86_64/Kconfig

@@ -226,22 +226,42 @@ config SCHED_SMT
 
 source "kernel/Kconfig.preempt"
 
-config K8_NUMA
-       bool "K8 NUMA support"
-       select NUMA
+config NUMA
+       bool "Non Uniform Memory Access (NUMA) Support"
       depends on SMP
       help
-	  Enable NUMA (Non Unified Memory Architecture) support for
-	  AMD Opteron Multiprocessor systems. The kernel will try to allocate
-	  memory used by a CPU on the local memory controller of the CPU
-	  and add some more NUMA awareness to the kernel.
-	  This code is recommended on all multiprocessor Opteron systems
-	  and normally doesn't hurt on others.
+	 Enable NUMA (Non Uniform Memory Access) support. The kernel 
+	 will try to allocate memory used by a CPU on the local memory 
+	 controller of the CPU and add some more NUMA awareness to the kernel.
+	 This code is recommended on all multiprocessor Opteron systems.
+	 If the system is EM64T, you should say N unless your system is EM64T 
+	 NUMA. 
+
+config K8_NUMA
+       bool "Old style AMD Opteron NUMA detection"
+       depends on NUMA
+       default y
+       help
+	 Enable K8 NUMA node topology detection.  You should say Y here if
+	 you have a multi processor AMD K8 system. This uses an old
+	 method to read the NUMA configurtion directly from the builtin
+	 Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+	 instead, which also takes priority if both are compiled in.   
+
+# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
+
+config X86_64_ACPI_NUMA
+       bool "ACPI NUMA detection"
+       depends on NUMA
+       select ACPI 
+       select ACPI_NUMA
+       default y
+       help
+	 Enable ACPI SRAT based node topology detection.
 
 config NUMA_EMU
-	bool "NUMA emulation support"
-	select NUMA
-	depends on SMP
+	bool "NUMA emulation"
+	depends on NUMA
 	help
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
@@ -252,9 +272,6 @@ config ARCH_DISCONTIGMEM_ENABLE
       depends on NUMA
       default y
 
-config NUMA
-       bool
-       default n
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -374,6 +391,14 @@ config X86_MCE_INTEL
 	   Additional support for intel specific MCE features such as
 	   the thermal monitor.
 
+config X86_MCE_AMD
+	bool "AMD MCE features"
+	depends on X86_MCE && X86_LOCAL_APIC
+	default y
+	help
+	   Additional support for AMD specific MCE features such as
+	   the DRAM Error Threshold.
+
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if EMBEDDED
 	default "0x100000"
@@ -502,7 +527,7 @@ config IA32_EMULATION
 	  left.
 
 config IA32_AOUT
-       bool "IA32 a.out support"
+       tristate "IA32 a.out support"
       depends on IA32_EMULATION
       help
         Support old a.out binaries in the 32bit emulation.

+ 0 - 9
arch/x86_64/Kconfig.debug

@@ -2,15 +2,6 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-# !SMP for now because the context switch early causes GPF in segment reloading
-# and the GS base checking does the wrong thing then, causing a hang.
-config CHECKING
-	bool "Additional run-time checks"
-	depends on DEBUG_KERNEL && !SMP
-	help
-	  Enables some internal consistency checks for kernel debugging.
-	  You should normally say N.
-
 config INIT_DEBUG
 	bool "Debug __init statements"
 	depends on DEBUG_KERNEL

+ 83 - 15
arch/x86_64/defconfig

@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-git11
-# Mon Sep 12 16:16:16 2005
+# Linux kernel version: 2.6.14-git7
+# Sat Nov  5 15:55:50 2005
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -35,7 +35,7 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
+CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -93,10 +93,11 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 CONFIG_PREEMPT_BKL=y
+CONFIG_NUMA=y
 CONFIG_K8_NUMA=y
+CONFIG_X86_64_ACPI_NUMA=y
 # CONFIG_NUMA_EMU is not set
 CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_NUMA=y
 CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -107,9 +108,10 @@ CONFIG_DISCONTIGMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_NEED_MULTIPLE_NODES=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
-CONFIG_HAVE_DEC_LOCK=y
 CONFIG_NR_CPUS=32
+CONFIG_HOTPLUG_CPU=y
 CONFIG_HPET_TIMER=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
@@ -117,6 +119,7 @@ CONFIG_GART_IOMMU=y
 CONFIG_SWIOTLB=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
 CONFIG_PHYSICAL_START=0x100000
 # CONFIG_KEXEC is not set
 CONFIG_SECCOMP=y
@@ -136,11 +139,15 @@ CONFIG_PM=y
 # CONFIG_PM_DEBUG is not set
 CONFIG_SOFTWARE_SUSPEND=y
 CONFIG_PM_STD_PARTITION=""
+CONFIG_SUSPEND_SMP=y
 
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
 CONFIG_ACPI=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_SLEEP_PROC_FS=y
+CONFIG_ACPI_SLEEP_PROC_SLEEP=y
 CONFIG_ACPI_AC=y
 CONFIG_ACPI_BATTERY=y
 CONFIG_ACPI_BUTTON=y
@@ -148,6 +155,7 @@ CONFIG_ACPI_BUTTON=y
 CONFIG_ACPI_HOTKEY=m
 CONFIG_ACPI_FAN=y
 CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_THERMAL=y
 CONFIG_ACPI_NUMA=y
 # CONFIG_ACPI_ASUS is not set
@@ -158,7 +166,7 @@ CONFIG_ACPI_BLACKLIST_YEAR=2001
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_SYSTEM=y
-# CONFIG_ACPI_CONTAINER is not set
+CONFIG_ACPI_CONTAINER=y
 
 #
 # CPU Frequency scaling
@@ -293,7 +301,6 @@ CONFIG_IPV6=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
-# CONFIG_NETFILTER_NETLINK is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
@@ -311,6 +318,11 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_FW_LOADER is not set
 # CONFIG_DEBUG_DRIVER is not set
 
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
 #
 # Memory Technology Devices (MTD)
 #
@@ -354,6 +366,11 @@ CONFIG_IOSCHED_NOOP=y
 # CONFIG_IOSCHED_AS is not set
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_DEADLINE=y
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
 # CONFIG_ATA_OVER_ETH is not set
 
 #
@@ -450,6 +467,7 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_FC_ATTRS is not set
 # CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_ATTRS is not set
 
 #
 # SCSI low-level drivers
@@ -469,20 +487,24 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
 # CONFIG_SCSI_SATA_AHCI is not set
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
-# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
+CONFIG_SCSI_SATA_NV=y
+# CONFIG_SCSI_PDC_ADMA is not set
 # CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
 # CONFIG_SCSI_SATA_SX4 is not set
 # CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIL24 is not set
 # CONFIG_SCSI_SATA_SIS is not set
 # CONFIG_SCSI_SATA_ULI is not set
 CONFIG_SCSI_SATA_VIA=y
 # CONFIG_SCSI_SATA_VITESSE is not set
+CONFIG_SCSI_SATA_INTEL_COMBINED=y
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
@@ -525,6 +547,7 @@ CONFIG_BLK_DEV_DM=y
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 # CONFIG_FUSION_FC is not set
+# CONFIG_FUSION_SAS is not set
 CONFIG_FUSION_MAX_SGE=128
 # CONFIG_FUSION_CTL is not set
 
@@ -564,6 +587,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_HAPPYMEAL is not set
 # CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
 CONFIG_NET_VENDOR_3COM=y
 CONFIG_VORTEX=y
 # CONFIG_TYPHOON is not set
@@ -740,7 +764,43 @@ CONFIG_LEGACY_PTY_COUNT=256
 #
 # Watchdog Cards
 #
-# CONFIG_WATCHDOG is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+CONFIG_SOFT_WATCHDOG=y
+# CONFIG_ACQUIRE_WDT is not set
+# CONFIG_ADVANTECH_WDT is not set
+# CONFIG_ALIM1535_WDT is not set
+# CONFIG_ALIM7101_WDT is not set
+# CONFIG_SC520_WDT is not set
+# CONFIG_EUROTECH_WDT is not set
+# CONFIG_IB700_WDT is not set
+# CONFIG_IBMASR is not set
+# CONFIG_WAFER_WDT is not set
+# CONFIG_I6300ESB_WDT is not set
+# CONFIG_I8XX_TCO is not set
+# CONFIG_SC1200_WDT is not set
+# CONFIG_60XX_WDT is not set
+# CONFIG_SBC8360_WDT is not set
+# CONFIG_CPU5_WDT is not set
+# CONFIG_W83627HF_WDT is not set
+# CONFIG_W83877F_WDT is not set
+# CONFIG_W83977F_WDT is not set
+# CONFIG_MACHZ_WDT is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
 CONFIG_HW_RANDOM=y
 # CONFIG_NVRAM is not set
 CONFIG_RTC=y
@@ -767,6 +827,7 @@ CONFIG_MAX_RAW_DEVS=256
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
 
 #
 # I2C support
@@ -783,6 +844,7 @@ CONFIG_MAX_RAW_DEVS=256
 #
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_HDAPS is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 
 #
@@ -886,12 +948,15 @@ CONFIG_USB_UHCI_HCD=y
 # USB Device Class drivers
 #
 # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-# CONFIG_USB_BLUETOOTH_TTY is not set
 # CONFIG_USB_ACM is not set
 CONFIG_USB_PRINTER=y
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -924,6 +989,7 @@ CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_XPAD is not set
 # CONFIG_USB_ATI_REMOTE is not set
 # CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
 
 #
 # USB Imaging devices
@@ -1005,7 +1071,7 @@ CONFIG_USB_MON=y
 #
 # CONFIG_EDD is not set
 # CONFIG_DELL_RBU is not set
-CONFIG_DCDBAS=m
+# CONFIG_DCDBAS is not set
 
 #
 # File systems
@@ -1037,7 +1103,7 @@ CONFIG_INOTIFY=y
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
-# CONFIG_AUTOFS4_FS is not set
+CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
 
 #
@@ -1068,7 +1134,7 @@ CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
 CONFIG_HUGETLB_PAGE=y
 CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
+CONFIG_RELAYFS_FS=y
 
 #
 # Miscellaneous filesystems
@@ -1186,7 +1252,9 @@ CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_INFO is not set
 CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
 # CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_INIT_DEBUG=y
 # CONFIG_IOMMU_DEBUG is not set
 CONFIG_KPROBES=y

+ 0 - 3
arch/x86_64/ia32/ia32_aout.c

@@ -36,9 +36,6 @@
 #undef WARN_OLD
 #undef CORE_DUMP /* probably broken */
 
-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
-				unsigned long stack_top, int exec_stack);
-
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
 

+ 3 - 1
arch/x86_64/ia32/ia32_binfmt.c

@@ -335,7 +335,8 @@ static void elf32_init(struct pt_regs *regs)
 	me->thread.es = __USER_DS;
 }
 
-int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack)
+int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
+			 int executable_stack)
 {
 	unsigned long stack_base;
 	struct vm_area_struct *mpnt;
@@ -389,6 +390,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec
 	
 	return 0;
 }
+EXPORT_SYMBOL(ia32_setup_arg_pages);
 
 static unsigned long
 elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)

+ 1 - 0
arch/x86_64/kernel/Makefile

@@ -11,6 +11,7 @@ obj-y	:= process.o signal.o entry.o traps.o irq.o \
 
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_X86_MSR)		+= msr.o

+ 1 - 1
arch/x86_64/kernel/aperture.c

@@ -196,7 +196,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 void __init iommu_hole_init(void) 
 { 
 	int fix, num; 
-	u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0;
+	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base, last_aper_base = 0;
 	int valid_agp = 0;
 

+ 10 - 0
arch/x86_64/kernel/apic.c

@@ -833,6 +833,16 @@ int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_X86_MCE_AMD
+void setup_threshold_lvt(unsigned long lvt_off)
+{
+	unsigned int v = 0;
+	unsigned long reg = (lvt_off << 4) + 0x500;
+	v |= THRESHOLD_APIC_VECTOR;
+	apic_write(reg, v);
+}
+#endif /* CONFIG_X86_MCE_AMD */
+
 #undef APIC_DIVISOR
 
 /*
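
For illustration (not part of the diff): the threshold LVT registers sit at a 16-byte stride starting at APIC offset 0x500, which is all the address arithmetic above computes; writing only the vector leaves the LVT mask bit clear, so delivery stays enabled:

	/* Sketch: APIC register address of a threshold LVT. Illustration only. */
	static unsigned long threshold_lvt_reg(unsigned long lvt_off)
	{
		return 0x500 + (lvt_off << 4);	/* lvt_off 0 -> 0x500, 1 -> 0x510 */
	}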

+ 1 - 2
arch/x86_64/kernel/e820.c

@@ -23,8 +23,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/bootsetup.h>
-
-extern char _end[];
+#include <asm/sections.h>
 
 /* 
  * PFN of last memory page.

+ 3 - 0
arch/x86_64/kernel/entry.S

@@ -612,6 +612,9 @@ retint_kernel:
 ENTRY(thermal_interrupt)
 	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
 
+ENTRY(threshold_interrupt)
+	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
 #ifdef CONFIG_SMP	
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

+ 23 - 14
arch/x86_64/kernel/head.S

@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -70,7 +71,7 @@ startup_32:
 	movl	%eax, %cr4
 
 	/* Setup early boot stage 4 level pagetables */
-	movl	$(init_level4_pgt - __START_KERNEL_map), %eax
+	movl	$(boot_level4_pgt - __START_KERNEL_map), %eax
 	movl	%eax, %cr3
 
 	/* Setup EFER (Extended Feature Enable Register) */
@@ -113,7 +114,7 @@ startup_64:
 	movq	%rax, %cr4
 
 	/* Setup early boot stage 4 level pagetables. */
-	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(boot_level4_pgt - __START_KERNEL_map), %rax
 	movq	%rax, %cr3
 
 	/* Check if nx is implemented */
@@ -240,20 +241,10 @@ ljumpvector:
 ENTRY(stext)
 ENTRY(_stext)
 
-	/*
-	 * This default setting generates an ident mapping at address 0x100000
-	 * and a mapping for the kernel that precisely maps virtual address
-	 * 0xffffffff80000000 to physical address 0x000000. (always using
-	 * 2Mbyte large pages provided by PAE mode)
-	 */
 .org 0x1000
 ENTRY(init_level4_pgt)
-	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
-	.fill	255,8,0
-	.quad	0x000000000000a007 + __PHYSICAL_START
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+	/* This gets initialized in x86_64_start_kernel */
+	.fill	512,8,0
 
 .org 0x2000
 ENTRY(level3_ident_pgt)
@@ -350,6 +341,24 @@ ENTRY(wakeup_level4_pgt)
 	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
 #endif
 
+#ifndef CONFIG_HOTPLUG_CPU
+	__INITDATA
+#endif
+	/*
+	 * This default setting generates an ident mapping at address 0x100000
+	 * and a mapping for the kernel that precisely maps virtual address
+	 * 0xffffffff80000000 to physical address 0x000000. (always using
+	 * 2Mbyte large pages provided by PAE mode)
+	 */
+	.align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
+	.fill	255,8,0
+	.quad	0x000000000000a007 + __PHYSICAL_START
+	.fill	254,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+
 	.data
 
 	.align 16

+ 10 - 4
arch/x86_64/kernel/head64.c

@@ -19,14 +19,15 @@
 #include <asm/bootsetup.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
 static void __init clear_bss(void)
 {
-	extern char __bss_start[], __bss_end[];
 	memset(__bss_start, 0,
-	       (unsigned long) __bss_end - (unsigned long) __bss_start);
+	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
 }
 
 #define NEW_CL_POINTER		0x228	/* Relative to real mode data */
@@ -75,8 +76,6 @@ static void __init setup_boot_cpu_data(void)
 	boot_cpu_data.x86_mask = eax & 0xf;
 }
 
-extern char _end[];
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	char *s;
@@ -86,6 +85,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
 	clear_bss();
+
+	/*
+	 * switch to init_level4_pgt from boot_level4_pgt
+	 */
+	memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
+	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+
 	pda_init(0);
 	copy_bootdata(real_mode_data);
 #ifdef CONFIG_SMP
+ 2 - 0
arch/x86_64/kernel/i8259.c

@@ -492,6 +492,7 @@ void invalidate_interrupt5(void);
 void invalidate_interrupt6(void);
 void invalidate_interrupt6(void);
 void invalidate_interrupt7(void);
 void invalidate_interrupt7(void);
 void thermal_interrupt(void);
 void thermal_interrupt(void);
+void threshold_interrupt(void);
 void i8254_timer_resume(void);
 void i8254_timer_resume(void);
 
 
 static void setup_timer_hardware(void)
 static void setup_timer_hardware(void)
@@ -580,6 +581,7 @@ void __init init_IRQ(void)
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 #endif	
 #endif	
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	/* self generated IPI for local APIC timer */

+ 73 - 7
arch/x86_64/kernel/io_apic.c

@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
  * Rough estimation of how many shared IRQs there are, can
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  * be changed anytime.
  */
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
 
 /*
 /*
@@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 	int pin;							\
 	int pin;							\
 	struct irq_pin_list *entry = irq_2_pin + irq;			\
 	struct irq_pin_list *entry = irq_2_pin + irq;			\
 									\
 									\
+	BUG_ON(irq >= NR_IRQS);						\
 	for (;;) {							\
 	for (;;) {							\
 		unsigned int reg;					\
 		unsigned int reg;					\
 		pin = entry->pin;					\
 		pin = entry->pin;					\
@@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 }
 }
 #endif
 #endif
 
 
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
 /*
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	static int first_free_entry = NR_IRQS;
 	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
 
+	BUG_ON(irq >= NR_IRQS);
 	while (entry->next)
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 		entry = irq_2_pin + entry->next;
 
 
@@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry->next = first_free_entry;
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
 		entry = irq_2_pin + entry->next;
 		if (++first_free_entry >= PIN_MAP_SIZE)
 		if (++first_free_entry >= PIN_MAP_SIZE)
-			panic("io_apic.c: whoops");
+			panic("io_apic.c: ran out of irq_2_pin entries!");
 	}
 	}
 	entry->apic = apic;
 	entry->apic = apic;
 	entry->pin = pin;
 	entry->pin = pin;
@@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 				best_guess = irq;
 		}
 		}
 	}
 	}
+	BUG_ON(best_guess >= NR_IRQS);
 	return best_guess;
 	return best_guess;
 }
 }
 
 
@@ -610,6 +615,64 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 	return MPBIOS_trigger(idx);
 }
 }
 
 
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+	int i, tries, vector;
+
+	BUG_ON(gsi >= NR_IRQ_VECTORS);
+
+	if (platform_legacy_irq(gsi))
+		return gsi;
+
+	if (gsi_2_irq[gsi] != 0xFF)
+		return (int)gsi_2_irq[gsi];
+
+	tries = NR_IRQS;
+  try_again:
+	vector = assign_irq_vector(gsi);
+
+	/*
+	 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+	 * use of vector and if found, return that IRQ.  However, we never want
+	 * to share legacy IRQs, which usually have a different trigger mode
+	 * than PCI.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		if (IO_APIC_VECTOR(i) == vector)
+			break;
+	if (platform_legacy_irq(i)) {
+		if (--tries >= 0) {
+			IO_APIC_VECTOR(i) = 0;
+			goto try_again;
+		}
+		panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+	}
+	if (i < NR_IRQS) {
+		gsi_2_irq[gsi] = i;
+		printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+				gsi, vector, i);
+		return i;
+	}
+
+	i = next_irq++;
+	BUG_ON(i >= NR_IRQS);
+	gsi_2_irq[gsi] = i;
+	IO_APIC_VECTOR(i) = vector;
+	printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+			gsi, vector, i);
+	return i;
+}
+
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			while (i < apic)
 				irq += nr_ioapic_registers[i++];
 			irq += pin;
+			irq = gsi_irq_sharing(irq);
 			break;
 		}
 		default:
@@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			break;
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);

 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			}
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 	return irq;
 }

@@ -690,8 +756,8 @@ int assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;

-	BUG_ON(irq >= NR_IRQ_VECTORS);
-	if (IO_APIC_VECTOR(irq) > 0)
+	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
 		return IO_APIC_VECTOR(irq);
 next:
 	current_vector += 8;
@@ -699,9 +765,8 @@ next:
 		goto next;

 	if (current_vector >= FIRST_SYSTEM_VECTOR) {
-		offset++;
-		if (!(offset%8))
-			return -ENOSPC;
+		/* If we run out of vectors on large boxen, must share them. */
+		offset = (offset + 1) % 8;
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 	}

@@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 	entry.polarity = active_high_low;
 	entry.mask = 1;					 /* Disabled (masked) */

+	irq = gsi_irq_sharing(irq);
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
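The allocation pattern in assign_irq_vector() is easier to see in isolation: vectors advance in strides of 8 (one per interrupt priority level), and once FIRST_SYSTEM_VECTOR is reached the walk now wraps to the base with the next offset instead of returning -ENOSPC, so vectors end up shared. The vector constants below are assumed values of the era, for illustration only:

	#include <stdio.h>

	#define FIRST_DEVICE_VECTOR 0x31	/* assumed x86-64 values */
	#define FIRST_SYSTEM_VECTOR 0xef

	int main(void)
	{
		int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
		int i;

		for (i = 0; i < 26; i++) {
			current_vector += 8;
			if (current_vector >= FIRST_SYSTEM_VECTOR) {
				/* wrap instead of failing; vectors get reused */
				offset = (offset + 1) % 8;
				current_vector = FIRST_DEVICE_VECTOR + offset;
			}
			printf("0x%02x ", current_vector);
		}
		printf("\n");	/* ... 0xe1 0xe9 0x32 0x3a ... */
		return 0;
	}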

+ 12 - 5
arch/x86_64/kernel/mce.c

@@ -37,7 +37,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
 static unsigned long console_logged;
 static int notify_user;
 static int rip_msr;
-static int mce_bootlog;
+static int mce_bootlog = 1;

 /*
  * Lockless MCE logging infrastructure.
@@ -347,7 +347,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 		/* disable GART TBL walk error reporting, which trips off 
 		   incorrectly with the IOMMU & 3ware & Cerberus. */
 		clear_bit(10, &bank[4]);
+		/* Lots of broken BIOS around that don't clear them
+		   by default and leave crap in there. Don't log. */
+		mce_bootlog = 0;
 	}
+
 }			

 static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
@@ -356,6 +360,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
 		break;
+	case X86_VENDOR_AMD:
+		mce_amd_feature_init(c);
+		break;
 	default:
 		break;
 	}
@@ -495,16 +502,16 @@ static int __init mcheck_disable(char *str)
 /* mce=off disables machine check. Note you can reenable it later
    using sysfs.
    mce=TOLERANCELEVEL (number, see above)
-   mce=bootlog Log MCEs from before booting. Disabled by default to work
-   around buggy BIOS that leave bogus MCEs.  */
+   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+   mce=nobootlog Don't log MCEs from before booting. */
 static int __init mcheck_enable(char *str)
 {
 	if (*str == '=')
 		str++;
 	if (!strcmp(str, "off"))
 		mce_dont_init = 1;
-	else if (!strcmp(str, "bootlog"))
-		mce_bootlog = 1;
+	else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
+		mce_bootlog = str[0] == 'b';
 	else if (isdigit(str[0]))
 		get_option(&str, &tolerant);
 	else
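The combined "bootlog"/"nobootlog" branch works because only the first character distinguishes the two strings. A standalone sketch of the same parsing (variable names mirror the kernel code; the harness is illustrative):

	#include <stdio.h>
	#include <string.h>

	static int mce_bootlog = 1;	/* default: log boot-time MCEs */

	static void mce_parse(const char *str)
	{
		if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
			mce_bootlog = str[0] == 'b';	/* 'b' -> 1, 'n' -> 0 */
	}

	int main(void)
	{
		mce_parse("nobootlog");
		printf("%d\n", mce_bootlog);	/* 0 */
		mce_parse("bootlog");
		printf("%d\n", mce_bootlog);	/* 1 */
		return 0;
	}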

+ 538 - 0
arch/x86_64/kernel/mce_amd.c

@@ -0,0 +1,538 @@
+/*
+ *  (c) 2005 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ *
+ *  Written by Jacob Shin - AMD, Inc.
+ *
+ *  Support : jacob.shin@amd.com
+ *
+ *  MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
+ *  MC4_MISC0 exists per physical processor.
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kobject.h>
+#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/sysdev.h>
+#include <linux/sysfs.h>
+#include <asm/apic.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
+#include <asm/percpu.h>
+
+#define PFX "mce_threshold: "
+#define VERSION "version 1.00.9"
+#define NR_BANKS 5
+#define THRESHOLD_MAX 0xFFF
+#define INT_TYPE_APIC 0x00020000
+#define MASK_VALID_HI 0x80000000
+#define MASK_LVTOFF_HI 0x00F00000
+#define MASK_COUNT_EN_HI 0x00080000
+#define MASK_INT_TYPE_HI 0x00060000
+#define MASK_OVERFLOW_HI 0x00010000
+#define MASK_ERR_COUNT_HI 0x00000FFF
+#define MASK_OVERFLOW 0x0001000000000000L
+
+struct threshold_bank {
+	unsigned int cpu;
+	u8 bank;
+	u8 interrupt_enable;
+	u16 threshold_limit;
+	struct kobject kobj;
+};
+
+static struct threshold_bank threshold_defaults = {
+	.interrupt_enable = 0,
+	.threshold_limit = THRESHOLD_MAX,
+};
+
+#ifdef CONFIG_SMP
+static unsigned char shared_bank[NR_BANKS] = {
+	0, 0, 0, 0, 1
+};
+#endif
+
+static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
+
+/*
+ * CPU Initialization
+ */
+
+/* must be called with correct cpu affinity */
+static void threshold_restart_bank(struct threshold_bank *b,
+				   int reset, u16 old_limit)
+{
+	u32 mci_misc_hi, mci_misc_lo;
+
+	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+
+	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		reset = 1;	/* limit cannot be lower than err count */
+
+	if (reset) {		/* reset err count and overflow bit */
+		mci_misc_hi =
+		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+		    (THRESHOLD_MAX - b->threshold_limit);
+	} else if (old_limit) {	/* change limit w/o reset */
+		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+		    (old_limit - b->threshold_limit);
+		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+		    (new_count & THRESHOLD_MAX);
+	}
+
+	b->interrupt_enable ?
+	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+
+	mci_misc_hi |= MASK_COUNT_EN_HI;
+	wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+}
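The counter arithmetic above deserves a worked example: the 12-bit hardware counter runs up toward THRESHOLD_MAX and raises the interrupt on overflow, so to be notified after threshold_limit errors the driver presets it to THRESHOLD_MAX - threshold_limit. With made-up numbers:

	#include <stdio.h>

	#define THRESHOLD_MAX 0xFFF

	int main(void)
	{
		unsigned limit = 10;			/* interrupt after ~10 errors */
		unsigned count = THRESHOLD_MAX - limit;	/* preset: 0xFF5 */

		count += 7;				/* seven errors arrive */

		/* what show_error_count() later recovers from the raw counter */
		printf("errors seen: %u\n", count - (THRESHOLD_MAX - limit)); /* 7 */
		printf("headroom:    %u\n", THRESHOLD_MAX - count);	      /* 3 */
		return 0;
	}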
+
+void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
+{
+	int bank;
+	u32 mci_misc_lo, mci_misc_hi;
+	unsigned int cpu = smp_processor_id();
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
+
+		/* !valid, !counter present, bios locked */
+		if (!(mci_misc_hi & MASK_VALID_HI) ||
+		    !(mci_misc_hi & MASK_VALID_HI >> 1) ||
+		    (mci_misc_hi & MASK_VALID_HI >> 2))
+			continue;
+
+		per_cpu(bank_map, cpu) |= (1 << bank);
+
+#ifdef CONFIG_SMP
+		if (shared_bank[bank] && cpu_core_id[cpu])
+			continue;
+#endif
+
+		setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
+		threshold_defaults.cpu = cpu;
+		threshold_defaults.bank = bank;
+		threshold_restart_bank(&threshold_defaults, 0, 0);
+	}
+}
+
+/*
+ * APIC Interrupt Handler
+ */
+
+/*
+ * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
+ * the interrupt goes off when error_count reaches threshold_limit.
+ * the handler will simply log mcelog w/ software defined bank number.
+ */
+asmlinkage void mce_threshold_interrupt(void)
+{
+	int bank;
+	struct mce m;
+
+	ack_APIC_irq();
+	irq_enter();
+
+	memset(&m, 0, sizeof(m));
+	rdtscll(m.tsc);
+	m.cpu = smp_processor_id();
+
+	/* assume first bank caused it */
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		m.bank = MCE_THRESHOLD_BASE + bank;
+		rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
+
+		if (m.misc & MASK_OVERFLOW) {
+			mce_log(&m);
+			goto out;
+		}
+	}
+      out:
+	irq_exit();
+}
+
+/*
+ * Sysfs Interface
+ */
+
+static struct sysdev_class threshold_sysclass = {
+	set_kset_name("threshold"),
+};
+
+static DEFINE_PER_CPU(struct sys_device, device_threshold);
+
+struct threshold_attr {
+        struct attribute attr;
+        ssize_t(*show) (struct threshold_bank *, char *);
+        ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
+};
+
+static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+
+static cpumask_t affinity_set(unsigned int cpu)
+{
+	cpumask_t oldmask = current->cpus_allowed;
+	cpumask_t newmask = CPU_MASK_NONE;
+	cpu_set(cpu, newmask);
+	set_cpus_allowed(current, newmask);
+	return oldmask;
+}
+
+static void affinity_restore(cpumask_t oldmask)
+{
+	set_cpus_allowed(current, oldmask);
+}
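affinity_set()/affinity_restore() wrap a common kernel pattern: the threshold MSRs are per-CPU, so the task pins itself to the owning CPU, does the rdmsr/wrmsr, then restores the old mask. A user-space analog of the same pattern via the affinity syscalls (the kernel code uses set_cpus_allowed(); this sketch is illustrative only):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>

	static cpu_set_t pin_to(int cpu)
	{
		cpu_set_t old, new;

		sched_getaffinity(0, sizeof(old), &old);	/* save current mask */
		CPU_ZERO(&new);
		CPU_SET(cpu, &new);
		sched_setaffinity(0, sizeof(new), &new);	/* migrate to 'cpu' */
		return old;
	}

	int main(void)
	{
		cpu_set_t old = pin_to(0);
		printf("running on CPU %d\n", sched_getcpu());	/* per-CPU work here */
		sched_setaffinity(0, sizeof(old), &old);	/* restore old mask */
		return 0;
	}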
+
+#define SHOW_FIELDS(name) \
+        static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
+        { \
+                return sprintf(buf, "%lx\n", (unsigned long) b->name); \
+        }
+SHOW_FIELDS(interrupt_enable)
+SHOW_FIELDS(threshold_limit)
+
+static ssize_t store_interrupt_enable(struct threshold_bank *b,
+				      const char *buf, size_t count)
+{
+	char *end;
+	cpumask_t oldmask;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	b->interrupt_enable = !!new;
+
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 0, 0);
+	affinity_restore(oldmask);
+
+	return end - buf;
+}
+
+static ssize_t store_threshold_limit(struct threshold_bank *b,
+				     const char *buf, size_t count)
+{
+	char *end;
+	cpumask_t oldmask;
+	u16 old;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	if (new > THRESHOLD_MAX)
+		new = THRESHOLD_MAX;
+	if (new < 1)
+		new = 1;
+	old = b->threshold_limit;
+	b->threshold_limit = new;
+
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 0, old);
+	affinity_restore(oldmask);
+
+	return end - buf;
+}
+
+static ssize_t show_error_count(struct threshold_bank *b, char *buf)
+{
+	u32 high, low;
+	cpumask_t oldmask;
+	oldmask = affinity_set(b->cpu);
+	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
+	affinity_restore(oldmask);
+	return sprintf(buf, "%x\n",
+		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+}
+
+static ssize_t store_error_count(struct threshold_bank *b,
+				 const char *buf, size_t count)
+{
+	cpumask_t oldmask;
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 1, 0);
+	affinity_restore(oldmask);
+	return 1;
+}
+
+#define THRESHOLD_ATTR(_name,_mode,_show,_store) {            \
+        .attr = {.name = __stringify(_name), .mode = _mode }, \
+        .show = _show,                                        \
+        .store = _store,                                      \
+};
+
+#define ATTR_FIELDS(name) \
+        static struct threshold_attr name = \
+        THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
+
+ATTR_FIELDS(interrupt_enable);
+ATTR_FIELDS(threshold_limit);
+ATTR_FIELDS(error_count);
+
+static struct attribute *default_attrs[] = {
+	&interrupt_enable.attr,
+	&threshold_limit.attr,
+	&error_count.attr,
+	NULL
+};
+
+#define to_bank(k) container_of(k,struct threshold_bank,kobj)
+#define to_attr(a) container_of(a,struct threshold_attr,attr)
+
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_attr *a = to_attr(attr);
+	ssize_t ret;
+	ret = a->show ? a->show(b, buf) : -EIO;
+	return ret;
+}
+
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_attr *a = to_attr(attr);
+	ssize_t ret;
+	ret = a->store ? a->store(b, buf, count) : -EIO;
+	return ret;
+}
+
+static struct sysfs_ops threshold_ops = {
+	.show = show,
+	.store = store,
+};
+
+static struct kobj_type threshold_ktype = {
+	.sysfs_ops = &threshold_ops,
+	.default_attrs = default_attrs,
+};
+
+/* symlinks sibling shared banks to first core.  first core owns dir/files. */
+static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
+{
+	int err = 0;
+	struct threshold_bank *b = 0;
+
+#ifdef CONFIG_SMP
+	if (cpu_core_id[cpu] && shared_bank[bank]) {	/* symlink */
+		char name[16];
+		unsigned lcpu = first_cpu(cpu_core_map[cpu]);
+		if (cpu_core_id[lcpu])
+			goto out;	/* first core not up yet */
+
+		b = per_cpu(threshold_banks, lcpu)[bank];
+		if (!b)
+			goto out;
+		sprintf(name, "bank%i", bank);
+		err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
+					&b->kobj, name);
+		if (err)
+			goto out;
+		per_cpu(threshold_banks, cpu)[bank] = b;
+		goto out;
+	}
+#endif
+
+	b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
+	if (!b) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memset(b, 0, sizeof(struct threshold_bank));
+
+	b->cpu = cpu;
+	b->bank = bank;
+	b->interrupt_enable = 0;
+	b->threshold_limit = THRESHOLD_MAX;
+	kobject_set_name(&b->kobj, "bank%i", bank);
+	b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
+	b->kobj.ktype = &threshold_ktype;
+
+	err = kobject_register(&b->kobj);
+	if (err) {
+		kfree(b);
+		goto out;
+	}
+	per_cpu(threshold_banks, cpu)[bank] = b;
+      out:
+	return err;
+}
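On a hypothetical two-thread single-socket box, the ownership scheme above would produce a sysfs layout roughly like this (paths assumed from the "threshold" sysdev class name; bank 4 is the shared one):

	/sys/devices/system/threshold/threshold0/bank4/interrupt_enable
	/sys/devices/system/threshold/threshold0/bank4/threshold_limit
	/sys/devices/system/threshold/threshold0/bank4/error_count
	/sys/devices/system/threshold/threshold1/bank4 -> ../threshold0/bank4   (symlink)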
+
+/* create dir/files for all valid threshold banks */
+static __cpuinit int threshold_create_device(unsigned int cpu)
+{
+	int bank;
+	int err = 0;
+
+	per_cpu(device_threshold, cpu).id = cpu;
+	per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
+	err = sysdev_register(&per_cpu(device_threshold, cpu));
+	if (err)
+		goto out;
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+			continue;
+		err = threshold_create_bank(cpu, bank);
+		if (err)
+			goto out;
+	}
+      out:
+	return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * let's be hotplug friendly.
+ * in case of multiple core processors, the first core always takes ownership
+ *   of shared sysfs dir/files, and rest of the cores will be symlinked to it.
+ */
+
+/* cpu hotplug call removes all symlinks before first core dies */
+static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
+{
+	struct threshold_bank *b;
+	char name[16];
+
+	b = per_cpu(threshold_banks, cpu)[bank];
+	if (!b)
+		return;
+	if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
+		sprintf(name, "bank%i", bank);
+		sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
+		per_cpu(threshold_banks, cpu)[bank] = 0;
+	} else {
+		kobject_unregister(&b->kobj);
+		kfree(per_cpu(threshold_banks, cpu)[bank]);
+	}
+}
+
+static __cpuinit void threshold_remove_device(unsigned int cpu)
+{
+	int bank;
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+			continue;
+		threshold_remove_bank(cpu, bank);
+	}
+	sysdev_unregister(&per_cpu(device_threshold, cpu));
+}
+
+/* link all existing siblings when first core comes up */
+static __cpuinit int threshold_create_symlinks(unsigned int cpu)
+{
+	int bank, err = 0;
+	unsigned int lcpu = 0;
+
+	if (cpu_core_id[cpu])
+		return 0;
+	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
+		if (lcpu == cpu)
+			continue;
+		for (bank = 0; bank < NR_BANKS; ++bank) {
+			if (!(per_cpu(bank_map, cpu) & 1 << bank))
+				continue;
+			if (!shared_bank[bank])
+				continue;
+			err = threshold_create_bank(lcpu, bank);
+		}
+	}
+	return err;
+}
+
+/* remove all symlinks before first core dies. */
+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
+{
+	int bank;
+	unsigned int lcpu = 0;
+	if (cpu_core_id[cpu])
+		return;
+	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
+		if (lcpu == cpu)
+			continue;
+		for (bank = 0; bank < NR_BANKS; ++bank) {
+			if (!(per_cpu(bank_map, cpu) & 1 << bank))
+				continue;
+			if (!shared_bank[bank])
+				continue;
+			threshold_remove_bank(lcpu, bank);
+		}
+	}
+}
+#else /* !CONFIG_HOTPLUG_CPU */
+static __cpuinit void threshold_create_symlinks(unsigned int cpu)
+{
+}
+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
+{
+}
+static void threshold_remove_device(unsigned int cpu)
+{
+}
+#endif
+
+/* get notified when a cpu comes on/off */
+static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
+					    unsigned long action, void *hcpu)
+{
+	/* cpu was unsigned int to begin with */
+	unsigned int cpu = (unsigned long)hcpu;
+
+	if (cpu >= NR_CPUS)
+		goto out;
+
+	switch (action) {
+	case CPU_ONLINE:
+		threshold_create_device(cpu);
+		threshold_create_symlinks(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		threshold_remove_symlinks(cpu);
+		break;
+	case CPU_DOWN_FAILED:
+		threshold_create_symlinks(cpu);
+		break;
+	case CPU_DEAD:
+		threshold_remove_device(cpu);
+		break;
+	default:
+		break;
+	}
+      out:
+	return NOTIFY_OK;
+}
+
+static struct notifier_block threshold_cpu_notifier = {
+	.notifier_call = threshold_cpu_callback,
+};
+
+static __init int threshold_init_device(void)
+{
+	int err;
+	int lcpu = 0;
+
+	err = sysdev_class_register(&threshold_sysclass);
+	if (err)
+		goto out;
+
+	/* to hit CPUs online before the notifier is up */
+	for_each_online_cpu(lcpu) {
+		err = threshold_create_device(lcpu);
+		if (err)
+			goto out;
+	}
+	register_cpu_notifier(&threshold_cpu_notifier);
+
+      out:
+	return err;
+}
+
+device_initcall(threshold_init_device);

+ 15 - 8
arch/x86_64/kernel/mpparse.c

@@ -42,7 +42,7 @@ int acpi_found_madt;
  * Various Linux-internal data structures created from the
  * MP-table.
  */
-int apic_version [MAX_APICS];
+unsigned char apic_version [MAX_APICS];
 unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };

@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_id = -1U;
 /* Internal processor count */
-static unsigned int num_processors = 0;
+unsigned int num_processors __initdata = 0;
+
+unsigned disabled_cpus __initdata;

 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
@@ -106,11 +108,14 @@ static int __init mpf_checksum(unsigned char *mp, int len)

 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
-	int ver, cpu;
+	int cpu;
+	unsigned char ver;
 	static int found_bsp=0;

-	if (!(m->mpc_cpuflag & CPU_ENABLED))
+	if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+		disabled_cpus++;
 		return;
+	}

 	printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
 		m->mpc_apicid,
@@ -129,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	}

 	cpu = num_processors++;
-
-	if (m->mpc_apicid > MAX_APICS) {
+	
+#if MAX_APICS < 255	
+	if ((int)m->mpc_apicid > MAX_APICS) {
 		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
 			m->mpc_apicid, MAX_APICS);
 		return;
 	}
+#endif
 	ver = m->mpc_apicver;

 	physid_set(m->mpc_apicid, phys_cpu_present_map);
@@ -218,7 +225,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
 			m->mpc_irqtype, m->mpc_irqflag & 3,
 			(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
 			m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+	if (++mp_irq_entries >= MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!!\n");
 }

@@ -549,7 +556,7 @@ void __init get_smp_config (void)
 		 * Read the physical hardware table.  Anything here will
 		 * override the defaults.
 		 */
-		if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {
+		if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
 			smp_found_config = 0;
 			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
 			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");

+ 7 - 1
arch/x86_64/kernel/pci-gart.c

@@ -220,6 +220,12 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	   uses the normal dma_mask for alloc_coherent. */
 	dma_mask &= *dev->dma_mask;

+	/* Why <=? Even when the mask is smaller than 4GB it is often larger 
+	   than 16MB and in this case we have a chance of finding fitting memory 
+	   in the next higher zone first. If not retry with true GFP_DMA. -AK */
+	if (dma_mask <= 0xffffffff)
+		gfp |= GFP_DMA32;
+
 again:
 	memory = dma_alloc_pages(dev, gfp, get_order(size));
 	if (memory == NULL)
@@ -245,7 +251,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 				}

 				if (!(gfp & GFP_DMA)) { 
-					gfp |= GFP_DMA; 
+					gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
 					goto again;
 				}
 				return NULL;
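The retry ladder added above (try GFP_DMA32 first for masks at or below 4GB, then strip it and fall back to true GFP_DMA) boils down to a two-step fallback. A stand-alone rendition; the stub and names are illustrative, not kernel API:

	#include <stdio.h>

	enum zone { DMA, DMA32, NORMAL };

	/* stand-in for dma_alloc_pages(): pretend only the <16MB zone succeeds */
	static int alloc_ok(enum zone z)
	{
		return z == DMA;
	}

	int main(void)
	{
		unsigned long long dma_mask = 0xffffffffULL;	/* 32-bit device */
		enum zone z = NORMAL;

		if (dma_mask <= 0xffffffffULL)
			z = DMA32;	/* first chance: fitting memory above 16MB */
		if (!alloc_ok(z)) {
			z = DMA;	/* strip DMA32, retry with true GFP_DMA */
			printf("fell back to ZONE_DMA: %s\n",
			       alloc_ok(z) ? "ok" : "fail");
		}
		return 0;
	}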

+ 27 - 20
arch/x86_64/kernel/process.c

@@ -144,7 +144,8 @@ void cpu_idle_wait(void)
 	do {
 		ssleep(1);
 		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+			if (cpu_isset(cpu, map) &&
+					!per_cpu(cpu_idle_state, cpu))
 				cpu_clear(cpu, map);
 		}
 		cpus_and(map, map, cpu_online_map);
@@ -275,7 +276,8 @@ void __show_regs(struct pt_regs * regs)
 		system_utsname.version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
 	printk_address(regs->rip); 
-	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->rax, regs->rbx, regs->rcx);
 	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -427,15 +429,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 	struct pt_regs * childregs;
 	struct task_struct *me = current;

-	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
-
+	childregs = ((struct pt_regs *)
+			(THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
 	*childregs = *regs;

 	childregs->rax = 0;
 	childregs->rsp = rsp;
-	if (rsp == ~0UL) {
+	if (rsp == ~0UL)
 		childregs->rsp = (unsigned long)childregs;
-	}

 	p->thread.rsp = (unsigned long) childregs;
 	p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -457,7 +458,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 			p->thread.io_bitmap_max = 0;
 			return -ENOMEM;
 		}
-		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+				IO_BITMAP_BYTES);
 	} 

 	/*
@@ -494,7 +496,8 @@ out:
  * - fold all the options into a flag word and test it with a single test.
  * - could test fs/gs bitsliced
  */
-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
@@ -565,7 +568,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
 	prev->userrsp = read_pda(oldrsp); 
 	write_pda(oldrsp, next->userrsp); 
 	write_pda(pcurrent, next_p); 
-	write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+	write_pda(kernelstack,
+	    (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

 	/*
 	 * Now maybe reload the debug registers
@@ -646,7 +650,9 @@ asmlinkage long sys_fork(struct pt_regs *regs)
 	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
 }

-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+asmlinkage long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 {
 	if (!newsp)
 		newsp = regs->rsp;
@@ -682,7 +688,8 @@ unsigned long get_wchan(struct task_struct *p)
 		return 0;
 	fp = *(u64 *)(p->thread.rsp);
 	do { 
-		if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+		if (fp < (unsigned long)stack ||
+		    fp > (unsigned long)stack+THREAD_SIZE)
 			return 0; 
 		rip = *(u64 *)(fp+8); 
 		if (!in_sched_functions(rip))
@@ -717,8 +724,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			task->thread.gsindex = 0;
 			task->thread.gs = addr;
 			if (doit) {
-		load_gs_index(0);
-		ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 
+				load_gs_index(0);
+				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 			} 
 		}
 		put_cpu();
@@ -735,7 +742,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			set_32bit_tls(task, FS_TLS, addr);
 			if (doit) { 
 				load_TLS(&task->thread, cpu); 
-				asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
+				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
 			}
 			task->thread.fsindex = FS_TLS_SEL;
 			task->thread.fs = 0;
@@ -745,8 +752,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			if (doit) {
 				/* set the selector to 0 to not confuse
 				   __switch_to */
-		asm volatile("movl %0,%%fs" :: "r" (0));
-		ret = checking_wrmsrl(MSR_FS_BASE, addr); 
+				asm volatile("movl %0,%%fs" :: "r" (0));
+				ret = checking_wrmsrl(MSR_FS_BASE, addr);
 			}
 		}
 		put_cpu();
@@ -755,9 +762,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		unsigned long base; 
 		if (task->thread.fsindex == FS_TLS_SEL)
 			base = read_32bit_tls(task, FS_TLS);
-		else if (doit) {
+		else if (doit)
 			rdmsrl(MSR_FS_BASE, base);
-		} else
+		else
 			base = task->thread.fs;
 		ret = put_user(base, (unsigned long __user *)addr); 
 		break; 
@@ -766,9 +773,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		unsigned long base;
 		if (task->thread.gsindex == GS_TLS_SEL)
 			base = read_32bit_tls(task, GS_TLS);
-		else if (doit) {
+		else if (doit)
 			rdmsrl(MSR_KERNEL_GS_BASE, base);
-		} else
+		else
 			base = task->thread.gs;
 		ret = put_user(base, (unsigned long __user *)addr); 
 		break;

+ 4 - 3
arch/x86_64/kernel/reboot.c

@@ -77,6 +77,7 @@ static inline void kb_wait(void)

 void machine_shutdown(void)
 {
+	unsigned long flags;
 	/* Stop the cpus and apics */
 #ifdef CONFIG_SMP
 	int reboot_cpu_id;
@@ -98,7 +99,7 @@ void machine_shutdown(void)
 	smp_send_stop();
 #endif

-	local_irq_disable();
+	local_irq_save(flags);

 #ifndef CONFIG_SMP
 	disable_local_APIC();
@@ -106,7 +107,7 @@ void machine_shutdown(void)

 	disable_IO_APIC();

-	local_irq_enable();
+	local_irq_restore(flags);
 }

 void machine_emergency_restart(void)
@@ -120,7 +121,7 @@ void machine_emergency_restart(void)
 		/* Could also try the reset bit in the Hammer NB */
 		switch (reboot_type) { 
 		case BOOT_KBD:
-		for (i=0; i<100; i++) {
+		for (i=0; i<10; i++) {
 			kb_wait();
 			udelay(50);
 			outb(0xfe,0x64);         /* pulse reset low */

+ 41 - 48
arch/x86_64/kernel/setup.c

@@ -60,6 +60,7 @@
 #include <asm/setup.h>
 #include <asm/mach_apic.h>
 #include <asm/numa.h>
+#include <asm/sections.h>

 /*
  * Machine setup..
@@ -103,7 +104,6 @@ struct edid_info edid_info;
 struct e820map e820;

 extern int root_mountflags;
-extern char _text, _etext, _edata, _end;

 char command_line[COMMAND_LINE_SIZE];

@@ -412,7 +412,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long bootmap_size, bootmap;

-	memory_present(0, start_pfn, end_pfn);
 	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
 	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
 	if (bootmap == -1L)
@@ -571,6 +570,8 @@ void __init setup_arch(char **cmdline_p)

 	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));

+	zap_low_mappings(0);
+
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -657,8 +658,6 @@ void __init setup_arch(char **cmdline_p)
 	}
 #endif

-	sparse_init();
-
 	paging_init();

 	check_ioapic();
@@ -793,7 +792,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif

 	bits = 0;
-	while ((1 << bits) < c->x86_num_cores)
+	while ((1 << bits) < c->x86_max_cores)
 		bits++;

 	/* Low order bits define the core id (index of core in socket) */
@@ -823,10 +822,10 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 		if (!node_online(node))
 			node = nearby_node(apicid);
 	}
-  	cpu_to_node[cpu] = node;
+	numa_set_node(cpu, node);

  	printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
-  			cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
+  			cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
 #endif
 #endif
 }
@@ -875,9 +874,9 @@ static int __init init_amd(struct cpuinfo_x86 *c)
 	display_cacheinfo(c);

 	if (c->extended_cpuid_level >= 0x80000008) {
-		c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-		if (c->x86_num_cores & (c->x86_num_cores - 1))
-			c->x86_num_cores = 1;
+		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+		if (c->x86_max_cores & (c->x86_max_cores - 1))
+			c->x86_max_cores = 1;

 		amd_detect_cmp(c);
 	}
@@ -889,54 +888,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	u32 	eax, ebx, ecx, edx;
-	int 	index_msb, tmp;
+	int 	index_msb, core_bits;
 	int 	cpu = smp_processor_id();
-	
+
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+	c->apicid = phys_pkg_id(0);
+
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;

-	cpuid(1, &eax, &ebx, &ecx, &edx);
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
-	
+
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-	} else if (smp_num_siblings > 1) {
-		index_msb = 31;
-		/*
-		 * At this point we only support two siblings per
-		 * processor package.
-		 */
+	} else if (smp_num_siblings > 1 ) {
+
 		if (smp_num_siblings > NR_CPUS) {
 			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
 			smp_num_siblings = 1;
 			return;
 		}
-		tmp = smp_num_siblings;
-		while ((tmp & 0x80000000 ) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+
+		index_msb = get_count_order(smp_num_siblings);
 		phys_proc_id[cpu] = phys_pkg_id(index_msb);
-		
+
 		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 		       phys_proc_id[cpu]);

-		smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+		smp_num_siblings = smp_num_siblings / c->x86_max_cores;

-		tmp = smp_num_siblings;
-		index_msb = 31;
-		while ((tmp & 0x80000000) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+		index_msb = get_count_order(smp_num_siblings) ;

-		cpu_core_id[cpu] = phys_pkg_id(index_msb);
+		core_bits = get_count_order(c->x86_max_cores);

-		if (c->x86_num_cores > 1)
+		cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+					       ((1 << core_bits) - 1);
+
+		if (c->x86_max_cores > 1)
 			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
 			       cpu_core_id[cpu]);
 	}
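Both open-coded MSB scans above collapse into get_count_order(), added to include/linux/bitops.h in this merge (see the file list). It returns ceil(log2(count)), i.e. how many low APIC-ID bits are needed to number count siblings. A stand-alone equivalent for count >= 1 (the loop form is illustrative, not the kernel implementation):

	#include <stdio.h>

	/* smallest order with (1 << order) >= count, i.e. ceil(log2(count)) */
	static int get_count_order(unsigned int count)
	{
		int order = 0;

		while ((1u << order) < count)
			order++;
		return order;
	}

	int main(void)
	{
		/* 2 HT siblings need 1 APIC-ID bit; 3 or 4 cores need 2 bits */
		printf("%d %d %d %d\n", get_count_order(1), get_count_order(2),
		       get_count_order(3), get_count_order(4));	/* 0 1 2 2 */
		return 0;
	}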
@@ -975,7 +964,7 @@ static void srat_detect_node(void)
 	node = apicid_to_node[hard_smp_processor_id()];
 	if (node == NUMA_NO_NODE)
 		node = 0;
-	cpu_to_node[cpu] = node;
+	numa_set_node(cpu, node);

 	if (acpi_numa > 0)
 		printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
@@ -993,13 +982,18 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		unsigned eax = cpuid_eax(0x80000008);
 		c->x86_virt_bits = (eax >> 8) & 0xff;
 		c->x86_phys_bits = eax & 0xff;
+		/* CPUID workaround for Intel 0F34 CPU */
+		if (c->x86_vendor == X86_VENDOR_INTEL &&
+		    c->x86 == 0xF && c->x86_model == 0x3 &&
+		    c->x86_mask == 0x4)
+			c->x86_phys_bits = 36;
 	}

 	if (c->x86 == 15)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 	if (c->x86 >= 15)
 		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
- 	c->x86_num_cores = intel_num_cpu_cores(c);
+ 	c->x86_max_cores = intel_num_cpu_cores(c);

 	srat_detect_node();
 }
@@ -1037,7 +1031,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model_id[0] = '\0';  /* Unset */
 	c->x86_clflush_size = 64;
 	c->x86_cache_alignment = c->x86_clflush_size;
-	c->x86_num_cores = 1;
+	c->x86_max_cores = 1;
 	c->extended_cpuid_level = 0;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);

@@ -1060,10 +1054,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 		c->x86 = (tfms >> 8) & 0xf;
 		c->x86_model = (tfms >> 4) & 0xf;
 		c->x86_mask = tfms & 0xf;
-		if (c->x86 == 0xf) {
+		if (c->x86 == 0xf)
 			c->x86 += (tfms >> 20) & 0xff;
+		if (c->x86 >= 0x6)
 			c->x86_model += ((tfms >> 16) & 0xF) << 4;
-		} 
 		if (c->x86_capability[0] & (1<<19)) 
 			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
 	} else {
@@ -1271,13 +1265,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
 	
 #ifdef CONFIG_SMP
-	if (smp_num_siblings * c->x86_num_cores > 1) {
+	if (smp_num_siblings * c->x86_max_cores > 1) {
 		int cpu = c - cpu_data;
 		seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
-		seq_printf(m, "siblings\t: %d\n",
-				c->x86_num_cores * smp_num_siblings);
+		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
 		seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
-		seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
 #endif	


+ 1 - 1
arch/x86_64/kernel/setup64.c

@@ -141,7 +141,6 @@ void pda_init(int cpu)
 			panic("cannot allocate irqstack for cpu %d", cpu); 
 			panic("cannot allocate irqstack for cpu %d", cpu); 
 	}
 	}
 
 
-	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
 
 
 	pda->irqstackptr += IRQSTACKSIZE-64;
 	pda->irqstackptr += IRQSTACKSIZE-64;
 } 
 } 
@@ -197,6 +196,7 @@ void __cpuinit cpu_init (void)
 	/* CPU 0 is initialised in head64.c */
 	/* CPU 0 is initialised in head64.c */
 	if (cpu != 0) {
 	if (cpu != 0) {
 		pda_init(cpu);
 		pda_init(cpu);
+		zap_low_mappings(cpu);
 	} else 
 	} else 
 		estacks = boot_exception_stacks; 
 		estacks = boot_exception_stacks; 
 
 

+ 17 - 0
arch/x86_64/kernel/signal.c

@@ -110,6 +110,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
 	COPY(r14);
 	COPY(r15);

+	/* Kernel saves and restores only the CS segment register on signals,
+	 * which is the bare minimum needed to allow mixed 32/64-bit code.
+	 * App's signal handler can save/restore other segments if needed. */
+	{
+		unsigned cs;
+		err |= __get_user(cs, &sc->cs);
+		regs->cs = cs | 3;	/* Force into user mode */
+	}
+
 	{
 		unsigned int tmpflags;
 		err |= __get_user(tmpflags, &sc->eflags);
@@ -187,6 +196,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo
 {
 	int err = 0;

+	err |= __put_user(regs->cs, &sc->cs);
 	err |= __put_user(0, &sc->gs);
 	err |= __put_user(0, &sc->fs);

@@ -318,7 +328,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,

 	regs->rsp = (unsigned long)frame;

+	/* Set up the CS register to run signal handlers in 64-bit mode,
+	   even if the handler happens to be interrupting 32-bit code. */
+	regs->cs = __USER_CS;
+
+	/* This, by contrast, has nothing to do with segment registers -
+	   see include/asm-x86_64/uaccess.h for details. */
 	set_fs(USER_DS);
+
 	regs->eflags &= ~TF_MASK;
 	if (test_thread_flag(TIF_SINGLESTEP))
 		ptrace_notify(SIGTRAP);
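Forcing the restored CS into user mode is just the two RPL bits: OR-ing 3 pins any selector userspace hands back to ring 3. With assumed x86-64 selector values:

	#include <stdio.h>

	int main(void)
	{
		unsigned cs64 = 0x33;	/* assumed __USER_CS (64-bit user code) */
		unsigned cs32 = 0x23;	/* assumed __USER32_CS (compat user code) */
		unsigned bogus = 0x10;	/* kernel-ish selector smuggled by userspace */

		/* the low two selector bits are the RPL; OR-ing 3 pins ring 3 */
		printf("0x%02x 0x%02x 0x%02x\n", cs64 | 3, cs32 | 3, bogus | 3);
		return 0;	/* prints 0x33 0x23 0x13 */
	}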

+ 3 - 4
arch/x86_64/kernel/smp.c

@@ -28,8 +28,6 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>

-#define __cpuinit __init
-
 /*
  *	Smarter SMP flushing macros. 
  *		c/o Linus Torvalds.
@@ -452,13 +450,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,

 void smp_stop_cpu(void)
 {
+	unsigned long flags;
 	/*
 	 * Remove this CPU:
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
-	local_irq_disable();
+	local_irq_save(flags);
 	disable_local_APIC();
-	local_irq_enable(); 
+	local_irq_restore(flags);
 }

 static void smp_really_stop_cpu(void *dummy)

+ 91 - 20
arch/x86_64/kernel/smpboot.c

@@ -64,6 +64,7 @@
 int smp_num_siblings = 1;
 /* Package ID of each logical CPU */
 u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+/* core ID of each logical CPU */
 u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };

 /* Bitmask of currently online CPUs */
@@ -87,7 +88,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 /* Set when the idlers are all forked */
 int smp_threads_ready;

+/* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+
+/* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);

@@ -434,30 +438,59 @@ void __cpuinit smp_callin(void)
 	cpu_set(cpuid, cpu_callin_map);
 }

+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
 static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	cpu_set(cpu, cpu_sibling_setup_map);

 	if (smp_num_siblings > 1) {
-		for_each_cpu(i) {
-			if (cpu_core_id[cpu] == cpu_core_id[i]) {
+		for_each_cpu_mask(i, cpu_sibling_setup_map) {
+			if (phys_proc_id[cpu] == phys_proc_id[i] &&
+			    cpu_core_id[cpu] == cpu_core_id[i]) {
 				cpu_set(i, cpu_sibling_map[cpu]);
 				cpu_set(cpu, cpu_sibling_map[i]);
+				cpu_set(i, cpu_core_map[cpu]);
+				cpu_set(cpu, cpu_core_map[i]);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}

-	if (current_cpu_data.x86_num_cores > 1) {
-		for_each_cpu(i) {
-			if (phys_proc_id[cpu] == phys_proc_id[i]) {
-				cpu_set(i, cpu_core_map[cpu]);
-				cpu_set(cpu, cpu_core_map[i]);
-			}
-		}
-	} else {
+	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
+		c[cpu].booted_cores = 1;
+		return;
+	}
+
+	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (phys_proc_id[cpu] == phys_proc_id[i]) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			/*
+			 *  Does this new cpu bringup a new core?
+			 */
+			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+				/*
+				 * for each core in package, increment
+				 * the booted_cores for this new cpu
+				 */
+				if (first_cpu(cpu_sibling_map[i]) == i)
+					c[cpu].booted_cores++;
+				/*
+				 * increment the core count for all
+				 * the other cpus in this package
+				 */
+				if (i != cpu)
+					c[i].booted_cores++;
+			} else if (i != cpu && !c[cpu].booted_cores)
+				c[cpu].booted_cores = c[i].booted_cores;
+		}
 	}
 }

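A worked trace of the booted_cores accounting above, for one package with two cores of two HT threads each (cpu0/1 = core 0, cpu2/3 = core 1, booting in that order; values derived by hand from the code):

	cpu0 up: sibling map {0}    weight 1 -> new core; booted_cores[0] = 1
	cpu1 up: sibling map {0,1}  weight 2 -> inherits booted_cores[1] = 1
	cpu2 up: sibling map {2}    weight 1 -> new core; bumps every package
	         member: booted_cores[0] = booted_cores[1] = booted_cores[2] = 2
	cpu3 up: sibling map {2,3}  weight 2 -> inherits booted_cores[3] = 2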
@@ -879,6 +912,9 @@ static __init void disable_smp(void)
 }

 #ifdef CONFIG_HOTPLUG_CPU
+
+int additional_cpus __initdata = -1;
+
 /*
  * cpu_possible_map should be static, it cannot change as cpu's
  * are onlined, or offlined. The reason is per-cpu data-structures
@@ -887,14 +923,38 @@ static __init void disable_smp(void)
  * cpu_present_map on the other hand can change dynamically.
  * In case when cpu_hotplug is not compiled, then we resort to current
  * behaviour, which is cpu_possible == cpu_present.
- * If cpu-hotplug is supported, then we need to preallocate for all
- * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
  * - Ashok Raj
+ *
+ * Three ways to find out the number of additional hotplug CPUs:
+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+ * - otherwise use half of the available CPUs or 2, whatever is more.
+ * - The user can overwrite it with additional_cpus=NUM
+ * We do this because additional CPUs waste a lot of memory.
+ * -AK
 */
 __init void prefill_possible_map(void)
 {
 	int i;
-	for (i = 0; i < NR_CPUS; i++)
+	int possible;
+
+ 	if (additional_cpus == -1) {
+ 		if (disabled_cpus > 0) {
+ 			additional_cpus = disabled_cpus;
+ 		} else {
+ 			additional_cpus = num_processors / 2;
+ 			if (additional_cpus == 0)
+ 				additional_cpus = 2;
+ 		}
+ 	}
+	possible = num_processors + additional_cpus;
+	if (possible > NR_CPUS) 
+		possible = NR_CPUS;
+
+	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+		possible,
+	        max_t(int, possible - num_processors, 0));
+
+	for (i = 0; i < possible; i++)
 		cpu_set(i, cpu_possible_map);
 }
 #endif
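Worked example of the sizing policy (numbers illustrative): a box that boots 4 CPUs with no BIOS-disabled entries gets

	num_processors = 4, disabled_cpus = 0
	additional_cpus = 4 / 2 = 2             (no disabled CPUs reported)
	possible        = min(4 + 2, NR_CPUS) = 6

so the boot log reads "SMP: Allowing 6 CPUs, 2 hotplug CPUs"; booting with additional_cpus=0 shrinks cpu_possible_map to exactly the 4 booted CPUs.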
@@ -965,6 +1025,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	nmi_watchdog_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
+	set_cpu_sibling_map(0);

 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
@@ -1008,8 +1069,6 @@ void __init smp_prepare_boot_cpu(void)
 	int me = smp_processor_id();
 	cpu_set(me, cpu_online_map);
 	cpu_set(me, cpu_callout_map);
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
 	per_cpu(cpu_state, me) = CPU_ONLINE;
 }

@@ -1062,9 +1121,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
 */
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-#ifndef CONFIG_HOTPLUG_CPU
-	zap_low_mappings();
-#endif
 	smp_cleanup_boot();

 #ifdef CONFIG_X86_IO_APIC
@@ -1081,15 +1137,24 @@ void __init smp_cpus_done(unsigned int max_cpus)
 static void remove_siblinginfo(int cpu)
 {
 	int sibling;
+	struct cpuinfo_x86 *c = cpu_data;

+	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+		cpu_clear(cpu, cpu_core_map[sibling]);
+		/*
+		 * last thread sibling in this cpu core going down
+		 */
+		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+			c[sibling].booted_cores--;
+	}
+			
 	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
 		cpu_clear(cpu, cpu_sibling_map[sibling]);
-	for_each_cpu_mask(sibling, cpu_core_map[cpu])
-		cpu_clear(cpu, cpu_core_map[sibling]);
 	cpus_clear(cpu_sibling_map[cpu]);
 	cpus_clear(cpu_core_map[cpu]);
 	phys_proc_id[cpu] = BAD_APICID;
 	cpu_core_id[cpu] = BAD_APICID;
+	cpu_clear(cpu, cpu_sibling_setup_map);
 }

 void remove_cpu_from_maps(void)
@@ -1153,6 +1218,12 @@ void __cpu_die(unsigned int cpu)
 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }

+static __init int setup_additional_cpus(char *s)
+{
+	return get_option(&s, &additional_cpus);
+}
+__setup("additional_cpus=", setup_additional_cpus);
+
 #else /* ... !CONFIG_HOTPLUG_CPU */

 int __cpu_disable(void)

+ 0 - 14
arch/x86_64/kernel/sys_x86_64.c

@@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name)
 		err |= copy_to_user(&name->machine, "i686", 5); 		
 	return err ? -EFAULT : 0;
 }
-
-asmlinkage long sys_time64(long __user * tloc)
-{
-	struct timeval now; 
-	int i; 
-
-	do_gettimeofday(&now);
-	i = now.tv_sec;
-	if (tloc) {
-		if (put_user(i,tloc))
-			i = -EFAULT;
-	}
-	return i;
-}

+ 4 - 40
arch/x86_64/kernel/traps.c

@@ -428,19 +428,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
 {
 	conditional_sti(regs);

-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
-			      regs->rip);
-               }
-       }
-#endif
-
 	if (user_mode(regs)) {
 		struct task_struct *tsk = current;

@@ -513,20 +500,6 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
 {
 	conditional_sti(regs);

-#ifdef CONFIG_CHECKING
-       { 
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-		       oops_in_progress++;
-                       printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
-		       oops_in_progress--;
-               }
-       }
-#endif
-
 	if (user_mode(regs)) {
 		struct task_struct *tsk = current;

@@ -665,19 +638,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 	struct task_struct *tsk = current;
 	siginfo_t info;

-#ifdef CONFIG_CHECKING
-       { 
-	       /* RED-PEN interaction with debugger - could destroy gs */
-               unsigned long gs; 
-               struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 
-               rdmsrl(MSR_GS_BASE, gs); 
-               if (gs != (unsigned long)pda) { 
-                       wrmsrl(MSR_GS_BASE, pda); 
-                       printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
-               }
-       }
-#endif
-
 	get_debugreg(condition, 6);

 	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
@@ -888,6 +848,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }

+asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+{
+}
+
 /*
  *  'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
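The empty weak-linkage stub gives traps.c a default mce_threshold_interrupt so the kernel still links when mce_amd.o is not built in; the real handler, when present, overrides it. The linkage rule in miniature (file names hypothetical):

	/* a.c: the weak default, used only when nothing stronger is linked */
	#include <stdio.h>

	void __attribute__((weak)) handler(void)
	{
		puts("weak default");
	}

	int main(void)
	{
		handler();	/* prints "strong override" if b.c is linked in */
		return 0;
	}

	/* b.c (optional strong definition):
	void handler(void) { puts("strong override"); }
	*/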

+ 1 - 1
arch/x86_64/kernel/vmlinux.lds.S

@@ -50,7 +50,7 @@ SECTIONS
 	*(.bss.page_aligned)	
 	*(.bss)
 	}
-  __bss_end = .;
+  __bss_stop = .;

   . = ALIGN(PAGE_SIZE);
   . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);

+ 3 - 0
arch/x86_64/kernel/x8664_ksyms.c

@@ -203,3 +203,6 @@ EXPORT_SYMBOL(flush_tlb_page);
 #endif

 EXPORT_SYMBOL(cpu_khz);
+
+EXPORT_SYMBOL(load_gs_index);
+

+ 0 - 38
arch/x86_64/lib/clear_page.S

@@ -5,46 +5,8 @@
 	.globl clear_page
 	.p2align 4
 clear_page:
-	xorl   %eax,%eax
-	movl   $4096/64,%ecx
-	.p2align 4
-.Lloop:
-	decl	%ecx
-#define PUT(x) movq %rax,x*8(%rdi) 
-	movq %rax,(%rdi)
-	PUT(1)
-	PUT(2)
-	PUT(3)
-	PUT(4)
-	PUT(5)
-	PUT(6)
-	PUT(7)
-	leaq	64(%rdi),%rdi
-	jnz	.Lloop
-	nop
-	ret
-clear_page_end:	
-	
-	/* C stepping K8 run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-	
-#include <asm/cpufeature.h>
-	    	
-	.section .altinstructions,"a"
-	.align 8
-	.quad  clear_page
-	.quad  clear_page_c
-	.byte  X86_FEATURE_K8_C
-	.byte  clear_page_end-clear_page	
-	.byte  clear_page_c_end-clear_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-clear_page_c:
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep 
 	stosq
 	ret
-clear_page_c_end:
-	.previous

+ 0 - 87
arch/x86_64/lib/copy_page.S

@@ -8,94 +8,7 @@
 	.globl copy_page
 	.p2align 4
 copy_page:
-	subq	$3*8,%rsp
-	movq	%rbx,(%rsp)
-	movq	%r12,1*8(%rsp)
-	movq	%r13,2*8(%rsp)
-			
-	movl	$(4096/64)-5,%ecx
-	.p2align 4
-.Loop64:	
-  	dec     %rcx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
-
-	prefetcht0 5*64(%rsi)
-
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
-
-	leaq    64 (%rsi), %rsi
-	leaq    64 (%rdi), %rdi
-
-	jnz     .Loop64
-
-	movl	$5,%ecx
-	.p2align 4
-.Loop2:	
-	decl   %ecx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
-
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
-	
-	leaq	64(%rdi),%rdi			
-	leaq	64(%rsi),%rsi			
-	
-	jnz	.Loop2		
-	
-	movq	(%rsp),%rbx
-	movq	1*8(%rsp),%r12
-	movq	2*8(%rsp),%r13
-	addq	$3*8,%rsp
-	ret
-	
-	/* C stepping K8 run faster using the string copy instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>		
-		
-	.section .altinstructions,"a"
-	.align 8
-	.quad  copy_page
-	.quad  copy_page_c
-	.byte  X86_FEATURE_K8_C
-	.byte  copy_page_c_end-copy_page_c
-	.byte  copy_page_c_end-copy_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-copy_page_c:
 	movl $4096/8,%ecx
 	rep 
 	movsq 
 	ret
-copy_page_c_end:
-	.previous

+ 2 - 91
arch/x86_64/lib/memcpy.S

@@ -11,6 +11,8 @@
  * 
  * Output:
  * rax original destination
+ * 
+ * TODO: check best memcpy for PSC
  */	
 
  	.globl __memcpy
@@ -18,95 +20,6 @@
 	.p2align 4
 __memcpy:
 memcpy:		
-	pushq %rbx
-	movq %rdi,%rax
-
-	movl %edx,%ecx
-	shrl $6,%ecx
-	jz .Lhandle_tail
-	
-	.p2align 4
-.Lloop_64:
-	decl %ecx
-	
-	movq (%rsi),%r11
-	movq 8(%rsi),%r8
-
-	movq %r11,(%rdi)
-	movq %r8,1*8(%rdi)
-
-	movq 2*8(%rsi),%r9
-	movq 3*8(%rsi),%r10
-
-	movq %r9,2*8(%rdi)
-	movq %r10,3*8(%rdi)
-		
-	movq 4*8(%rsi),%r11
-	movq 5*8(%rsi),%r8
-
-	movq %r11,4*8(%rdi)
-	movq %r8,5*8(%rdi)
-
-	movq 6*8(%rsi),%r9
-	movq 7*8(%rsi),%r10
-
-	movq %r9,6*8(%rdi)
-	movq %r10,7*8(%rdi)
-
-	leaq 64(%rsi),%rsi
-	leaq 64(%rdi),%rdi
-	jnz  .Lloop_64
-
-.Lhandle_tail:
-	movl %edx,%ecx
-	andl $63,%ecx
-	shrl $3,%ecx
-	jz   .Lhandle_7
-	.p2align 4
-.Lloop_8: 
-	decl %ecx
-	movq (%rsi),%r8
-	movq %r8,(%rdi) 
-	leaq 8(%rdi),%rdi
-	leaq 8(%rsi),%rsi
-	jnz  .Lloop_8
-
-.Lhandle_7:
-	movl %edx,%ecx
-	andl $7,%ecx
-	jz .Lende
-	.p2align 4
-.Lloop_1:
-	movb (%rsi),%r8b
-	movb %r8b,(%rdi) 
-	incq %rdi
-	incq %rsi
-	decl %ecx
-	jnz .Lloop_1
-	
-.Lende: 	
-	popq %rbx
-	ret
-.Lfinal:
-	
-	/* C stepping K8 run faster using the string copy instructions.
-	   It is also a lot simpler. Use this when possible */
-	
-	.section .altinstructions,"a"
-	.align 8
-	.quad  memcpy
-	.quad  memcpy_c
-	.byte  X86_FEATURE_K8_C
-	.byte  .Lfinal-memcpy
-	.byte  memcpy_c_end-memcpy_c	
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
-  * rsi source
-  * rdx count
-  */			
-memcpy_c:
 	movq %rdi,%rax
 	movl %edx,%ecx
 	shrl $3,%ecx
@@ -117,5 +30,3 @@ memcpy_c:
 	rep
 	movsb
 	ret
-memcpy_c_end:
-	.previous

+ 0 - 94
arch/x86_64/lib/memset.S

@@ -13,98 +13,6 @@
 	.p2align 4
 memset:	
 __memset:
-	movq %rdi,%r10
-	movq %rdx,%r11
-
-	/* expand byte value  */
-	movzbl %sil,%ecx
-	movabs $0x0101010101010101,%rax
-	mul    %rcx		/* with rax, clobbers rdx */
-
-	/* align dst */
-	movl  %edi,%r9d		
-	andl  $7,%r9d	
-	jnz  .Lbad_alignment
-.Lafter_bad_alignment:
-	
-	movl %r11d,%ecx
-	shrl $6,%ecx
-	jz	 .Lhandle_tail
-
-	.p2align 4
-.Lloop_64:	
-	decl   %ecx
-	movq  %rax,(%rdi) 
-	movq  %rax,8(%rdi) 
-	movq  %rax,16(%rdi) 
-	movq  %rax,24(%rdi) 
-	movq  %rax,32(%rdi) 
-	movq  %rax,40(%rdi) 
-	movq  %rax,48(%rdi) 
-	movq  %rax,56(%rdi) 
-	leaq  64(%rdi),%rdi
-	jnz    .Lloop_64
-
-	/* Handle tail in loops. The loops should be faster than hard
-	   to predict jump tables. */ 
-	.p2align 4	   
-.Lhandle_tail:
-	movl	%r11d,%ecx
-	andl    $63&(~7),%ecx
-	jz 		.Lhandle_7
-	shrl	$3,%ecx
-	.p2align 4
-.Lloop_8:
-	decl   %ecx
-	movq  %rax,(%rdi)
-	leaq  8(%rdi),%rdi
-	jnz    .Lloop_8
-
-.Lhandle_7:
-	movl	%r11d,%ecx
-	andl	$7,%ecx
-	jz      .Lende
-	.p2align 4
-.Lloop_1:
-	decl    %ecx
-	movb 	%al,(%rdi)
-	leaq	1(%rdi),%rdi
-	jnz     .Lloop_1
-	
-.Lende:	
-	movq	%r10,%rax
-	ret
-
-.Lbad_alignment:
-	cmpq $7,%r11
-	jbe	.Lhandle_7
-	movq %rax,(%rdi)	/* unaligned store */
-	movq $8,%r8			
-	subq %r9,%r8 
-	addq %r8,%rdi
-	subq %r8,%r11
-	jmp .Lafter_bad_alignment
-
-	/* C stepping K8 run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>	
-		
-	.section .altinstructions,"a"
-	.align 8
-	.quad  memset
-	.quad  memset_c
-	.byte  X86_FEATURE_K8_C
-	.byte  memset_c_end-memset_c
-	.byte  memset_c_end-memset_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
-  * rsi value
-  * rdx count
-  */			
-memset_c:	
 	movq %rdi,%r9
 	movl %edx,%r8d
 	andl $7,%r8d
@@ -121,5 +29,3 @@ memset_c:
 	stosb
 	movq %r9,%rax
 	ret
-memset_c_end:
-	.previous

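memset keeps the same shape in its string-instruction form: the fill byte is broadcast into a 64-bit pattern, whole qwords go out with rep stosq, and the remaining 0-7 bytes with rep stosb. A user-space C sketch of that logic (illustration only; the kernel routine is pure assembly and the diff elides part of it):

#include <stddef.h>
#include <stdint.h>

static void *memset_sketch(void *dst, int c, size_t n)
{
	/* 0x0101010101010101 * byte replicates the byte into all 8 lanes */
	uint64_t pattern = (uint8_t)c * 0x0101010101010101ULL;
	uint64_t *q = dst;
	uint8_t *b;
	size_t i;

	for (i = 0; i < n / 8; i++)	/* the "rep stosq" part */
		*q++ = pattern;
	b = (uint8_t *)q;
	for (i = 0; i < n % 8; i++)	/* the "rep stosb" tail */
		b[i] = (uint8_t)c;
	return dst;
}
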
+ 7 - 12
arch/x86_64/mm/fault.c

@@ -308,18 +308,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	unsigned long flags;
 	siginfo_t info;
 
-#ifdef CONFIG_CHECKING
-	{ 
-		unsigned long gs; 
-		struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); 
-		rdmsrl(MSR_GS_BASE, gs); 
-		if (gs != (unsigned long)pda) { 
-			wrmsrl(MSR_GS_BASE, pda); 
-			printk("page_fault: wrong gs %lx expected %p\n", gs, pda);
-		}
-	}
-#endif
-
 	/* get the address */
 	__asm__("movq %%cr2,%0":"=r" (address));
 	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
@@ -571,3 +559,10 @@ do_sigbus:
 	force_sig_info(SIGBUS, &info, tsk);
 	return;
 }
+
+static int __init enable_pagefaulttrace(char *str)
+{
+	page_fault_trace = 1;
+	return 0;
+}
+__setup("pagefaulttrace", enable_pagefaulttrace);

+ 79 - 50
arch/x86_64/mm/init.c

@@ -22,6 +22,7 @@
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
+#include <linux/pci.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -36,16 +37,13 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/smp.h>
+#include <asm/sections.h>
 
 #ifndef Dprintk
 #define Dprintk(x...)
 #endif
 
-#ifdef CONFIG_GART_IOMMU
-extern int swiotlb;
-#endif
-
-extern char _stext[];
+static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
@@ -86,9 +84,6 @@ void show_mem(void)
 
 /* References to section boundaries */
 
-extern char _text, _etext, _edata, __bss_start, _end[];
-extern char __init_begin, __init_end;
-
 int after_bootmem;
 
 static void *spp_getpage(void)
@@ -308,42 +303,81 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
 	       table_end<<PAGE_SHIFT);
 }
 
-extern struct x8664_pda cpu_pda[NR_CPUS];
+void __cpuinit zap_low_mappings(int cpu)
+{
+	if (cpu == 0) {
+		pgd_t *pgd = pgd_offset_k(0UL);
+		pgd_clear(pgd);
+	} else {
+		/*
+		 * For AP's, zap the low identity mappings by changing the cr3
+		 * to init_level4_pgt and doing local flush tlb all
+		 */
+		asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+	}
+	__flush_tlb_all();
+}
 
-/* Assumes all CPUs still execute in init_mm */
-void zap_low_mappings(void)
+/* Compute zone sizes for the DMA and DMA32 zones in a node. */
+__init void
+size_zones(unsigned long *z, unsigned long *h,
+	   unsigned long start_pfn, unsigned long end_pfn)
 {
-	pgd_t *pgd = pgd_offset_k(0UL);
-	pgd_clear(pgd);
-	flush_tlb_all();
+ 	int i;
+ 	unsigned long w;
+
+ 	for (i = 0; i < MAX_NR_ZONES; i++)
+ 		z[i] = 0;
+
+ 	if (start_pfn < MAX_DMA_PFN)
+ 		z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
+ 	if (start_pfn < MAX_DMA32_PFN) {
+ 		unsigned long dma32_pfn = MAX_DMA32_PFN;
+ 		if (dma32_pfn > end_pfn)
+ 			dma32_pfn = end_pfn;
+ 		z[ZONE_DMA32] = dma32_pfn - start_pfn;
+ 	}
+ 	z[ZONE_NORMAL] = end_pfn - start_pfn;
+
+ 	/* Remove lower zones from higher ones. */
+ 	w = 0;
+ 	for (i = 0; i < MAX_NR_ZONES; i++) {
+ 		if (z[i])
+ 			z[i] -= w;
+ 	        w += z[i];
+	}
+
+	/* Compute holes */
+	w = 0;
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		unsigned long s = w;
+		w += z[i];
+		h[i] = e820_hole_size(s, w);
+	}
+
+	/* Add the space needed for mem_map to the holes too. */
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
+
+	/* The 16MB DMA zone has the kernel and other misc mappings.
+ 	   Account them too */
+	if (h[ZONE_DMA]) {
+		h[ZONE_DMA] += dma_reserve;
+		if (h[ZONE_DMA] >= z[ZONE_DMA]) {
+			printk(KERN_WARNING
+				"Kernel too large and filling up ZONE_DMA?\n");
+			h[ZONE_DMA] = z[ZONE_DMA];
+		}
+	}
 }
 }
 
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
-		unsigned long zones_size[MAX_NR_ZONES];
-		unsigned long holes[MAX_NR_ZONES];
-		unsigned int max_dma;
-
-		memset(zones_size, 0, sizeof(zones_size));
-		memset(holes, 0, sizeof(holes));
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (end_pfn < max_dma) {
-			zones_size[ZONE_DMA] = end_pfn;
-			holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
-		} else {
-			zones_size[ZONE_DMA] = max_dma;
-			holes[ZONE_DMA] = e820_hole_size(0, max_dma);
-			zones_size[ZONE_NORMAL] = end_pfn - max_dma;
-			holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
-		}
-		free_area_init_node(0, NODE_DATA(0), zones_size,
-                        __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
-	}
-	return;
+	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+	size_zones(zones, holes, 0, end_pfn);
+	free_area_init_node(0, NODE_DATA(0), zones,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
 }
 #endif
 
@@ -438,19 +472,16 @@ void __init mem_init(void)
 		datasize >> 10,
 		initsize >> 10);
 
+#ifdef CONFIG_SMP
 	/*
-	 * Subtle. SMP is doing its boot stuff late (because it has to
-	 * fork idle threads) - but it also needs low mappings for the
-	 * protected-mode entry to work. We zap these entries only after
-	 * the WP-bit has been tested.
+	 * Sync boot_level4_pgt mappings with the init_level4_pgt
+	 * except for the low identity mappings which are already zapped
+	 * in init_level4_pgt. This sync-up is essential for AP's bringup
 	 */
-#ifndef CONFIG_SMP
-	zap_low_mappings();
+	memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
 #endif
 }
 
-extern char __initdata_begin[], __initdata_end[];
-
 void free_initmem(void)
 {
 	unsigned long addr;
@@ -464,7 +495,7 @@ void free_initmem(void)
 		totalram_pages++;
 	}
 	memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
-	printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
+	printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -491,6 +522,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 #else
 	reserve_bootmem(phys, len);
 #endif
+	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
+		dma_reserve += len / PAGE_SIZE;
 }
 
 int kern_addr_valid(unsigned long addr) 
@@ -532,10 +565,6 @@ extern int exception_trace, page_fault_trace;
 static ctl_table debug_table2[] = {
 	{ 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
 	  proc_dointvec },
-#ifdef CONFIG_CHECKING
-	{ 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
-	  proc_dointvec },
-#endif
 	{ 0, }
 }; 
 

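size_zones() above first records cumulative zone end points in z[] and then subtracts each lower zone from the higher ones to get per-zone sizes. A stand-alone rendering of that bookkeeping, with made-up page counts for a 16MB DMA / 4GB DMA32 / 8GB total layout (hypothetical values, not from the commit):

#include <stdio.h>

int main(void)
{
	/* cumulative end pfns: DMA (16MB), DMA32 (4GB), NORMAL (8GB) */
	unsigned long z[3] = { 4096, 1048576, 2097152 };
	unsigned long w = 0;
	int i;

	for (i = 0; i < 3; i++) {	/* remove lower zones from higher ones */
		if (z[i])
			z[i] -= w;
		w += z[i];
	}
	for (i = 0; i < 3; i++)
		printf("zone %d: %lu pages\n", i, z[i]);
	return 0;			/* 4096, 1044480, 1048576 */
}
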
+ 1 - 0
arch/x86_64/mm/k8topology.c

@@ -108,6 +108,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 		limit >>= 16; 
 		limit <<= 24; 
 		limit |= (1<<24)-1;
+		limit++;
 
 		if (limit > end_pfn << PAGE_SHIFT)
 			limit = end_pfn << PAGE_SHIFT;

+ 73 - 49
arch/x86_64/mm/numa.c

@@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
 
 int numa_off __initdata;
 
-int __init compute_hash_shift(struct node *nodes, int numnodes)
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns :
+ * 1 if OK
+ * 0 if memnodemap[] too small (or shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init populate_memnodemap(
+	const struct node *nodes, int numnodes, int shift)
 {
 	int i; 
-	int shift = 20;
-	unsigned long addr,maxend=0;
-	
-	for (i = 0; i < numnodes; i++)
-		if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
-				maxend = nodes[i].end;
+	int res = -1;
+	unsigned long addr, end;
 
-	while ((1UL << shift) <  (maxend / NODEMAPSIZE))
-		shift++;
-
-	printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
-			shift,maxend);
-	memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
+	memset(memnodemap, 0xff, sizeof(memnodemap));
 	for (i = 0; i < numnodes; i++) {
-		if (nodes[i].start == nodes[i].end)
+		addr = nodes[i].start;
+		end = nodes[i].end;
+		if (addr >= end)
 			continue;
-		for (addr = nodes[i].start;
-		     addr < nodes[i].end;
-		     addr += (1UL << shift)) {
-			if (memnodemap[addr >> shift] != 0xff) {
-				printk(KERN_INFO
-	"Your memory is not aligned you need to rebuild your kernel "
-	"with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
-					shift,addr);
+		if ((end >> shift) >= NODEMAPSIZE)
+			return 0;
+		do {
+			if (memnodemap[addr >> shift] != 0xff)
 				return -1;
-			} 
 			memnodemap[addr >> shift] = i;
-		} 
+			addr += (1 << shift);
+		} while (addr < end);
+		res = 1;
 	} 
+	return res;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes)
+{
+	int shift = 20;
+
+	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
+		shift++;
+
+	printk(KERN_DEBUG "Using %d for the hash shift.\n",
+		shift);
+
+	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+		printk(KERN_INFO
+	"Your memory is not aligned you need to rebuild your kernel "
+	"with a bigger NODEMAPSIZE shift=%d\n",
+			shift);
+		return -1;
+	}
 	return shift;
 }
 
@@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
 
-	memory_present(nodeid, start_pfn, end_pfn);
 	nodedata_phys = find_e820_area(start, end, pgdat_size); 
 	if (nodedata_phys == -1L) 
 		panic("Cannot find memory pgdat in node %d\n", nodeid);
@@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid)
 	unsigned long start_pfn, end_pfn; 
 	unsigned long zones[MAX_NR_ZONES];
 	unsigned long holes[MAX_NR_ZONES];
-	unsigned long dma_end_pfn;
 
-	memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 
-	memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES);
+ 	start_pfn = node_start_pfn(nodeid);
+ 	end_pfn = node_end_pfn(nodeid);
 
-	start_pfn = node_start_pfn(nodeid);
-	end_pfn = node_end_pfn(nodeid);
+	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n",
+		nodeid, start_pfn, end_pfn);
 
-	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
-	
-	/* All nodes > 0 have a zero length zone DMA */ 
-	dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 
-	if (start_pfn < dma_end_pfn) { 
-		zones[ZONE_DMA] = dma_end_pfn - start_pfn;
-		holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
-		zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 
-		holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
-
-	} else { 
-		zones[ZONE_NORMAL] = end_pfn - start_pfn; 
-		holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
-	} 
-    
+	size_zones(zones, holes, start_pfn, end_pfn);
 	free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
 			    start_pfn, holes);
 } 
@@ -171,7 +174,7 @@ void __init numa_init_array(void)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (cpu_to_node[i] != NUMA_NO_NODE)
 			continue;
-		cpu_to_node[i] = rr;
+ 		numa_set_node(i, rr);
 		rr = next_node(rr, node_online_map);
 		if (rr == MAX_NUMNODES)
 			rr = first_node(node_online_map);
@@ -205,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
  		if (i == numa_fake-1)
  			sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
  		nodes[i].end = nodes[i].start + sz;
- 		if (i != numa_fake-1)
- 			nodes[i].end--;
  		printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
  		       i,
  		       nodes[i].start, nodes[i].end,
@@ -257,7 +258,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	nodes_clear(node_online_map);
 	node_set_online(0);
 	for (i = 0; i < NR_CPUS; i++)
-		cpu_to_node[i] = 0;
+		numa_set_node(i, 0);
 	node_to_cpumask[0] = cpumask_of_cpu(0);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
@@ -267,6 +268,12 @@ __cpuinit void numa_add_cpu(int cpu)
 	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
 } 
 
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	cpu_pda[cpu].nodenumber = node;
+	cpu_to_node[cpu] = node;
+}
+
 unsigned long __init numa_free_all_bootmem(void) 
 { 
 	int i;
@@ -277,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 } 
 
+#ifdef CONFIG_SPARSEMEM
+static void __init arch_sparse_init(void)
+{
+	int i;
+
+	for_each_online_node(i)
+		memory_present(i, node_start_pfn(i), node_end_pfn(i));
+
+	sparse_init();
+}
+#else
+#define arch_sparse_init() do {} while (0)
+#endif
+
 void __init paging_init(void)
 { 
 	int i;
+
+	arch_sparse_init();
+
 	for_each_online_node(i) {
 		setup_node_zones(i); 
 	}

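The reworked compute_hash_shift() searches for the largest chunk size (shift) at which every 1<<shift chunk of memory still belongs to exactly one node, then verifies that shift populates the map cleanly. A self-contained user-space rendering of the same search, with a hypothetical two-node layout of 1GB each:

#include <stdio.h>
#include <string.h>

#define NODEMAPSIZE 256

struct node { unsigned long start, end; };
static unsigned char memnodemap[NODEMAPSIZE];

/* mirrors populate_memnodemap(): 1 = OK, 0 = map too small, -1 = overlap */
static int populate(const struct node *nodes, int numnodes, int shift)
{
	int i, res = -1;

	memset(memnodemap, 0xff, sizeof(memnodemap));
	for (i = 0; i < numnodes; i++) {
		unsigned long addr = nodes[i].start, end = nodes[i].end;

		if (addr >= end)
			continue;
		if ((end >> shift) >= NODEMAPSIZE)
			return 0;
		do {
			if (memnodemap[addr >> shift] != 0xff)
				return -1;
			memnodemap[addr >> shift] = i;
			addr += 1UL << shift;
		} while (addr < end);
		res = 1;
	}
	return res;
}

int main(void)
{
	struct node n[2] = { { 0, 1UL << 30 }, { 1UL << 30, 1UL << 31 } };
	int shift = 20;

	while (populate(n, 2, shift + 1) >= 0)
		shift++;
	printf("shift = %d, result = %d\n", shift, populate(n, 2, shift));
	return 0;	/* shift = 30: one 1GB chunk per node */
}
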
+ 1 - 5
arch/x86_64/mm/srat.c

@@ -71,8 +71,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 			nd->start = nd->end;
 	}
 	if (nd->end > end) {
-		if (!(end & 0xfff))
-			end--;
 		nd->end = end;
 		if (nd->start > nd->end)
 			nd->start = nd->end;
@@ -166,8 +164,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 		if (nd->end < end)
 			nd->end = end;
 	}
-	if (!(nd->end & 0xfff))
-		nd->end--;
 	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
 	       nd->start, nd->end);
 }
@@ -203,7 +199,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		if (cpu_to_node[i] == NUMA_NO_NODE)
 			continue;
 		if (!node_isset(cpu_to_node[i], nodes_parsed))
-			cpu_to_node[i] = NUMA_NO_NODE; 
+			numa_set_node(i, NUMA_NO_NODE);
 	}
 	numa_init_array();
 	return 0;

+ 14 - 3
drivers/char/agp/amd64-agp.c

@@ -57,9 +57,8 @@ static int nr_garts;
 static struct pci_dev * hammers[MAX_HAMMER_GARTS];
 
 static struct resource *aperture_resource;
-static int __initdata agp_try_unsupported;
+static int __initdata agp_try_unsupported = 1;
 
-static int gart_iterator;
 #define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++)
 
 static void flush_amd64_tlb(struct pci_dev *dev)
@@ -73,6 +72,7 @@ static void flush_amd64_tlb(struct pci_dev *dev)
 
 static void amd64_tlbflush(struct agp_memory *temp)
 {
+	int gart_iterator;
 	for_each_nb()
 		flush_amd64_tlb(hammers[gart_iterator]);
 }
@@ -222,6 +222,7 @@ static struct aper_size_info_32 amd_8151_sizes[7] =
 static int amd_8151_configure(void)
 {
 	unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
+	int gart_iterator;
 
 	/* Configure AGP regs in each x86-64 host bridge. */
 	for_each_nb() {
@@ -235,7 +236,7 @@ static int amd_8151_configure(void)
 static void amd64_cleanup(void)
 {
 	u32 tmp;
-
+	int gart_iterator;
 	for_each_nb() {
 		/* disable gart translation */
 		pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp);
@@ -697,6 +698,16 @@ static struct pci_device_id agp_amd64_pci_table[] = {
 	.subvendor	= PCI_ANY_ID,
 	.subdevice	= PCI_ANY_ID,
 	},
+	/* ALI/ULI M1695 */
+	{
+	.class		= (PCI_CLASS_BRIDGE_HOST << 8),
+	.class_mask	= ~0,
+	.vendor		= PCI_VENDOR_ID_AL,
+	.device		= 0x1689,
+	.subvendor	= PCI_ANY_ID,
+	.subdevice	= PCI_ANY_ID,
+	},
+
 	{ }
 };
 

+ 1 - 0
include/asm-generic/sections.h

@@ -13,5 +13,6 @@ extern char _eextratext[] __attribute__((weak));
 extern char _end[];
 extern char __per_cpu_start[], __per_cpu_end[];
 extern char __kprobes_text_start[], __kprobes_text_end[];
+extern char __initdata_begin[], __initdata_end[];
 
 #endif /* _ASM_GENERIC_SECTIONS_H_ */

+ 1 - 1
include/asm-i386/mach-default/mach_reboot.h

@@ -19,7 +19,7 @@ static inline void kb_wait(void)
 static inline void mach_reboot(void)
 {
 	int i;
-	for (i = 0; i < 100; i++) {
+	for (i = 0; i < 10; i++) {
 		kb_wait();
 		udelay(50);
 		outb(0x60, 0x64);	/* write Controller Command Byte */

+ 3 - 1
include/asm-i386/processor.h

@@ -65,7 +65,9 @@ struct cpuinfo_x86 {
 	int	f00f_bug;
 	int	coma_bug;
 	unsigned long loops_per_jiffy;
-	unsigned char x86_num_cores;
+	unsigned char x86_max_cores;	/* cpuid returned max cores value */
+	unsigned char booted_cores;	/* number of cores as seen by OS */
+	unsigned char apicid;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL 0

+ 2 - 0
include/asm-x86_64/apic.h

@@ -111,6 +111,8 @@ extern unsigned int nmi_watchdog;
 
 extern int disable_timer_pin_1;
 
+extern void setup_threshold_lvt(unsigned long lvt_off);
+
 #endif /* CONFIG_X86_LOCAL_APIC */
 
 extern unsigned boot_cpu_id;

+ 1 - 1
include/asm-x86_64/cache.h

@@ -9,6 +9,6 @@
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
 #define L1_CACHE_BYTES	(1 << L1_CACHE_SHIFT)
-#define L1_CACHE_SHIFT_MAX 6	/* largest L1 which this arch supports */
+#define L1_CACHE_SHIFT_MAX 7	/* largest L1 which this arch supports */
 
 #endif

+ 3 - 0
include/asm-x86_64/desc.h

@@ -98,16 +98,19 @@ static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsig
 
 static inline void set_intr_gate(int nr, void *func) 
 { 
+	BUG_ON((unsigned)nr > 0xFF);
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); 
 } 
 
 static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 
 { 
+	BUG_ON((unsigned)nr > 0xFF);
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); 
 } 
 
 static inline void set_system_gate(int nr, void *func) 
 { 
+	BUG_ON((unsigned)nr > 0xFF);
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
 } 
 

+ 9 - 2
include/asm-x86_64/dma.h

@@ -72,8 +72,15 @@
 
 #define MAX_DMA_CHANNELS	8
 
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS      (PAGE_OFFSET+0x1000000)
+
+/* 16MB ISA DMA zone */
+#define MAX_DMA_PFN   ((16*1024*1024) >> PAGE_SHIFT)
+
+/* 4GB broken PCI/AGP hardware bus master zone */
+#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)
+
+/* Compat define for old dma zone */
+#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
 
 /* 8237 DMA controllers */
 #define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */

+ 21 - 14
include/asm-x86_64/hpet.h

@@ -14,18 +14,18 @@
 #define HPET_CFG	0x010
 #define HPET_STATUS	0x020
 #define HPET_COUNTER	0x0f0
-#define HPET_T0_CFG	0x100
-#define HPET_T0_CMP	0x108
-#define HPET_T0_ROUTE	0x110
-#define HPET_T1_CFG	0x120
-#define HPET_T1_CMP	0x128
-#define HPET_T1_ROUTE	0x130
-#define HPET_T2_CFG	0x140
-#define HPET_T2_CMP	0x148
-#define HPET_T2_ROUTE	0x150
+#define HPET_Tn_OFFSET	0x20
+#define HPET_Tn_CFG(n)	 (0x100 + (n) * HPET_Tn_OFFSET)
+#define HPET_Tn_ROUTE(n) (0x104 + (n) * HPET_Tn_OFFSET)
+#define HPET_Tn_CMP(n)	 (0x108 + (n) * HPET_Tn_OFFSET)
+#define HPET_T0_CFG	HPET_Tn_CFG(0)
+#define HPET_T0_CMP	HPET_Tn_CMP(0)
+#define HPET_T1_CFG	HPET_Tn_CFG(1)
+#define HPET_T1_CMP	HPET_Tn_CMP(1)
 
 #define HPET_ID_VENDOR	0xffff0000
 #define HPET_ID_LEGSUP	0x00008000
+#define HPET_ID_64BIT	0x00002000
 #define HPET_ID_NUMBER	0x00001f00
 #define HPET_ID_REV	0x000000ff
 #define	HPET_ID_NUMBER_SHIFT	8
@@ -38,11 +38,18 @@
 #define	HPET_LEGACY_8254	2
 #define	HPET_LEGACY_RTC		8
 
-#define HPET_TN_ENABLE		0x004
-#define HPET_TN_PERIODIC	0x008
-#define HPET_TN_PERIODIC_CAP	0x010
-#define HPET_TN_SETVAL		0x040
-#define HPET_TN_32BIT		0x100
+#define HPET_TN_LEVEL		0x0002
+#define HPET_TN_ENABLE		0x0004
+#define HPET_TN_PERIODIC	0x0008
+#define HPET_TN_PERIODIC_CAP	0x0010
+#define HPET_TN_64BIT_CAP	0x0020
+#define HPET_TN_SETVAL		0x0040
+#define HPET_TN_32BIT		0x0100
+#define HPET_TN_ROUTE		0x3e00
+#define HPET_TN_FSB		0x4000
+#define HPET_TN_FSB_CAP		0x8000
+
+#define HPET_TN_ROUTE_SHIFT	9
 
 
 extern int is_hpet_enabled(void);
 extern int hpet_rtc_timer_init(void);
+ 1 - 1
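The per-timer constants now fall out of a single stride formula: timer n's register block starts at 0x100 + n * 0x20, with the comparator at offset 8 inside it. A quick user-space check of the arithmetic (illustration only):

#include <assert.h>

#define HPET_Tn_OFFSET	0x20
#define HPET_Tn_CFG(n)	 (0x100 + (n) * HPET_Tn_OFFSET)
#define HPET_Tn_CMP(n)	 (0x108 + (n) * HPET_Tn_OFFSET)

int main(void)
{
	assert(HPET_Tn_CFG(0) == 0x100 && HPET_Tn_CMP(0) == 0x108);
	assert(HPET_Tn_CFG(1) == 0x120 && HPET_Tn_CMP(1) == 0x128);
	/* the old fixed HPET_T2_CFG/HPET_T2_CMP values drop out for n = 2 */
	assert(HPET_Tn_CFG(2) == 0x140 && HPET_Tn_CMP(2) == 0x148);
	return 0;
}
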
include/asm-x86_64/hw_irq.h

@@ -55,7 +55,7 @@ struct hw_interrupt_type;
 #define CALL_FUNCTION_VECTOR	0xfc
 #define CALL_FUNCTION_VECTOR	0xfc
 #define KDB_VECTOR		0xfb	/* reserved for KDB */
 #define THERMAL_APIC_VECTOR	0xfa
+#define THRESHOLD_APIC_VECTOR   0xf9
 #define INVALIDATE_TLB_VECTOR_END	0xf8
 #define INVALIDATE_TLB_VECTOR_END	0xf8
 #define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f8 used for TLB flush */
 
+ 5 - 0
include/asm-x86_64/ia32.h

@@ -165,6 +165,11 @@ struct siginfo_t;
 int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info);
 int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info);
 int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info);
 int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs);
+struct linux_binprm;
+extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
+				unsigned long stack_top, int exec_stack);
+
 #endif
 
 #endif /* !CONFIG_IA32_SUPPORT */

+ 10 - 0
include/asm-x86_64/mce.h

@@ -67,6 +67,8 @@ struct mce_log {
 /* Software defined banks */
 #define MCE_EXTENDED_BANK	128
 #define MCE_THERMAL_BANK	MCE_EXTENDED_BANK + 0
+#define MCE_THRESHOLD_BASE      MCE_EXTENDED_BANK + 1 /* MCE_AMD */
+#define MCE_THRESHOLD_DRAM_ECC  MCE_THRESHOLD_BASE + 4
 
 void mce_log(struct mce *m);
 #ifdef CONFIG_X86_MCE_INTEL
@@ -77,4 +79,12 @@ static inline void mce_intel_feature_init(struct cpuinfo_x86 *c)
 }
 #endif
 
+#ifdef CONFIG_X86_MCE_AMD
+void mce_amd_feature_init(struct cpuinfo_x86 *c);
+#else
+static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
+{
+}
+#endif
+
 #endif

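The CONFIG_X86_MCE_AMD block follows the usual stub convention: with the option disabled, the empty inline body compiles away, so callers need no #ifdef of their own. A hypothetical call site (a sketch, not code from this commit):

void mcheck_init_sketch(struct cpuinfo_x86 *c)
{
	mce_intel_feature_init(c);	/* empty inline unless CONFIG_X86_MCE_INTEL */
	mce_amd_feature_init(c);	/* empty inline unless CONFIG_X86_MCE_AMD */
}
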
+ 3 - 6
include/asm-x86_64/mmzone.h

@@ -17,16 +17,15 @@
 /* Simple perfect hash to map physical addresses to node numbers */
 extern int memnode_shift; 
 extern u8  memnodemap[NODEMAPSIZE]; 
-extern int maxnode;
 
 extern struct pglist_data *node_data[];
 
 static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 
 { 
-	int nid; 
+	unsigned nid; 
 	VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
 	nid = memnodemap[addr >> memnode_shift]; 
-	VIRTUAL_BUG_ON(nid > maxnode); 
+	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 
 	return nid; 
 } 
 
@@ -41,9 +40,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
 #define kvaddr_to_nid(kaddr)	phys_to_nid(__pa(kaddr))
 
-/* AK: this currently doesn't deal with invalid addresses. We'll see 
-   if the 2.5 kernel doesn't pass them
-   (2.4 used to). */
+/* Requires pfn_valid(pfn) to be true */
 #define pfn_to_page(pfn) ({ \
 	int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); 	\
 	((pfn) - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map;	\

+ 4 - 3
include/asm-x86_64/mpspec.h

@@ -16,7 +16,7 @@
 /*
  * A maximum of 255 APICs with the current APIC ID architecture.
  */
-#define MAX_APICS 128
+#define MAX_APICS 255
 
 struct intel_mp_floating
 {
@@ -157,7 +157,8 @@ struct mpc_config_lintsrc
  */
 
 #define MAX_MP_BUSSES 256
-#define MAX_IRQ_SOURCES 256
+/* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
+#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
 enum mp_bustype {
 	MP_BUS_ISA = 1,
 	MP_BUS_EISA,
@@ -172,7 +173,7 @@ extern int smp_found_config;
 extern void find_smp_config (void);
 extern void get_smp_config (void);
 extern int nr_ioapics;
-extern int apic_version [MAX_APICS];
+extern unsigned char apic_version [MAX_APICS];
 extern int mp_irq_entries;
 extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
 extern int mpc_default_type;

+ 1 - 1
include/asm-x86_64/msr.h

@@ -19,7 +19,7 @@
 			    : "=a" (a__), "=d" (b__) \
 			    : "c" (msr)); \
        val = a__ | (b__<<32); \
-} while(0); 
+} while(0)
 
 #define wrmsr(msr,val1,val2) \
      __asm__ __volatile__("wrmsr" \

+ 2 - 0
include/asm-x86_64/numa.h

@@ -17,6 +17,8 @@ extern void numa_add_cpu(int cpu);
 extern void numa_init_array(void);
 extern int numa_off;
 
+extern void numa_set_node(int cpu, int node);
+
 extern unsigned char apicid_to_node[256];
 
 #define NUMA_NO_NODE 0xff

+ 1 - 1
include/asm-x86_64/page.h

@@ -11,7 +11,7 @@
 #define PAGE_SIZE	(1UL << PAGE_SHIFT)
 #endif
 #define PAGE_MASK	(~(PAGE_SIZE-1))
-#define PHYSICAL_PAGE_MASK	(~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT))
+#define PHYSICAL_PAGE_MASK	(~(PAGE_SIZE-1) & __PHYSICAL_MASK)
 
 #define THREAD_ORDER 1 
 #ifdef __ASSEMBLY__

+ 1 - 0
include/asm-x86_64/pda.h

@@ -15,6 +15,7 @@ struct x8664_pda {
         int irqcount;		    /* Irq nesting counter. Starts with -1 */  	
 	int cpunumber;		    /* Logical CPU number */
 	char *irqstackptr;	/* top of irqstack */
+	int nodenumber;		    /* number of current node */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
 	struct mm_struct *active_mm;

+ 3 - 2
include/asm-x86_64/pgtable.h

@@ -16,6 +16,7 @@ extern pud_t level3_physmem_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pgd_t init_level4_pgt[];
+extern pgd_t boot_level4_pgt[];
 extern unsigned long __supported_pte_mask;
 
 #define swapper_pg_dir init_level4_pgt
@@ -247,7 +248,7 @@ static inline unsigned long pud_bad(pud_t pud)
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))	/* FIXME: is this
 						   right? */
 #define pte_page(x)	pfn_to_page(pte_pfn(x))
-#define pte_pfn(x)  ((pte_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+#define pte_pfn(x)  ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
@@ -354,7 +355,7 @@ static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define	pmd_bad(x)	((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE )
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
-#define pmd_pfn(x)  ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+#define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
 #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })

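The pte_pfn()/pmd_pfn() reordering here is a correctness fix: the entry must be masked before the shift, otherwise bits above the physical-address field shift down into the page-frame number. A user-space demonstration with a hypothetical 40-bit __PHYSICAL_MASK and a high software bit set in the entry:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define __PHYSICAL_MASK	((1UL << 40) - 1)	/* hypothetical 40-bit width */

int main(void)
{
	uint64_t pte = (1UL << 50) | 0x12345000UL | 0x67; /* sw bit + addr + flags */
	uint64_t before = (pte >> PAGE_SHIFT) & __PHYSICAL_MASK; /* bit 50 leaks in */
	uint64_t after = (pte & __PHYSICAL_MASK) >> PAGE_SHIFT;  /* clean pfn */

	assert(after == 0x12345);
	assert(before != after);
	return 0;
}
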
+ 3 - 1
include/asm-x86_64/processor.h

@@ -61,10 +61,12 @@ struct cpuinfo_x86 {
 	int	x86_cache_alignment;
 	int	x86_tlbsize;	/* number of 4K pages in DTLB/ITLB combined(in pages)*/
         __u8    x86_virt_bits, x86_phys_bits;
-	__u8	x86_num_cores;
+	__u8	x86_max_cores;	/* cpuid returned max cores value */
         __u32   x86_power; 	
 	__u32   extended_cpuid_level;	/* Max extended CPUID function supported */
 	unsigned long loops_per_jiffy;
+	__u8	apicid;
+	__u8	booted_cores;	/* number of cores as seen by OS */
 } ____cacheline_aligned;
 
 #define X86_VENDOR_INTEL 0

+ 4 - 0
include/asm-x86_64/proto.h

@@ -11,6 +11,8 @@ struct pt_regs;
 extern void start_kernel(void);
 extern void pda_init(int); 
 
+extern void zap_low_mappings(int cpu);
+
 extern void early_idt_handler(void);
 
 extern void mcheck_init(struct cpuinfo_x86 *c);
@@ -22,6 +24,8 @@ extern void mtrr_bp_init(void);
 #define mtrr_bp_init() do {} while (0)
 #endif
 extern void init_memory_mapping(unsigned long start, unsigned long end);
+extern void size_zones(unsigned long *z, unsigned long *h,
+			unsigned long start_pfn, unsigned long end_pfn);
 
 extern void system_call(void); 
 extern int kernel_syscall(void);

+ 0 - 283
include/asm-x86_64/rwsem.h

@@ -1,283 +0,0 @@
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for x86_64+
- *
- * Written by David Howells (dhowells@redhat.com).
- * Ported by Andi Kleen <ak@suse.de> to x86-64.
- *
- * Derived from asm-i386/semaphore.h and asm-i386/rwsem.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and maybe) readers waiting (in which case it goes to
- * sleep).
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
-
-#ifndef _X8664_RWSEM_H
-#define _X8664_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
-	signed int		count;
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		(-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-	spinlock_t		wait_lock;
-	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
-};
-
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
-	__RWSEM_DEBUG_INIT }
-
-#define DECLARE_RWSEM(name) \
-	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
-	sem->count = RWSEM_UNLOCKED_VALUE;
-	spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
-}
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"# beginning down_read\n\t"
-LOCK_PREFIX	"  incl      (%%rdi)\n\t" /* adds 0x00000001, returns the old value */
-		"  js        2f\n\t" /* jump if we weren't granted the lock */
-		"1:\n\t"
-		LOCK_SECTION_START("") \
-		"2:\n\t"
-		"  call      rwsem_down_read_failed_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END \
-		"# ending down_read\n\t"
-		: "+m"(sem->count)
-		: "D"(sem)
-		: "memory", "cc");
-}
-
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	__s32 result, tmp;
-	__asm__ __volatile__(
-		"# beginning __down_read_trylock\n\t"
-		"  movl      %0,%1\n\t"
-		"1:\n\t"
-		"  movl	     %1,%2\n\t"
-		"  addl      %3,%2\n\t"
-		"  jle	     2f\n\t"
-LOCK_PREFIX	"  cmpxchgl  %2,%0\n\t"
-		"  jnz	     1b\n\t"
-		"2:\n\t"
-		"# ending __down_read_trylock\n\t"
-		: "+m"(sem->count), "=&a"(result), "=&r"(tmp)
-		: "i"(RWSEM_ACTIVE_READ_BIAS)
-		: "memory", "cc");
-	return result>=0 ? 1 : 0;
-}
-
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = RWSEM_ACTIVE_WRITE_BIAS;
-	__asm__ __volatile__(
-		"# beginning down_write\n\t"
-LOCK_PREFIX	"  xaddl      %0,(%%rdi)\n\t" /* subtract 0x0000ffff, returns the old value */
-		"  testl     %0,%0\n\t" /* was the count 0 before? */
-		"  jnz       2f\n\t" /* jump if we weren't granted the lock */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  call      rwsem_down_write_failed_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending down_write"
-		: "=&r" (tmp) 
-		: "0"(tmp), "D"(sem)
-		: "memory", "cc");
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	signed long ret = cmpxchg(&sem->count,
-				  RWSEM_UNLOCKED_VALUE, 
-				  RWSEM_ACTIVE_WRITE_BIAS);
-	if (ret == RWSEM_UNLOCKED_VALUE)
-		return 1;
-	return 0;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	__s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
-	__asm__ __volatile__(
-		"# beginning __up_read\n\t"
-LOCK_PREFIX	"  xaddl      %[tmp],(%%rdi)\n\t" /* subtracts 1, returns the old value */
-		"  js        2f\n\t" /* jump if the lock is being waited upon */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  decw      %w[tmp]\n\t" /* do nothing if still outstanding active readers */
-		"  jnz       1b\n\t"
-		"  call      rwsem_wake_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending __up_read\n"
-		: "+m"(sem->count), [tmp] "+r" (tmp)
-		: "D"(sem)
-		: "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	unsigned tmp; 
-	__asm__ __volatile__(
-		"# beginning __up_write\n\t"
-		"  movl     %[bias],%[tmp]\n\t"
-LOCK_PREFIX	"  xaddl     %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
-		"  jnz       2f\n\t" /* jump if the lock is being waited upon */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  decw      %w[tmp]\n\t" /* did the active count reduce to 0? */
-		"  jnz       1b\n\t" /* jump back if not */
-		"  call      rwsem_wake_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending __up_write\n"
-		: "+m"(sem->count), [tmp] "=r" (tmp)
-		: "D"(sem), [bias] "i"(-RWSEM_ACTIVE_WRITE_BIAS)
-		: "memory", "cc");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"# beginning __downgrade_write\n\t"
-LOCK_PREFIX	"  addl      %[bias],(%%rdi)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
-		"  js        2f\n\t" /* jump if the lock is being waited upon */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  call	     rwsem_downgrade_thunk\n"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending __downgrade_write\n"
-		: "=m"(sem->count)
-		: "D"(sem), [bias] "i"(-RWSEM_WAITING_BIAS), "m"(sem->count)
-		: "memory", "cc");
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-LOCK_PREFIX	"addl %1,%0"
-		:"=m"(sem->count)
-		:"ir"(delta), "m"(sem->count));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
-	int tmp = delta;
-
-	__asm__ __volatile__(
-LOCK_PREFIX	"xaddl %0,(%2)"
-		: "=r"(tmp), "=m"(sem->count)
-		: "r"(sem), "m"(sem->count), "0" (tmp)
-		: "memory");
-
-	return tmp+delta;
-}
-
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
-	return (sem->count != 0);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _X8664_RWSEM_H */

+ 2 - 1
include/asm-x86_64/smp.h

@@ -47,7 +47,6 @@ extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
-extern void zap_low_mappings(void);
 void smp_stop_cpu(void);
 extern int smp_call_function_single(int cpuid, void (*func) (void *info),
 				void *info, int retry, int wait);
@@ -82,6 +81,8 @@ extern int safe_smp_processor_id(void);
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern void prefill_possible_map(void);
+extern unsigned num_processors;
+extern unsigned disabled_cpus;
 
 #endif /* !ASSEMBLY */
 

+ 6 - 6
include/asm-x86_64/spinlock.h

@@ -18,22 +18,22 @@
  */
 
 #define __raw_spin_is_locked(x) \
-		(*(volatile signed char *)(&(x)->slock) <= 0)
+		(*(volatile signed int *)(&(x)->slock) <= 0)
 
 #define __raw_spin_lock_string \
 	"\n1:\t" \
-	"lock ; decb %0\n\t" \
+	"lock ; decl %0\n\t" \
 	"js 2f\n" \
 	LOCK_SECTION_START("") \
 	"2:\t" \
 	"rep;nop\n\t" \
-	"cmpb $0,%0\n\t" \
+	"cmpl $0,%0\n\t" \
 	"jle 2b\n\t" \
 	"jmp 1b\n" \
 	LOCK_SECTION_END
 
 #define __raw_spin_unlock_string \
-	"movb $1,%0" \
+	"movl $1,%0" \
 		:"=m" (lock->slock) : : "memory"
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
@@ -47,10 +47,10 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	char oldval;
+	int oldval;
 
 	__asm__ __volatile__(
-		"xchgb %b0,%1"
+		"xchgl %0,%1"
 		:"=q" (oldval), "=m" (lock->slock)
 		:"0" (0) : "memory");
 

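The lock word is widened from a byte to an int here, so every access moves from decb/cmpb/movb/xchgb to the 32-bit forms. A user-space rendering of the xchg-based trylock with the kernel's convention of 1 = free, <= 0 = held (illustration only):

#include <stdio.h>

static volatile int slock = 1;	/* 1 = free, <= 0 = held */

static int trylock_sketch(void)
{
	int oldval;

	__asm__ __volatile__("xchgl %0,%1"
			     : "=q" (oldval), "=m" (slock)
			     : "0" (0) : "memory");
	return oldval > 0;	/* acquired iff the lock was free */
}

int main(void)
{
	printf("%d %d\n", trylock_sketch(), trylock_sketch());	/* 1 0 */
	return 0;
}
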
+ 2 - 0
include/asm-x86_64/topology.h

@@ -28,6 +28,8 @@ extern int __node_distance(int, int);
 #define pcibus_to_node(bus)		((long)(bus->sysdata))	
 #define pcibus_to_cpumask(bus)		node_to_cpumask(pcibus_to_node(bus));
 
+#define numa_node_id()			read_pda(nodenumber)
+
 /* sched_domains SD_NODE_INIT for x86_64 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
 	.span			= CPU_MASK_NONE,	\

+ 2 - 1
include/asm-x86_64/unistd.h

@@ -462,7 +462,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr)
 #define __NR_tkill	200
 __SYSCALL(__NR_tkill, sys_tkill) 
 #define __NR_time      201
-__SYSCALL(__NR_time, sys_time64)
+__SYSCALL(__NR_time, sys_time)
 #define __NR_futex     202
 __SYSCALL(__NR_futex, sys_futex)
 #define __NR_sched_setaffinity    203
@@ -608,6 +608,7 @@ do { \
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #endif
 

+ 10 - 0
include/linux/bitops.h

@@ -84,6 +84,16 @@ static __inline__ int get_bitmask_order(unsigned int count)
 	return order;	/* We could be slightly more clever with -1 here... */
 }
 
+static __inline__ int get_count_order(unsigned int count)
+{
+	int order;
+	
+	order = fls(count) - 1;
+	if (count & (count - 1))
+		order++;
+	return order;
+}
+
 /*
  * hweightN: returns the hamming weight (i.e. the number
  * of bits set) of a N-bit word

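get_count_order() returns ceil(log2(count)): the order of the smallest power-of-two block covering count items, one more than fls()-1 whenever count is not itself a power of two. A user-space sketch using GCC's __builtin_clz in place of the kernel's fls():

#include <stdio.h>

static int fls_sketch(unsigned int x)	/* kernel fls(): fls(0) == 0 */
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static int get_count_order_sketch(unsigned int count)
{
	int order = fls_sketch(count) - 1;

	if (count & (count - 1))	/* not a power of two: round up */
		order++;
	return order;
}

int main(void)
{
	printf("%d %d %d\n", get_count_order_sketch(1),
	       get_count_order_sketch(4), get_count_order_sketch(5)); /* 0 2 3 */
	return 0;
}
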
+ 11 - 0
include/linux/gfp.h

@@ -14,6 +14,13 @@ struct vm_area_struct;
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
+#ifdef CONFIG_DMA_IS_DMA32
+#define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
+#elif BITS_PER_LONG < 64
+#define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
+#else
+#define __GFP_DMA32	((__force gfp_t)0x04)	/* Has own ZONE_DMA32 */
+#endif
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -63,6 +70,10 @@ struct vm_area_struct;
 
 #define GFP_DMA		__GFP_DMA
 
+/* 4GB DMA on some platforms */
+#define GFP_DMA32	__GFP_DMA32
+
+
 #define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK))
 
 /*

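A driver that must keep a bus-master buffer below 4GB would request the new zone roughly as follows (hypothetical fragment, not from this commit; alloc_pages() and GFP_KERNEL are the contemporary kernel API, GFP_DMA32 is the flag added above):

	/* memory reachable by a 32-bit-only bus master */
	struct page *page = alloc_pages(GFP_KERNEL | GFP_DMA32, 0);

	if (!page)
		return -ENOMEM;

On x86-64 this lands in ZONE_DMA32 (< 4GB); per the #elif above, a 32-bit build folds the flag into ZONE_NORMAL, which already sits below 4GB there.
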
+ 2 - 8
include/linux/mm.h

@@ -206,12 +206,6 @@ struct vm_operations_struct {
 struct mmu_gather;
 struct inode;
 
-#ifdef ARCH_HAS_ATOMIC_UNSIGNED
-typedef unsigned page_flags_t;
-#else
-typedef unsigned long page_flags_t;
-#endif
-
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -219,7 +213,7 @@ typedef unsigned long page_flags_t;
  * a page.
  */
 struct page {
-	page_flags_t flags;		/* Atomic flags, some possibly
+	unsigned long flags;		/* Atomic flags, some possibly
 					 * updated asynchronously */
 	atomic_t _count;		/* Usage count, see below. */
 	atomic_t _mapcount;		/* Count of ptes mapped in mms,
@@ -435,7 +429,7 @@ static inline void put_page(struct page *page)
 #endif
 
 /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
-#define SECTIONS_PGOFF		((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)
+#define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
 

+ 12 - 8
include/linux/mmzone.h

@@ -71,10 +71,11 @@ struct per_cpu_pageset {
 #endif
 
 #define ZONE_DMA		0
-#define ZONE_NORMAL		1
-#define ZONE_HIGHMEM		2
+#define ZONE_DMA32		1
+#define ZONE_NORMAL		2
+#define ZONE_HIGHMEM		3
 
-#define MAX_NR_ZONES		3	/* Sync this with ZONES_SHIFT */
+#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
 #define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
 
@@ -108,9 +109,10 @@ struct per_cpu_pageset {
 
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a PC we have 3 zones:
+ * into multiple physical zones. On a PC we have 4 zones:
  *
  * ZONE_DMA	  < 16 MB	ISA DMA capable memory
+ * ZONE_DMA32	     0 MB 	Empty
  * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
  * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
  */
@@ -433,7 +435,9 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
+#ifndef numa_node_id
 #define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
+#endif
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 
@@ -453,12 +457,12 @@ extern struct pglist_data contig_page_data;
 #include <asm/sparsemem.h>
 #endif
 
-#if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
+#if BITS_PER_LONG == 32
 /*
- * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
- * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
+ * with 32 bit page->flags field, we reserve 9 bits for node/zone info.
+ * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes.
  */
-#define FLAGS_RESERVED		8
+#define FLAGS_RESERVED		9
 
 #elif BITS_PER_LONG == 64
 /*

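For the 32-bit case above, a hedged picture of the resulting layout, following the commit's own accounting of 9 reserved bits split as 6 node bits plus 3 zone bits:

	/* 32-bit page->flags with FLAGS_RESERVED == 9:
	 *
	 *   31.......26 25....23 22...............0
	 *   [ node: 6 ][ zone:3 ][ flag bits: 23  ]
	 */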
+ 1 - 1
mm/filemap.c

@@ -134,7 +134,7 @@ static int sync_page(void *word)
 	struct address_space *mapping;
 	struct page *page;
 
-	page = container_of((page_flags_t *)word, struct page, flags);
+	page = container_of((unsigned long *)word, struct page, flags);
 
 	/*
 	 * page_mapping() is being called without PG_locked held.

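The sync_page() change works because the page wait queues hash on the address of the flags word itself, and container_of() walks back from that member to the enclosing struct page. A freestanding sketch of the idiom (names here are illustrative, not kernel code):

	#include <stddef.h>

	struct page_like {
		unsigned long flags;
		void *mapping;
	};

	/* recover the structure from a pointer to its flags member */
	static struct page_like *page_from_word(unsigned long *word)
	{
		return (struct page_like *)
			((char *)word - offsetof(struct page_like, flags));
	}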
+ 14 - 6
mm/page_alloc.c

@@ -60,8 +60,11 @@ long nr_swap_pages;
  *	NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
  *	HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
  *	HIGHMEM allocation will leave (224M+784M)/256 of ram reserved in ZONE_DMA
+ *
+ * TBD: should special case ZONE_DMA32 machines here - in those we normally
+ * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
 
 EXPORT_SYMBOL(totalram_pages);
 
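Making the ratio table concrete for the 1GB i386 example in the comment above (16MB DMA, 784MB normal, 224MB highmem), a rough back-of-envelope:

	/* reserve in a lower zone ~= pages sitting above it / ratio:
	 *   NORMAL allocation:    784M / 256     ~= 3M kept free in ZONE_DMA
	 *   HIGHMEM allocation:   224M / 32      ~= 7M kept free in ZONE_NORMAL
	 *   HIGHMEM allocation:  (224M+784M)/256 ~= 4M kept free in ZONE_DMA
	 * the new middle 256 entry applies the same policy to ZONE_DMA32,
	 * which is empty on i386, so nothing is actually withheld there.
	 */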
@@ -72,7 +75,7 @@ EXPORT_SYMBOL(totalram_pages);
 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
 unsigned long __initdata nr_kernel_pages;
@@ -124,7 +127,7 @@ static void bad_page(const char *function, struct page *page)
 	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
 		function, current->comm, page);
 	printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-		(int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+		(int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
 		page->mapping, page_mapcount(page), page_count(page));
 	printk(KERN_EMERG "Backtrace:\n");
 	dump_stack();
@@ -1421,6 +1424,10 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
 		zone = pgdat->node_zones + ZONE_NORMAL;
 		if (zone->present_pages)
 			zonelist->zones[j++] = zone;
+	case ZONE_DMA32:
+		zone = pgdat->node_zones + ZONE_DMA32;
+		if (zone->present_pages)
+			zonelist->zones[j++] = zone;
 	case ZONE_DMA:
 		zone = pgdat->node_zones + ZONE_DMA;
 		if (zone->present_pages)
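Note that build_zonelists_node() relies on deliberate switch fall-through: execution starts at the requested zone's case and runs every case below it, so each zonelist ends with progressively more restrictive fallbacks. A hedged sketch of the resulting orders (only zones with present pages are appended):

	/* k == ZONE_NORMAL  -> { NORMAL, DMA32, DMA }
	 * k == ZONE_HIGHMEM -> { HIGHMEM, NORMAL, DMA32, DMA }
	 */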
@@ -1435,6 +1442,8 @@ static inline int highest_zone(int zone_bits)
 	int res = ZONE_NORMAL;
 	if (zone_bits & (__force int)__GFP_HIGHMEM)
 		res = ZONE_HIGHMEM;
+	if (zone_bits & (__force int)__GFP_DMA32)
+		res = ZONE_DMA32;
 	if (zone_bits & (__force int)__GFP_DMA)
 		res = ZONE_DMA;
 	return res;
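The test order in highest_zone() is what makes combined zone bits safe: later tests override earlier ones, so the most restrictive zone wins. For example (hedged):

	/* __GFP_HIGHMEM | __GFP_DMA32 -> ZONE_DMA32
	 * __GFP_DMA32   | __GFP_DMA   -> ZONE_DMA
	 */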
@@ -1846,11 +1855,10 @@ static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
 			if (process_zones(cpu))
 				ret = NOTIFY_BAD;
 			break;
-#ifdef CONFIG_HOTPLUG_CPU
+		case CPU_UP_CANCELED:
 		case CPU_DEAD:
 			free_zone_pagesets(cpu);
 			break;
-#endif
 		default:
 			break;
 	}
@@ -1955,7 +1963,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		if (zholes_size)
 			realsize -= zholes_size[j];
 
-		if (j == ZONE_DMA || j == ZONE_NORMAL)
+		if (j < ZONE_HIGHMEM)
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
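The last hunk generalizes the accounting: any zone numbered below ZONE_HIGHMEM now counts toward nr_kernel_pages, which with the new zone numbering reads as:

	/* j < ZONE_HIGHMEM  <=>  j == ZONE_DMA || j == ZONE_DMA32 || j == ZONE_NORMAL */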