Browse Source

Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip:
  x86: Fix double enable_IR_x2apic() call on SMP kernel on !SMP boards
  x86: Increase CONFIG_NODES_SHIFT max to 10
  ibft, x86: Change reserve_ibft_region() to find_ibft_region()
  x86, hpet: Fix bug in RTC emulation
  x86, hpet: Erratum workaround for read after write of HPET comparator
  bootmem, x86: Fix 32bit numa system without RAM on node 0
  nobootmem, x86: Fix 32bit numa system without RAM on node 0
  x86: Handle overlapping mptables
  x86: Make e820_remove_range to handle all covered case
  x86-32, resume: do a global tlb flush in S4 resume
Linus Torvalds 15 years ago
parent
commit
fb1ae63577

+ 2 - 2
arch/x86/Kconfig

@@ -1216,8 +1216,8 @@ config NUMA_EMU
 
 
 config NODES_SHIFT
 config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
 	int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
-	range 1 9
-	default "9" if MAXSMP
+	range 1 10
+	default "10" if MAXSMP
 	default "6" if X86_64
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
 	default "4" if X86_NUMAQ
 	default "3"
 	default "3"

+ 2 - 0
arch/x86/kernel/apic/apic.c

@@ -1640,8 +1640,10 @@ int __init APIC_init_uniprocessor(void)
 	}
 	}
 #endif
 #endif
 
 
+#ifndef CONFIG_SMP
 	enable_IR_x2apic();
 	enable_IR_x2apic();
 	default_setup_apic_routing();
 	default_setup_apic_routing();
+#endif
 
 
 	verify_local_APIC();
 	verify_local_APIC();
 	connect_bsp_APIC();
 	connect_bsp_APIC();

+ 20 - 4
arch/x86/kernel/e820.c

@@ -519,29 +519,45 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
 	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
 	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
 		       (unsigned long long) start,
 		       (unsigned long long) start,
 		       (unsigned long long) end);
 		       (unsigned long long) end);
-	e820_print_type(old_type);
+	if (checktype)
+		e820_print_type(old_type);
 	printk(KERN_CONT "\n");
 	printk(KERN_CONT "\n");
 
 
 	for (i = 0; i < e820.nr_map; i++) {
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
 		struct e820entry *ei = &e820.map[i];
 		u64 final_start, final_end;
 		u64 final_start, final_end;
+		u64 ei_end;
 
 
 		if (checktype && ei->type != old_type)
 		if (checktype && ei->type != old_type)
 			continue;
 			continue;
+
+		ei_end = ei->addr + ei->size;
 		/* totally covered? */
 		/* totally covered? */
-		if (ei->addr >= start &&
-		    (ei->addr + ei->size) <= (start + size)) {
+		if (ei->addr >= start && ei_end <= end) {
 			real_removed_size += ei->size;
 			real_removed_size += ei->size;
 			memset(ei, 0, sizeof(struct e820entry));
 			memset(ei, 0, sizeof(struct e820entry));
 			continue;
 			continue;
 		}
 		}
+
+		/* new range is totally covered? */
+		if (ei->addr < start && ei_end > end) {
+			e820_add_region(end, ei_end - end, ei->type);
+			ei->size = start - ei->addr;
+			real_removed_size += size;
+			continue;
+		}
+
 		/* partially covered */
 		/* partially covered */
 		final_start = max(start, ei->addr);
 		final_start = max(start, ei->addr);
-		final_end = min(start + size, ei->addr + ei->size);
+		final_end = min(end, ei_end);
 		if (final_start >= final_end)
 		if (final_start >= final_end)
 			continue;
 			continue;
 		real_removed_size += final_end - final_start;
 		real_removed_size += final_end - final_start;
 
 
+		/*
+		 * The remaining range could be the head or the tail of the
+		 * entry, so the size needs to be updated first.
+		 */
 		ei->size -= final_end - final_start;
 		ei->size -= final_end - final_start;
 		if (ei->addr < final_start)
 		if (ei->addr < final_start)
 			continue;
 			continue;

+ 8 - 1
arch/x86/kernel/hpet.c

@@ -400,9 +400,15 @@ static int hpet_next_event(unsigned long delta,
 	 * then we might have a real hardware problem. We can not do
 	 * then we might have a real hardware problem. We can not do
 	 * much about it here, but at least alert the user/admin with
 	 * much about it here, but at least alert the user/admin with
 	 * a prominent warning.
 	 * a prominent warning.
+	 * An erratum on some chipsets (ICH9, ...) results in a comparator read
+	 * immediately following a write returning the old value. The workaround
+	 * for this is to read the value a second time when the first
+	 * read returns the old value.
 	 */
 	 */
-	WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
+	if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
+		WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
 		  KERN_WARNING "hpet: compare register read back failed.\n");
 		  KERN_WARNING "hpet: compare register read back failed.\n");
+	}
 
 
 	return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 	return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
 }
@@ -1144,6 +1150,7 @@ int hpet_set_periodic_freq(unsigned long freq)
 		do_div(clc, freq);
 		do_div(clc, freq);
 		clc >>= hpet_clockevent.shift;
 		clc >>= hpet_clockevent.shift;
 		hpet_pie_delta = clc;
 		hpet_pie_delta = clc;
+		hpet_pie_limit = 0;
 	}
 	}
 	return 1;
 	return 1;
 }
 }

+ 2 - 2
arch/x86/kernel/mpparse.c

@@ -664,7 +664,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf)
 {
 {
 	unsigned long size = get_mpc_size(mpf->physptr);
 	unsigned long size = get_mpc_size(mpf->physptr);
 
 
-	reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
+	reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
 }
 }
 
 
 static int __init smp_scan_config(unsigned long base, unsigned long length)
 static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -693,7 +693,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 			       mpf, (u64)virt_to_phys(mpf));
 			       mpf, (u64)virt_to_phys(mpf));
 
 
 			mem = virt_to_phys(mpf);
 			mem = virt_to_phys(mpf);
-			reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
+			reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
 			if (mpf->physptr)
 			if (mpf->physptr)
 				smp_reserve_memory(mpf);
 				smp_reserve_memory(mpf);
 
 

+ 12 - 2
arch/x86/kernel/setup.c

@@ -607,6 +607,16 @@ static int __init setup_elfcorehdr(char *arg)
 early_param("elfcorehdr", setup_elfcorehdr);
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 #endif
 
 
+static __init void reserve_ibft_region(void)
+{
+	unsigned long addr, size = 0;
+
+	addr = find_ibft_region(&size);
+
+	if (size)
+		reserve_early_overlap_ok(addr, addr + size, "ibft");
+}
+
 #ifdef CONFIG_X86_RESERVE_LOW_64K
 #ifdef CONFIG_X86_RESERVE_LOW_64K
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
 {
@@ -909,6 +919,8 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	 */
 	find_smp_config();
 	find_smp_config();
 
 
+	reserve_ibft_region();
+
 	reserve_trampoline_memory();
 	reserve_trampoline_memory();
 
 
 #ifdef CONFIG_ACPI_SLEEP
 #ifdef CONFIG_ACPI_SLEEP
@@ -976,8 +988,6 @@ void __init setup_arch(char **cmdline_p)
 
 
 	dma32_reserve_bootmem();
 	dma32_reserve_bootmem();
 
 
-	reserve_ibft_region();
-
 #ifdef CONFIG_KVM_CLOCK
 #ifdef CONFIG_KVM_CLOCK
 	kvmclock_init();
 	kvmclock_init();
 #endif
 #endif

+ 7 - 8
arch/x86/power/hibernate_asm_32.S

@@ -27,10 +27,17 @@ ENTRY(swsusp_arch_suspend)
 	ret
 	ret
 
 
 ENTRY(restore_image)
 ENTRY(restore_image)
+	movl	mmu_cr4_features, %ecx
 	movl	resume_pg_dir, %eax
 	movl	resume_pg_dir, %eax
 	subl	$__PAGE_OFFSET, %eax
 	subl	$__PAGE_OFFSET, %eax
 	movl	%eax, %cr3
 	movl	%eax, %cr3
 
 
+	jecxz	1f	# cr4 Pentium and higher, skip if zero
+	andl	$~(X86_CR4_PGE), %ecx
+	movl	%ecx, %cr4;  # turn off PGE
+	movl	%cr3, %eax;  # flush TLB
+	movl	%eax, %cr3
+1:
 	movl	restore_pblist, %edx
 	movl	restore_pblist, %edx
 	.p2align 4,,7
 	.p2align 4,,7
 
 
@@ -54,16 +61,8 @@ done:
 	movl	$swapper_pg_dir, %eax
 	movl	$swapper_pg_dir, %eax
 	subl	$__PAGE_OFFSET, %eax
 	subl	$__PAGE_OFFSET, %eax
 	movl	%eax, %cr3
 	movl	%eax, %cr3
-	/* Flush TLB, including "global" things (vmalloc) */
 	movl	mmu_cr4_features, %ecx
 	movl	mmu_cr4_features, %ecx
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
-	movl	%ecx, %edx
-	andl	$~(X86_CR4_PGE), %edx
-	movl	%edx, %cr4;  # turn off PGE
-1:
-	movl	%cr3, %eax;  # flush TLB
-	movl	%eax, %cr3
-	jecxz	1f	# cr4 Pentium and higher, skip if zero
 	movl	%ecx, %cr4;  # turn PGE back on
 	movl	%ecx, %cr4;  # turn PGE back on
 1:
 1:
 
 

+ 8 - 3
drivers/firmware/iscsi_ibft_find.c

@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(ibft_addr);
  * Routine used to find the iSCSI Boot Format Table. The logical
  * Routine used to find the iSCSI Boot Format Table. The logical
  * kernel address is set in the ibft_addr global variable.
  * kernel address is set in the ibft_addr global variable.
  */
  */
-void __init reserve_ibft_region(void)
+unsigned long __init find_ibft_region(unsigned long *sizep)
 {
 {
 	unsigned long pos;
 	unsigned long pos;
 	unsigned int len = 0;
 	unsigned int len = 0;
@@ -77,6 +77,11 @@ void __init reserve_ibft_region(void)
 			}
 			}
 		}
 		}
 	}
 	}
-	if (ibft_addr)
-		reserve_bootmem(pos, PAGE_ALIGN(len), BOOTMEM_DEFAULT);
+	if (ibft_addr) {
+		*sizep = PAGE_ALIGN(len);
+		return pos;
+	}
+
+	*sizep = 0;
+	return 0;
 }
 }

+ 6 - 2
include/linux/iscsi_ibft.h

@@ -42,9 +42,13 @@ extern struct ibft_table_header *ibft_addr;
  * mapped address is set in the ibft_addr variable.
  * mapped address is set in the ibft_addr variable.
  */
  */
 #ifdef CONFIG_ISCSI_IBFT_FIND
 #ifdef CONFIG_ISCSI_IBFT_FIND
-extern void __init reserve_ibft_region(void);
+unsigned long find_ibft_region(unsigned long *sizep);
 #else
 #else
-static inline void reserve_ibft_region(void) { }
+static inline unsigned long find_ibft_region(unsigned long *sizep)
+{
+	*sizep = 0;
+	return 0;
+}
 #endif
 #endif
 
 
 #endif /* ISCSI_IBFT_H */
 #endif /* ISCSI_IBFT_H */

+ 15 - 2
mm/bootmem.c

@@ -304,9 +304,22 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 unsigned long __init free_all_bootmem(void)
 unsigned long __init free_all_bootmem(void)
 {
 {
 #ifdef CONFIG_NO_BOOTMEM
 #ifdef CONFIG_NO_BOOTMEM
-	return free_all_memory_core_early(NODE_DATA(0)->node_id);
+	/*
+	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+	 *  because in some cases, e.g. when Node0 doesn't have RAM installed,
+	 *  low RAM will be on Node1.
+	 * Using MAX_NUMNODES makes sure all ranges in early_node_map[]
+	 *  are used instead of only those related to Node0.
+	 */
+	return free_all_memory_core_early(MAX_NUMNODES);
 #else
 #else
-	return free_all_bootmem_core(NODE_DATA(0)->bdata);
+	unsigned long total_pages = 0;
+	bootmem_data_t *bdata;
+
+	list_for_each_entry(bdata, &bdata_list, list)
+		total_pages += free_all_bootmem_core(bdata);
+
+	return total_pages;
 #endif
 #endif
 }
 }