Преглед изворни кода

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (613 commits)
  x86: standalone trampoline code
  x86: move suspend wakeup code to C
  x86: coding style fixes to arch/x86/kernel/acpi/sleep.c
  x86: setup_trampoline() - fix section mismatch warning
  x86: section mismatch fixes, #1
  x86: fix paranoia about using BIOS quickboot mechanism.
  x86: print out buggy mptable
  x86: use cpu_online()
  x86: use cpumask_of_cpu()
  x86: remove unnecessary tmp local variable
  x86: remove unnecessary memset()
  x86: use ioapic_read_entry() and ioapic_write_entry()
  x86: avoid redundant loop in io_apic_level_ack_pending()
  x86: remove superfluous initialisation in boot code.
  x86: merge mpparse_{32,64}.c
  x86: unify mp_register_gsi
  x86: unify mp_config_acpi_legacy_irqs
  x86: unify mp_register_ioapic
  x86: unify uniq_io_apic_id
  x86: unify smp_scan_config
  ...
Linus Torvalds пре 17 година
родитељ
комит
9e9abecfc0
100 измењених фајлова са 3837 додато и 3379 уклоњено
  1. 1 1
      Documentation/feature-removal-schedule.txt
  2. 28 0
      Documentation/i386/boot.txt
  3. 18 0
      Documentation/kernel-parameters.txt
  4. 100 0
      Documentation/x86/pat.txt
  5. 5 0
      Documentation/x86_64/boot-options.txt
  6. 57 13
      arch/x86/Kconfig
  7. 1 1
      arch/x86/Kconfig.cpu
  8. 25 1
      arch/x86/Kconfig.debug
  9. 3 4
      arch/x86/Makefile
  10. 15 1
      arch/x86/boot/Makefile
  11. 5 0
      arch/x86/boot/boot.h
  12. 1 1
      arch/x86/boot/compressed/Makefile
  13. 134 68
      arch/x86/boot/compressed/misc.c
  14. 10 10
      arch/x86/boot/cpucheck.c
  15. 5 1
      arch/x86/boot/header.S
  16. 1 1
      arch/x86/boot/pm.c
  17. 87 1
      arch/x86/boot/tools/build.c
  18. 6 0
      arch/x86/boot/video-bios.c
  19. 173 0
      arch/x86/boot/video-mode.c
  20. 8 0
      arch/x86/boot/video-vesa.c
  21. 10 2
      arch/x86/boot/video-vga.c
  22. 1 156
      arch/x86/boot/video.c
  23. 1 1
      arch/x86/ia32/ia32_signal.c
  24. 8 4
      arch/x86/ia32/ia32entry.S
  25. 1 30
      arch/x86/ia32/sys_ia32.c
  26. 10 9
      arch/x86/kernel/Makefile
  27. 8 1
      arch/x86/kernel/acpi/Makefile
  28. 57 10
      arch/x86/kernel/acpi/boot.c
  29. 57 0
      arch/x86/kernel/acpi/realmode/Makefile
  30. 1 0
      arch/x86/kernel/acpi/realmode/copy.S
  31. 1 0
      arch/x86/kernel/acpi/realmode/video-bios.c
  32. 1 0
      arch/x86/kernel/acpi/realmode/video-mode.c
  33. 1 0
      arch/x86/kernel/acpi/realmode/video-vesa.c
  34. 1 0
      arch/x86/kernel/acpi/realmode/video-vga.c
  35. 81 0
      arch/x86/kernel/acpi/realmode/wakemain.c
  36. 113 0
      arch/x86/kernel/acpi/realmode/wakeup.S
  37. 36 0
      arch/x86/kernel/acpi/realmode/wakeup.h
  38. 61 0
      arch/x86/kernel/acpi/realmode/wakeup.lds.S
  39. 60 13
      arch/x86/kernel/acpi/sleep.c
  40. 16 0
      arch/x86/kernel/acpi/sleep.h
  41. 0 40
      arch/x86/kernel/acpi/sleep_32.c
  42. 20 227
      arch/x86/kernel/acpi/wakeup_32.S
  43. 9 304
      arch/x86/kernel/acpi/wakeup_64.S
  44. 10 0
      arch/x86/kernel/acpi/wakeup_rm.S
  45. 76 27
      arch/x86/kernel/alternative.c
  46. 3 3
      arch/x86/kernel/aperture_64.c
  47. 189 35
      arch/x86/kernel/apic_32.c
  48. 125 15
      arch/x86/kernel/apic_64.c
  49. 1 4
      arch/x86/kernel/apm_32.c
  50. 1 1
      arch/x86/kernel/asm-offsets_32.c
  51. 13 1
      arch/x86/kernel/bugs_64.c
  52. 2 2
      arch/x86/kernel/cpu/Makefile
  53. 63 58
      arch/x86/kernel/cpu/amd.c
  54. 244 246
      arch/x86/kernel/cpu/centaur.c
  55. 99 81
      arch/x86/kernel/cpu/common.c
  56. 13 13
      arch/x86/kernel/cpu/cpu.h
  57. 63 73
      arch/x86/kernel/cpu/cyrix.c
  58. 1 1
      arch/x86/kernel/cpu/feature_names.c
  59. 53 53
      arch/x86/kernel/cpu/intel.c
  60. 25 25
      arch/x86/kernel/cpu/mcheck/mce_32.c
  61. 12 9
      arch/x86/kernel/cpu/mcheck/non-fatal.c
  62. 8 8
      arch/x86/kernel/cpu/mcheck/p5.c
  63. 24 24
      arch/x86/kernel/cpu/mcheck/p6.c
  64. 4 4
      arch/x86/kernel/cpu/mcheck/winchip.c
  65. 139 0
      arch/x86/kernel/cpu/mtrr/generic.c
  66. 3 4
      arch/x86/kernel/cpu/mtrr/if.c
  67. 1 1
      arch/x86/kernel/cpu/mtrr/main.c
  68. 8 6
      arch/x86/kernel/cpu/mtrr/state.c
  69. 7 8
      arch/x86/kernel/cpu/nexgen.c
  70. 116 54
      arch/x86/kernel/cpu/proc.c
  71. 14 16
      arch/x86/kernel/cpu/transmeta.c
  72. 9 10
      arch/x86/kernel/cpu/umc.c
  73. 0 4
      arch/x86/kernel/crash.c
  74. 4 4
      arch/x86/kernel/ds.c
  75. 7 20
      arch/x86/kernel/e820_32.c
  76. 107 49
      arch/x86/kernel/e820_64.c
  77. 12 12
      arch/x86/kernel/early_printk.c
  78. 17 22
      arch/x86/kernel/entry_32.S
  79. 3 5
      arch/x86/kernel/entry_64.S
  80. 41 6
      arch/x86/kernel/genapic_64.c
  81. 2 5
      arch/x86/kernel/genapic_flat_64.c
  82. 245 0
      arch/x86/kernel/genx2apic_uv_x.c
  83. 14 0
      arch/x86/kernel/head32.c
  84. 57 21
      arch/x86/kernel/head64.c
  85. 1 1
      arch/x86/kernel/head_32.S
  86. 10 18
      arch/x86/kernel/head_64.S
  87. 46 42
      arch/x86/kernel/i387.c
  88. 64 100
      arch/x86/kernel/io_apic_32.c
  89. 30 33
      arch/x86/kernel/io_apic_64.c
  90. 178 0
      arch/x86/kernel/ipi.c
  91. 1 1
      arch/x86/kernel/irq_32.c
  92. 7 7
      arch/x86/kernel/kprobes.c
  93. 53 43
      arch/x86/kernel/mca_32.c
  94. 8 8
      arch/x86/kernel/microcode.c
  95. 353 405
      arch/x86/kernel/mpparse.c
  96. 0 867
      arch/x86/kernel/mpparse_64.c
  97. 2 2
      arch/x86/kernel/msr.c
  98. 9 5
      arch/x86/kernel/nmi_32.c
  99. 2 0
      arch/x86/kernel/nmi_64.c
  100. 1 17
      arch/x86/kernel/paravirt.c

+ 1 - 1
Documentation/feature-removal-schedule.txt

@@ -212,7 +212,7 @@ Who:    Stephen Hemminger <shemminger@linux-foundation.org>
 ---------------------------
 
 What:	i386/x86_64 bzImage symlinks
-When:	April 2008
+When:	April 2010
 
 Why:	The i386/x86_64 merge provides a symlink to the old bzImage
 	location so not yet updated user space tools, e.g. package

+ 28 - 0
Documentation/i386/boot.txt

@@ -170,6 +170,8 @@ Offset	Proto	Name		Meaning
 0238/4	2.06+	cmdline_size	Maximum size of the kernel command line
 023C/4	2.07+	hardware_subarch Hardware subarchitecture
 0240/8	2.07+	hardware_subarch_data Subarchitecture-specific data
+0248/4	2.08+	payload_offset	Offset of kernel payload
+024C/4	2.08+	payload_length	Length of kernel payload
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -512,6 +514,32 @@ Protocol:	2.07+
 
   A pointer to data that is specific to hardware subarch
 
+Field name:	payload_offset
+Type:		read
+Offset/size:	0x248/4
+Protocol:	2.08+
+
+  If non-zero then this field contains the offset from the end of the
+  real-mode code to the payload.
+
+  The payload may be compressed. The format of both the compressed and
+  uncompressed data should be determined using the standard magic
+  numbers. Currently only gzip compressed ELF is used.
+  
+Field name:	payload_length
+Type:		read
+Offset/size:	0x24c/4
+Protocol:	2.08+
+
+  The length of the payload.
+
+**** THE IMAGE CHECKSUM
+
+From boot protocol version 2.08 onwards the CRC-32 is calculated over
+the entire file using the characteristic polynomial 0x04C11DB7 and an
+initial remainder of 0xffffffff.  The checksum is appended to the
+file; therefore the CRC of the file up to the limit specified in the
+syssize field of the header is always 0.
 
 **** THE KERNEL COMMAND LINE
 

+ 18 - 0
Documentation/kernel-parameters.txt

@@ -812,6 +812,19 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	inttest=	[IA64]
 
+	iommu=		[x86]
+		off
+		force
+		noforce
+		biomerge
+		panic
+		nopanic
+		merge
+		nomerge
+		forcesac
+		soft
+
+
 	intel_iommu=	[DMAR] Intel IOMMU driver (DMAR) option
 		off
 			Disable intel iommu driver.
@@ -1134,6 +1147,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			         or
 			         memmap=0x10000$0x18690000
 
+	memtest=	[KNL,X86_64] Enable memtest
+			Format: <integer>
+			range: 0-4 : pattern number
+			default : 0 <disable>
+
 	meye.*=		[HW] Set MotionEye Camera parameters
 			See Documentation/video4linux/meye.txt.
 

+ 100 - 0
Documentation/x86/pat.txt

@@ -0,0 +1,100 @@
+
+PAT (Page Attribute Table)
+
+x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
+page level granularity. PAT is complementary to the MTRR settings which allow
+for setting of memory types over physical address ranges. However, PAT is
+more flexible than MTRR due to its capability to set attributes at page level
+and also due to the fact that there are no hardware limitations on number of
+such attribute settings allowed. Added flexibility comes with guidelines for
+not having memory type aliasing for the same physical memory with multiple
+virtual addresses.
+
+PAT allows for different types of memory attributes. The most commonly used
+ones that will be supported at this time are Write-back, Uncached,
+Write-combined and Uncached Minus.
+
+There are many different APIs in the kernel that allow setting of memory
+attributes at the page level. In order to avoid aliasing, these interfaces
+should be used thoughtfully. Below is a table of interfaces available,
+their intended usage and their memory attribute relationships. Internally,
+these APIs use a reserve_memtype()/free_memtype() interface on the physical
+address range to avoid any aliasing.
+
+
+-------------------------------------------------------------------
+API                    |    RAM   |  ACPI,...  |  Reserved/Holes  |
+-----------------------|----------|------------|------------------|
+                       |          |            |                  |
+ioremap                |    --    |    UC      |       UC         |
+                       |          |            |                  |
+ioremap_cache          |    --    |    WB      |       WB         |
+                       |          |            |                  |
+ioremap_nocache        |    --    |    UC      |       UC         |
+                       |          |            |                  |
+ioremap_wc             |    --    |    --      |       WC         |
+                       |          |            |                  |
+set_memory_uc          |    UC    |    --      |       --         |
+ set_memory_wb         |          |            |                  |
+                       |          |            |                  |
+set_memory_wc          |    WC    |    --      |       --         |
+ set_memory_wb         |          |            |                  |
+                       |          |            |                  |
+pci sysfs resource     |    --    |    --      |       UC         |
+                       |          |            |                  |
+pci sysfs resource_wc  |    --    |    --      |       WC         |
+ is IORESOURCE_PREFETCH|          |            |                  |
+                       |          |            |                  |
+pci proc               |    --    |    --      |       UC         |
+ !PCIIOC_WRITE_COMBINE |          |            |                  |
+                       |          |            |                  |
+pci proc               |    --    |    --      |       WC         |
+ PCIIOC_WRITE_COMBINE  |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    UC      |       UC         |
+ read-write            |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    UC      |       UC         |
+ mmap SYNC flag        |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |  WB/WC/UC  |    WB/WC/UC      |
+ mmap !SYNC flag       |          |(from exist-|  (from exist-    |
+ and                   |          |  ing alias)|    ing alias)    |
+ any alias to this area|          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    WB      |       WB         |
+ mmap !SYNC flag       |          |            |                  |
+ no alias to this area |          |            |                  |
+ and                   |          |            |                  |
+ MTRR says WB          |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    --      |    UC_MINUS      |
+ mmap !SYNC flag       |          |            |                  |
+ no alias to this area |          |            |                  |
+ and                   |          |            |                  |
+ MTRR says !WB         |          |            |                  |
+                       |          |            |                  |
+-------------------------------------------------------------------
+
+Notes:
+
+-- in the above table mean "Not suggested usage for the API". Some of the --'s
+are strictly enforced by the kernel. Some others are not really enforced
+today, but may be enforced in future.
+
+For ioremap and pci access through /sys or /proc - The actual type returned
+can be more restrictive, in case of any existing aliasing for that address.
+For example: If there is an existing uncached mapping, a new ioremap_wc can
+return uncached mapping in place of write-combine requested.
+
+set_memory_[uc|wc] and set_memory_wb should be used in pairs, where the driver
+will first make a region uc or wc and switch it back to wb after use.
+
+Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
+interfaces. Users writing to /proc/mtrr are advised to use the above interfaces.
+
+Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
+types.
+
+Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
+

+ 5 - 0
Documentation/x86_64/boot-options.txt

@@ -307,3 +307,8 @@ Debugging
 			stuck (default)
 
 Miscellaneous
+
+	nogbpages
+		Do not use GB pages for kernel direct mappings.
+	gbpages
+		Use GB pages for kernel direct mappings.

+ 57 - 13
arch/x86/Kconfig

@@ -114,7 +114,7 @@ config ARCH_HAS_CPU_RELAX
 	def_bool y
 
 config HAVE_SETUP_PER_CPU_AREA
-	def_bool X86_64
+	def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
 
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
@@ -168,7 +168,7 @@ config X86_64_SMP
 config X86_HT
 	bool
 	depends on SMP
-	depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || (X86_64 && !MK8)
+	depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || X86_64
 	default y
 
 config X86_BIOS_REBOOT
@@ -178,7 +178,7 @@ config X86_BIOS_REBOOT
 
 config X86_TRAMPOLINE
 	bool
-	depends on X86_SMP || (X86_VOYAGER && SMP)
+	depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
 	default y
 
 config KTIME_SCALAR
@@ -238,8 +238,7 @@ config X86_ELAN
 
 config X86_VOYAGER
 	bool "Voyager (NCR)"
-	depends on X86_32
-	select SMP if !BROKEN
+	depends on X86_32 && (SMP || BROKEN)
 	help
 	  Voyager is an MCA-based 32-way capable SMP architecture proprietary
 	  to NCR Corp.  Machine classes 345x/35xx/4100/51xx are Voyager-based.
@@ -251,9 +250,8 @@ config X86_VOYAGER
 
 config X86_NUMAQ
 	bool "NUMAQ (IBM/Sequent)"
-	select SMP
+	depends on SMP && X86_32
 	select NUMA
-	depends on X86_32
 	help
 	  This option is used for getting Linux to run on a (IBM/Sequent) NUMA
 	  multiquad box. This changes the way that processors are bootstrapped,
@@ -324,8 +322,9 @@ config X86_RDC321X
 
 config X86_VSMP
 	bool "Support for ScaleMP vSMP"
-	depends on X86_64 && PCI
-	 help
+	select PARAVIRT
+	depends on X86_64
+	help
 	  Support for ScaleMP vSMP systems.  Say 'Y' here if this kernel is
 	  supposed to run on these EM64T-based machines.  Only choose this option
 	  if you have one of these machines.
@@ -380,6 +379,35 @@ config PARAVIRT
 
 endif
 
+config MEMTEST_BOOTPARAM
+	bool "Memtest boot parameter"
+	depends on X86_64
+	default y
+	help
+	  This option adds a kernel parameter 'memtest', which allows memtest
+	  to be disabled at boot.  If this option is selected, memtest
+	  functionality can be disabled with memtest=0 on the kernel
+	  command line.  The purpose of this option is to allow a single
+	  kernel image to be distributed with memtest built in, but not
+	  necessarily enabled.
+
+	  If you are unsure how to answer this question, answer Y.
+
+config MEMTEST_BOOTPARAM_VALUE
+	int "Memtest boot parameter default value (0-4)"
+	depends on MEMTEST_BOOTPARAM
+	range 0 4
+	default 0
+	help
+	  This option sets the default value for the kernel parameter
+	  'memtest', which allows memtest to be disabled at boot.  If this
+	  option is set to 0 (zero), the memtest kernel parameter will
+	  default to 0, disabling memtest at bootup.  If this option is
+	  set to 4, the memtest kernel parameter will default to 4,
+	  enabling memtest at bootup, and use that as pattern number.
+
+	  If you are unsure how to answer this question, answer 0.
+
 config ACPI_SRAT
 	def_bool y
 	depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
@@ -504,7 +532,7 @@ config NR_CPUS
 
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
-	depends on (X86_64 && SMP) || (X86_32 && X86_HT)
+	depends on X86_HT
 	help
 	  SMT scheduler support improves the CPU scheduler's decision making
 	  when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -514,7 +542,7 @@ config SCHED_SMT
 config SCHED_MC
 	def_bool y
 	prompt "Multi-core scheduler support"
-	depends on (X86_64 && SMP) || (X86_32 && X86_HT)
+	depends on X86_HT
 	help
 	  Multi-core scheduler support improves the CPU scheduler's decision
 	  making when dealing with multi-core CPU chips at a cost of slightly
@@ -883,7 +911,7 @@ config NUMA_EMU
 	  number of nodes. This is only useful for debugging.
 
 config NODES_SHIFT
-	int
+	int "Max num nodes shift(1-15)"
 	range 1 15  if X86_64
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
@@ -1007,6 +1035,21 @@ config MTRR
 
 	  See <file:Documentation/mtrr.txt> for more information.
 
+config X86_PAT
+	def_bool y
+	prompt "x86 PAT support"
+	depends on MTRR && NONPROMISC_DEVMEM
+	help
+	  Use PAT attributes to setup page level cache control.
+
+	  PATs are the modern equivalents of MTRRs and are much more
+	  flexible than MTRRs.
+
+	  Say N here if you see bootup problems (boot crash, boot hang,
+	  spontaneous reboots) or a non-working video driver.
+
+	  If unsure, say Y.
+
 config EFI
 	def_bool n
 	prompt "EFI runtime service support"
@@ -1075,6 +1118,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
 	bool "kexec system call"
+	depends on X86_64 || X86_BIOS_REBOOT
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
@@ -1376,7 +1420,7 @@ endmenu
 menu "Bus options (PCI etc.)"
 
 config PCI
-	bool "PCI support" if !X86_VISWS
+	bool "PCI support" if !X86_VISWS && !X86_VSMP
 	depends on !X86_VOYAGER
 	default y
 	select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)

+ 1 - 1
arch/x86/Kconfig.cpu

@@ -388,7 +388,7 @@ config X86_OOSTORE
 #
 config X86_P6_NOP
 	def_bool y
-	depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4)
+	depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4 || MPSC)
 
 config X86_TSC
 	def_bool y

+ 25 - 1
arch/x86/Kconfig.debug

@@ -54,6 +54,18 @@ config DEBUG_PER_CPU_MAPS
 
 	  Say N if unsure.
 
+config X86_PTDUMP
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+	help
+	  Say Y here if you want to show the kernel pagetable layout in a
+	  debugfs file. This information is only useful for kernel developers
+	  who are working in architecture specific areas of the kernel.
+	  It is probably not a good idea to enable this feature in a production
+	  kernel.
+	  If in doubt, say "N"
+
 config DEBUG_RODATA
 	bool "Write protect kernel read-only data structures"
 	default y
@@ -64,6 +76,18 @@ config DEBUG_RODATA
 	  data. This is recommended so that we can catch kernel bugs sooner.
 	  If in doubt, say "Y".
 
+config DIRECT_GBPAGES
+	bool "Enable gbpages-mapped kernel pagetables"
+	depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64
+	help
+	  Enable gigabyte pages support (if the CPU supports it). This can
+	  improve the kernel's performance a tiny bit by reducing TLB
+	  pressure.
+
+	  This is experimental code.
+
+	  If in doubt, say "N".
+
 config DEBUG_RODATA_TEST
 	bool "Testcase for the DEBUG_RODATA feature"
 	depends on DEBUG_RODATA
@@ -82,8 +106,8 @@ config DEBUG_NX_TEST
 
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
-	depends on DEBUG_KERNEL
 	depends on X86_32
+	default y
 	help
 	  If you say Y here the kernel will use a 4Kb stacksize for the
 	  kernel stack attached to each process/thread. This facilitates

+ 3 - 4
arch/x86/Makefile

@@ -151,7 +151,6 @@ mflags-y += -Iinclude/asm-x86/mach-default
 # 64 bit does not support subarch support - clear sub arch variables
 fcore-$(CONFIG_X86_64)  :=
 mcore-$(CONFIG_X86_64)  :=
-mflags-$(CONFIG_X86_64) :=
 
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
@@ -159,9 +158,9 @@ KBUILD_AFLAGS += $(mflags-y)
 ###
 # Kernel objects
 
-head-y                := arch/x86/kernel/head_$(BITS).o
-head-$(CONFIG_X86_64) += arch/x86/kernel/head64.o
-head-y                += arch/x86/kernel/init_task.o
+head-y := arch/x86/kernel/head_$(BITS).o
+head-y += arch/x86/kernel/head$(BITS).o
+head-y += arch/x86/kernel/init_task.o
 
 libs-y  += arch/x86/lib/
 

+ 15 - 1
arch/x86/boot/Makefile

@@ -30,7 +30,7 @@ subdir-		:= compressed
 
 setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
 setup-y		+= header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y		+= printf.o string.o tty.o video.o version.o
+setup-y		+= printf.o string.o tty.o video.o video-mode.o version.o
 setup-$(CONFIG_X86_APM_BOOT) += apm.o
 setup-$(CONFIG_X86_VOYAGER) += voyager.o
 
@@ -94,6 +94,20 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
 
 SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
 
+sed-offsets := -e 's/^00*/0/' \
+        -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p'
+
+quiet_cmd_offsets = OFFSETS $@
+      cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@
+
+$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,offsets)
+
+targets += offsets.h
+
+AFLAGS_header.o += -I$(obj)
+$(obj)/header.o: $(obj)/offsets.h
+
 LDFLAGS_setup.elf	:= -T
 $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
 	$(call if_changed,ld)

+ 5 - 0
arch/x86/boot/boot.h

@@ -286,6 +286,11 @@ int getchar_timeout(void);
 /* video.c */
 void set_video(void);
 
+/* video-mode.c */
+int set_mode(u16 mode);
+int mode_defined(u16 mode);
+void probe_cards(int unsafe);
+
 /* video-vesa.c */
 void vesa_store_edid(void);
 

+ 1 - 1
arch/x86/boot/compressed/Makefile

@@ -22,7 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
 	$(call if_changed,ld)
 	@:
 
-OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
+OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 

+ 134 - 68
arch/x86/boot/compressed/misc.c

@@ -15,6 +15,10 @@
  * we just keep it from happening
  */
 #undef CONFIG_PARAVIRT
+#ifdef CONFIG_X86_32
+#define _ASM_DESC_H_ 1
+#endif
+
 #ifdef CONFIG_X86_64
 #define _LINUX_STRING_H_ 1
 #define __LINUX_BITMAP_H 1
@@ -22,6 +26,7 @@
 
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
+#include <linux/elf.h>
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/boot.h>
@@ -53,8 +58,8 @@
  * 1 bit (last block flag)
  * 2 bits (block type)
  *
- * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
- * The smallest block type encoding is always used.
+ * 1 block occurs every 32K -1 bytes or when 50% compression
+ * has been achieved. The smallest block type encoding is always used.
  *
  * stored:
  *    32 bits length in bytes.
@@ -90,9 +95,9 @@
  *
  * All of which is enough to compute an amount of extra data that is required
  * to be safe.  To avoid problems at the block level allocating 5 extra bytes
- * per 32767 bytes of data is sufficient.  To avoind problems internal to a block
- * adding an extra 32767 bytes (the worst case uncompressed block size) is
- * sufficient, to ensure that in the worst case the decompressed data for
+ * per 32767 bytes of data is sufficient.  To avoid problems internal to a
+ * block adding an extra 32767 bytes (the worst case uncompressed block size)
+ * is sufficient, to ensure that in the worst case the decompressed data for
  * block will stop the byte before the compressed data for a block begins.
  * To avoid problems with the compressed data's meta information an extra 18
  * bytes are needed.  Leading to the formula:
@@ -111,58 +116,66 @@
  * gzip declarations
  */
 
-#define OF(args)  args
-#define STATIC static
+#define OF(args)	args
+#define STATIC		static
 
 #undef memset
 #undef memcpy
-#define memzero(s, n)     memset ((s), 0, (n))
+#define memzero(s, n)	memset((s), 0, (n))
+
+typedef unsigned char	uch;
+typedef unsigned short	ush;
+typedef unsigned long	ulg;
+
+/*
+ * Window size must be at least 32k, and a power of two.
+ * We don't actually have a window just a huge output buffer,
+ * so we report a 2G window size, as that should always be
+ * larger than our output buffer:
+ */
+#define WSIZE		0x80000000
+
+/* Input buffer: */
+static unsigned char	*inbuf;
 
-typedef unsigned char  uch;
-typedef unsigned short ush;
-typedef unsigned long  ulg;
+/* Sliding window buffer (and final output buffer): */
+static unsigned char	*window;
 
-#define WSIZE 0x80000000	/* Window size must be at least 32k,
-				 * and a power of two
-				 * We don't actually have a window just
-				 * a huge output buffer so I report
-				 * a 2G windows size, as that should
-				 * always be larger than our output buffer.
-				 */
+/* Valid bytes in inbuf: */
+static unsigned		insize;
 
-static uch *inbuf;	/* input buffer */
-static uch *window;	/* Sliding window buffer, (and final output buffer) */
+/* Index of next byte to be processed in inbuf: */
+static unsigned		inptr;
 
-static unsigned insize;  /* valid bytes in inbuf */
-static unsigned inptr;   /* index of next byte to be processed in inbuf */
-static unsigned outcnt;  /* bytes in output buffer */
+/* Bytes in output buffer: */
+static unsigned		outcnt;
 
 /* gzip flag byte */
-#define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
-#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
-#define COMMENT      0x10 /* bit 4 set: file comment present */
-#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
-#define RESERVED     0xC0 /* bit 6,7:   reserved */
-
-#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
-		
+#define ASCII_FLAG	0x01 /* bit 0 set: file probably ASCII text */
+#define CONTINUATION	0x02 /* bit 1 set: continuation of multi-part gz file */
+#define EXTRA_FIELD	0x04 /* bit 2 set: extra field present */
+#define ORIG_NAM	0x08 /* bit 3 set: original file name present */
+#define COMMENT		0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED	0x20 /* bit 5 set: file is encrypted */
+#define RESERVED	0xC0 /* bit 6, 7:  reserved */
+
+#define get_byte()	(inptr < insize ? inbuf[inptr++] : fill_inbuf())
+
 /* Diagnostic functions */
 #ifdef DEBUG
-#  define Assert(cond,msg) {if(!(cond)) error(msg);}
-#  define Trace(x) fprintf x
-#  define Tracev(x) {if (verbose) fprintf x ;}
-#  define Tracevv(x) {if (verbose>1) fprintf x ;}
-#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#  define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0)
+#  define Trace(x)	do { fprintf x; } while (0)
+#  define Tracev(x)	do { if (verbose) fprintf x ; } while (0)
+#  define Tracevv(x)	do { if (verbose > 1) fprintf x ; } while (0)
+#  define Tracec(c, x)	do { if (verbose && (c)) fprintf x ; } while (0)
+#  define Tracecv(c, x)	do { if (verbose > 1 && (c)) fprintf x ; } while (0)
 #else
-#  define Assert(cond,msg)
+#  define Assert(cond, msg)
 #  define Trace(x)
 #  define Tracev(x)
 #  define Tracevv(x)
-#  define Tracec(c,x)
-#  define Tracecv(c,x)
+#  define Tracec(c, x)
+#  define Tracecv(c, x)
 #endif
 
 static int  fill_inbuf(void);
@@ -170,7 +183,7 @@ static void flush_window(void);
 static void error(char *m);
 static void gzip_mark(void **);
 static void gzip_release(void **);
-  
+
 /*
  * This is set up by the setup-routine at boot-time
  */
@@ -185,7 +198,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
 extern unsigned char input_data[];
 extern int input_len;
 
-static long bytes_out = 0;
+static long bytes_out;
 
 static void *malloc(int size);
 static void free(void *where);
@@ -210,7 +223,7 @@ static memptr free_mem_end_ptr;
 #define HEAP_SIZE             0x4000
 #endif
 
-static char *vidmem = (char *)0xb8000;
+static char *vidmem;
 static int vidport;
 static int lines, cols;
 
@@ -224,8 +237,10 @@ static void *malloc(int size)
 {
 	void *p;
 
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr <= 0) error("Memory error");
+	if (size < 0)
+		error("Malloc error");
+	if (free_mem_ptr <= 0)
+		error("Memory error");
 
 	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
 
@@ -251,19 +266,19 @@ static void gzip_release(void **ptr)
 {
 	free_mem_ptr = (memptr) *ptr;
 }
- 
+
 static void scroll(void)
 {
 	int i;
 
-	memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
-	for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
+	memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
+	for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
 		vidmem[i] = ' ';
 }
 
 static void putstr(const char *s)
 {
-	int x,y,pos;
+	int x, y, pos;
 	char c;
 
 #ifdef CONFIG_X86_32
@@ -274,18 +289,18 @@ static void putstr(const char *s)
 	x = RM_SCREEN_INFO.orig_x;
 	y = RM_SCREEN_INFO.orig_y;
 
-	while ( ( c = *s++ ) != '\0' ) {
-		if ( c == '\n' ) {
+	while ((c = *s++) != '\0') {
+		if (c == '\n') {
 			x = 0;
-			if ( ++y >= lines ) {
+			if (++y >= lines) {
 				scroll();
 				y--;
 			}
 		} else {
 			vidmem [(x + cols * y) * 2] = c;
-			if ( ++x >= cols ) {
+			if (++x >= cols) {
 				x = 0;
-				if ( ++y >= lines ) {
+				if (++y >= lines) {
 					scroll();
 					y--;
 				}
@@ -303,22 +318,22 @@ static void putstr(const char *s)
 	outb(0xff & (pos >> 1), vidport+1);
 }
 
-static void* memset(void* s, int c, unsigned n)
+static void *memset(void *s, int c, unsigned n)
 {
 	int i;
 	char *ss = s;
 
-	for (i=0;i<n;i++) ss[i] = c;
+	for (i = 0; i < n; i++) ss[i] = c;
 	return s;
 }
 
-static void* memcpy(void* dest, const void* src, unsigned n)
+static void *memcpy(void *dest, const void *src, unsigned n)
 {
 	int i;
 	const char *s = src;
 	char *d = dest;
 
-	for (i=0;i<n;i++) d[i] = s[i];
+	for (i = 0; i < n; i++) d[i] = s[i];
 	return dest;
 }
 
@@ -341,9 +356,9 @@ static void flush_window(void)
 	/* With my window equal to my output buffer
 	 * I only need to compute the crc here.
 	 */
-	ulg c = crc;         /* temporary variable */
+	unsigned long c = crc;         /* temporary variable */
 	unsigned n;
-	uch *in, ch;
+	unsigned char *in, ch;
 
 	in = window;
 	for (n = 0; n < outcnt; n++) {
@@ -351,7 +366,7 @@ static void flush_window(void)
 		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
 	}
 	crc = c;
-	bytes_out += (ulg)outcnt;
+	bytes_out += (unsigned long)outcnt;
 	outcnt = 0;
 }
 
@@ -365,9 +380,59 @@ static void error(char *x)
 		asm("hlt");
 }
 
+static void parse_elf(void *output)
+{
+#ifdef CONFIG_X86_64
+	Elf64_Ehdr ehdr;
+	Elf64_Phdr *phdrs, *phdr;
+#else
+	Elf32_Ehdr ehdr;
+	Elf32_Phdr *phdrs, *phdr;
+#endif
+	void *dest;
+	int i;
+
+	memcpy(&ehdr, output, sizeof(ehdr));
+	if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
+	   ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
+	   ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
+	   ehdr.e_ident[EI_MAG3] != ELFMAG3) {
+		error("Kernel is not a valid ELF file");
+		return;
+	}
+
+	putstr("Parsing ELF... ");
+
+	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
+	if (!phdrs)
+		error("Failed to allocate space for phdrs");
+
+	memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
+
+	for (i = 0; i < ehdr.e_phnum; i++) {
+		phdr = &phdrs[i];
+
+		switch (phdr->p_type) {
+		case PT_LOAD:
+#ifdef CONFIG_RELOCATABLE
+			dest = output;
+			dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
+#else
+			dest = (void *)(phdr->p_paddr);
+#endif
+			memcpy(dest,
+			       output + phdr->p_offset,
+			       phdr->p_filesz);
+			break;
+		default: /* Ignore other PT_* */ break;
+		}
+	}
+}
+
 asmlinkage void decompress_kernel(void *rmode, memptr heap,
-				  uch *input_data, unsigned long input_len,
-				  uch *output)
+				  unsigned char *input_data,
+				  unsigned long input_len,
+				  unsigned char *output)
 {
 	real_mode = rmode;
 
@@ -390,12 +455,12 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	inptr  = 0;
 
 #ifdef CONFIG_X86_64
-	if ((ulg)output & (__KERNEL_ALIGN - 1))
+	if ((unsigned long)output & (__KERNEL_ALIGN - 1))
 		error("Destination address not 2M aligned");
-	if ((ulg)output >= 0xffffffffffUL)
+	if ((unsigned long)output >= 0xffffffffffUL)
 		error("Destination address too large");
 #else
-	if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
+	if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1))
 		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
 	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
@@ -408,6 +473,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	makecrc();
 	putstr("\nDecompressing Linux... ");
 	gunzip();
+	parse_elf(output);
 	putstr("done.\nBooting the kernel.\n");
 	return;
 }

+ 10 - 10
arch/x86/boot/cpucheck.c

@@ -56,27 +56,27 @@ static const u32 req_flags[NCAPINTS] =
 	REQUIRED_MASK7,
 };
 
-#define A32(a,b,c,d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
+#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
 
 static int is_amd(void)
 {
-	return cpu_vendor[0] == A32('A','u','t','h') &&
-	       cpu_vendor[1] == A32('e','n','t','i') &&
-	       cpu_vendor[2] == A32('c','A','M','D');
+	return cpu_vendor[0] == A32('A', 'u', 't', 'h') &&
+	       cpu_vendor[1] == A32('e', 'n', 't', 'i') &&
+	       cpu_vendor[2] == A32('c', 'A', 'M', 'D');
 }
 
 static int is_centaur(void)
 {
-	return cpu_vendor[0] == A32('C','e','n','t') &&
-	       cpu_vendor[1] == A32('a','u','r','H') &&
-	       cpu_vendor[2] == A32('a','u','l','s');
+	return cpu_vendor[0] == A32('C', 'e', 'n', 't') &&
+	       cpu_vendor[1] == A32('a', 'u', 'r', 'H') &&
+	       cpu_vendor[2] == A32('a', 'u', 'l', 's');
 }
 
 static int is_transmeta(void)
 {
-	return cpu_vendor[0] == A32('G','e','n','u') &&
-	       cpu_vendor[1] == A32('i','n','e','T') &&
-	       cpu_vendor[2] == A32('M','x','8','6');
+	return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
+	       cpu_vendor[1] == A32('i', 'n', 'e', 'T') &&
+	       cpu_vendor[2] == A32('M', 'x', '8', '6');
 }
 
 static int has_fpu(void)

+ 5 - 1
arch/x86/boot/header.S

@@ -22,6 +22,7 @@
 #include <asm/page.h>
 #include <asm/setup.h>
 #include "boot.h"
+#include "offsets.h"
 
 SETUPSECTS	= 4			/* default nr of setup-sectors */
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
@@ -119,7 +120,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x0207		# header version number (>= 0x0105)
+		.word	0x0208		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -223,6 +224,9 @@ hardware_subarch:	.long 0			# subarchitecture, added with 2.07
 
 hardware_subarch_data:	.quad 0
 
+payload_offset:		.long input_data
+payload_length:		.long input_data_end-input_data
+
 # End of setup header #####################################################
 
 	.section ".inittext", "ax"

+ 1 - 1
arch/x86/boot/pm.c

@@ -100,7 +100,7 @@ static void reset_coprocessor(void)
 /*
  * Set up the GDT
  */
-#define GDT_ENTRY(flags,base,limit)		\
+#define GDT_ENTRY(flags, base, limit)		\
 	(((u64)(base & 0xff000000) << 32) |	\
 	 ((u64)flags << 40) |			\
 	 ((u64)(limit & 0x00ff0000) << 32) |	\

+ 87 - 1
arch/x86/boot/tools/build.c

@@ -50,6 +50,75 @@ typedef unsigned long  u32;
 u8 buf[SETUP_SECT_MAX*512];
 int is_big_kernel;
 
+/*----------------------------------------------------------------------*/
+
+static const u32 crctab32[] = {
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+	0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+	0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+	0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+	0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+	0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+	0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+	0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+	0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+	0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+	0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+	0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+	0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+	0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+	0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+	0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+	0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+	0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+	0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+	0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+	0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+	0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+	0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+	0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+	0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+	0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+	0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+	0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+	0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+	0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+	0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+	0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+	0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+	0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+	0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+	0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+	0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+	0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+	0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+	0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+	0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+	0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+	0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+	0x2d02ef8d
+};
+
+static u32 partial_crc32_one(u8 c, u32 crc)
+{
+	return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
+}
+
+static u32 partial_crc32(const u8 *s, int len, u32 crc)
+{
+	while (len--)
+		crc = partial_crc32_one(*s++, crc);
+	return crc;
+}
+
 static void die(const char * str, ...)
 {
 	va_list args;
@@ -74,6 +143,7 @@ int main(int argc, char ** argv)
 	FILE *file;
 	int fd;
 	void *kernel;
+	u32 crc = 0xffffffffUL;
 
 	if (argc > 2 && !strcmp(argv[1], "-b"))
 	  {
@@ -144,7 +214,8 @@ int main(int argc, char ** argv)
 	kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
 	if (kernel == MAP_FAILED)
 		die("Unable to mmap '%s': %m", argv[2]);
-	sys_size = (sz + 15) / 16;
+	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
+	sys_size = (sz + 15 + 4) / 16;
 	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
 		die("System is too big. Try using bzImage or modules.");
 
@@ -155,12 +226,27 @@ int main(int argc, char ** argv)
 	buf[0x1f6] = sys_size >> 16;
 	buf[0x1f7] = sys_size >> 24;
 
+	crc = partial_crc32(buf, i, crc);
 	if (fwrite(buf, 1, i, stdout) != i)
 		die("Writing setup failed");
 
 	/* Copy the kernel code */
+	crc = partial_crc32(kernel, sz, crc);
 	if (fwrite(kernel, 1, sz, stdout) != sz)
 		die("Writing kernel failed");
+
+	/* Add padding leaving 4 bytes for the checksum */
+	while (sz++ < (sys_size*16) - 4) {
+		crc = partial_crc32_one('\0', crc);
+		if (fwrite("\0", 1, 1, stdout) != 1)
+			die("Writing padding failed");
+	}
+
+	/* Write the CRC */
+	fprintf(stderr, "CRC %lx\n", crc);
+	if (fwrite(&crc, 1, 4, stdout) != 4)
+		die("Writing CRC failed");
+
 	close(fd);
 
 	/* Everything is OK */

+ 6 - 0
arch/x86/boot/video-bios.c

@@ -50,6 +50,7 @@ static int set_bios_mode(u8 mode)
 	if (new_mode == mode)
 		return 0;	/* Mode change OK */
 
+#ifndef _WAKEUP
 	if (new_mode != boot_params.screen_info.orig_video_mode) {
 		/* Mode setting failed, but we didn't end up where we
 		   started.  That's bad.  Try to revert to the original
@@ -59,13 +60,18 @@ static int set_bios_mode(u8 mode)
 			     : "+a" (ax)
 			     : : "ebx", "ecx", "edx", "esi", "edi");
 	}
+#endif
 	return -1;
 }
 
 static int bios_probe(void)
 {
 	u8 mode;
+#ifdef _WAKEUP
+	u8 saved_mode = 0x03;
+#else
 	u8 saved_mode = boot_params.screen_info.orig_video_mode;
+#endif
 	u16 crtc;
 	struct mode_info *mi;
 	int nmodes = 0;

+ 173 - 0
arch/x86/boot/video-mode.c

@@ -0,0 +1,173 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *   Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * arch/x86/boot/video-mode.c
+ *
+ * Set the video mode.  This is separated out into a different
+ * file in order to be shared with the ACPI wakeup code.
+ */
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+
+/*
+ * Common variables
+ */
+int adapter;			/* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
+u16 video_segment;
+int force_x, force_y;	/* Don't query the BIOS for cols/rows */
+
+int do_restore;		/* Screen contents changed during mode flip */
+int graphic_mode;	/* Graphic mode with linear frame buffer */
+
+/* Probe the video drivers and have them generate their mode lists. */
+void probe_cards(int unsafe)
+{
+	struct card_info *card;
+	static u8 probed[2];
+
+	if (probed[unsafe])
+		return;
+
+	probed[unsafe] = 1;
+
+	for (card = video_cards; card < video_cards_end; card++) {
+		if (card->unsafe == unsafe) {
+			if (card->probe)
+				card->nmodes = card->probe();
+			else
+				card->nmodes = 0;
+		}
+	}
+}
+
+/* Test if a mode is defined */
+int mode_defined(u16 mode)
+{
+	struct card_info *card;
+	struct mode_info *mi;
+	int i;
+
+	for (card = video_cards; card < video_cards_end; card++) {
+		mi = card->modes;
+		for (i = 0; i < card->nmodes; i++, mi++) {
+			if (mi->mode == mode)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Set mode (without recalc) */
+static int raw_set_mode(u16 mode, u16 *real_mode)
+{
+	int nmode, i;
+	struct card_info *card;
+	struct mode_info *mi;
+
+	/* Drop the recalc bit if set */
+	mode &= ~VIDEO_RECALC;
+
+	/* Scan for mode based on fixed ID, position, or resolution */
+	nmode = 0;
+	for (card = video_cards; card < video_cards_end; card++) {
+		mi = card->modes;
+		for (i = 0; i < card->nmodes; i++, mi++) {
+			int visible = mi->x || mi->y;
+
+			if ((mode == nmode && visible) ||
+			    mode == mi->mode ||
+			    mode == (mi->y << 8)+mi->x) {
+				*real_mode = mi->mode;
+				return card->set_mode(mi);
+			}
+
+			if (visible)
+				nmode++;
+		}
+	}
+
+	/* Nothing found?  Is it an "exceptional" (unprobed) mode? */
+	for (card = video_cards; card < video_cards_end; card++) {
+		if (mode >= card->xmode_first &&
+		    mode < card->xmode_first+card->xmode_n) {
+			struct mode_info mix;
+			*real_mode = mix.mode = mode;
+			mix.x = mix.y = 0;
+			return card->set_mode(&mix);
+		}
+	}
+
+	/* Otherwise, failure... */
+	return -1;
+}
+
+/*
+ * Recalculate the vertical video cutoff (hack!)
+ */
+static void vga_recalc_vertical(void)
+{
+	unsigned int font_size, rows;
+	u16 crtc;
+	u8 pt, ov;
+
+	set_fs(0);
+	font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
+	rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
+
+	rows *= font_size;	/* Visible scan lines */
+	rows--;			/* ... minus one */
+
+	crtc = vga_crtc();
+
+	pt = in_idx(crtc, 0x11);
+	pt &= ~0x80;		/* Unlock CR0-7 */
+	out_idx(pt, crtc, 0x11);
+
+	out_idx((u8)rows, crtc, 0x12); /* Lower height register */
+
+	ov = in_idx(crtc, 0x07); /* Overflow register */
+	ov &= 0xbd;
+	ov |= (rows >> (8-1)) & 0x02;
+	ov |= (rows >> (9-6)) & 0x40;
+	out_idx(ov, crtc, 0x07);
+}
+
+/* Set mode (with recalc if specified) */
+int set_mode(u16 mode)
+{
+	int rv;
+	u16 real_mode;
+
+	/* Very special mode numbers... */
+	if (mode == VIDEO_CURRENT_MODE)
+		return 0;	/* Nothing to do... */
+	else if (mode == NORMAL_VGA)
+		mode = VIDEO_80x25;
+	else if (mode == EXTENDED_VGA)
+		mode = VIDEO_8POINT;
+
+	rv = raw_set_mode(mode, &real_mode);
+	if (rv)
+		return rv;
+
+	if (mode & VIDEO_RECALC)
+		vga_recalc_vertical();
+
+	/* Save the canonical mode number for the kernel, not
+	   an alias, size specification or menu position */
+#ifndef _WAKEUP
+	boot_params.hdr.vid_mode = real_mode;
+#endif
+	return 0;
+}

+ 8 - 0
arch/x86/boot/video-vesa.c

@@ -24,7 +24,11 @@ static struct vesa_mode_info vminfo;
 
 __videocard video_vesa;
 
+#ifndef _WAKEUP
 static void vesa_store_mode_params_graphics(void);
+#else /* _WAKEUP */
+static inline void vesa_store_mode_params_graphics(void) {}
+#endif /* _WAKEUP */
 
 static int vesa_probe(void)
 {
@@ -165,6 +169,8 @@ static int vesa_set_mode(struct mode_info *mode)
 }
 
 
+#ifndef _WAKEUP
+
 /* Switch DAC to 8-bit mode */
 static void vesa_dac_set_8bits(void)
 {
@@ -288,6 +294,8 @@ void vesa_store_edid(void)
 #endif /* CONFIG_FIRMWARE_EDID */
 }
 
+#endif /* not _WAKEUP */
+
 __videocard video_vesa =
 {
 	.card_name	= "VESA",

+ 10 - 2
arch/x86/boot/video-vga.c

@@ -210,6 +210,8 @@ static int vga_set_mode(struct mode_info *mode)
  */
 static int vga_probe(void)
 {
+	u16 ega_bx;
+
 	static const char *card_name[] = {
 		"CGA/MDA/HGC", "EGA", "VGA"
 	};
@@ -226,12 +228,16 @@ static int vga_probe(void)
 	u8 vga_flag;
 
 	asm(INT10
-	    : "=b" (boot_params.screen_info.orig_video_ega_bx)
+	    : "=b" (ega_bx)
 	    : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
 	    : "ecx", "edx", "esi", "edi");
 
+#ifndef _WAKEUP
+	boot_params.screen_info.orig_video_ega_bx = ega_bx;
+#endif
+
 	/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
-	if ((u8)boot_params.screen_info.orig_video_ega_bx != 0x10) {
+	if ((u8)ega_bx != 0x10) {
 		/* EGA/VGA */
 		asm(INT10
 		    : "=a" (vga_flag)
@@ -240,7 +246,9 @@ static int vga_probe(void)
 
 		if (vga_flag == 0x1a) {
 			adapter = ADAPTER_VGA;
+#ifndef _WAKEUP
 			boot_params.screen_info.orig_video_isVGA = 1;
+#endif
 		} else {
 			adapter = ADAPTER_EGA;
 		}

+ 1 - 156
arch/x86/boot/video.c

@@ -18,21 +18,6 @@
 #include "video.h"
 #include "vesa.h"
 
-/*
- * Mode list variables
- */
-static struct card_info cards[];    /* List of cards to probe for */
-
-/*
- * Common variables
- */
-int adapter;			/* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
-u16 video_segment;
-int force_x, force_y;	/* Don't query the BIOS for cols/rows */
-
-int do_restore = 0;	/* Screen contents changed during mode flip */
-int graphic_mode;	/* Graphic mode with linear frame buffer */
-
 static void store_cursor_position(void)
 {
 	u16 curpos;
@@ -107,147 +92,6 @@ static void store_mode_params(void)
 	boot_params.screen_info.orig_video_lines = y;
 }
 
-/* Probe the video drivers and have them generate their mode lists. */
-static void probe_cards(int unsafe)
-{
-	struct card_info *card;
-	static u8 probed[2];
-
-	if (probed[unsafe])
-		return;
-
-	probed[unsafe] = 1;
-
-	for (card = video_cards; card < video_cards_end; card++) {
-		if (card->unsafe == unsafe) {
-			if (card->probe)
-				card->nmodes = card->probe();
-			else
-				card->nmodes = 0;
-		}
-	}
-}
-
-/* Test if a mode is defined */
-int mode_defined(u16 mode)
-{
-	struct card_info *card;
-	struct mode_info *mi;
-	int i;
-
-	for (card = video_cards; card < video_cards_end; card++) {
-		mi = card->modes;
-		for (i = 0; i < card->nmodes; i++, mi++) {
-			if (mi->mode == mode)
-				return 1;
-		}
-	}
-
-	return 0;
-}
-
-/* Set mode (without recalc) */
-static int raw_set_mode(u16 mode, u16 *real_mode)
-{
-	int nmode, i;
-	struct card_info *card;
-	struct mode_info *mi;
-
-	/* Drop the recalc bit if set */
-	mode &= ~VIDEO_RECALC;
-
-	/* Scan for mode based on fixed ID, position, or resolution */
-	nmode = 0;
-	for (card = video_cards; card < video_cards_end; card++) {
-		mi = card->modes;
-		for (i = 0; i < card->nmodes; i++, mi++) {
-			int visible = mi->x || mi->y;
-
-			if ((mode == nmode && visible) ||
-			    mode == mi->mode ||
-			    mode == (mi->y << 8)+mi->x) {
-				*real_mode = mi->mode;
-				return card->set_mode(mi);
-			}
-
-			if (visible)
-				nmode++;
-		}
-	}
-
-	/* Nothing found?  Is it an "exceptional" (unprobed) mode? */
-	for (card = video_cards; card < video_cards_end; card++) {
-		if (mode >= card->xmode_first &&
-		    mode < card->xmode_first+card->xmode_n) {
-			struct mode_info mix;
-			*real_mode = mix.mode = mode;
-			mix.x = mix.y = 0;
-			return card->set_mode(&mix);
-		}
-	}
-
-	/* Otherwise, failure... */
-	return -1;
-}
-
-/*
- * Recalculate the vertical video cutoff (hack!)
- */
-static void vga_recalc_vertical(void)
-{
-	unsigned int font_size, rows;
-	u16 crtc;
-	u8 pt, ov;
-
-	set_fs(0);
-	font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
-	rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
-
-	rows *= font_size;	/* Visible scan lines */
-	rows--;			/* ... minus one */
-
-	crtc = vga_crtc();
-
-	pt = in_idx(crtc, 0x11);
-	pt &= ~0x80;		/* Unlock CR0-7 */
-	out_idx(pt, crtc, 0x11);
-
-	out_idx((u8)rows, crtc, 0x12); /* Lower height register */
-
-	ov = in_idx(crtc, 0x07); /* Overflow register */
-	ov &= 0xbd;
-	ov |= (rows >> (8-1)) & 0x02;
-	ov |= (rows >> (9-6)) & 0x40;
-	out_idx(ov, crtc, 0x07);
-}
-
-/* Set mode (with recalc if specified) */
-static int set_mode(u16 mode)
-{
-	int rv;
-	u16 real_mode;
-
-	/* Very special mode numbers... */
-	if (mode == VIDEO_CURRENT_MODE)
-		return 0;	/* Nothing to do... */
-	else if (mode == NORMAL_VGA)
-		mode = VIDEO_80x25;
-	else if (mode == EXTENDED_VGA)
-		mode = VIDEO_8POINT;
-
-	rv = raw_set_mode(mode, &real_mode);
-	if (rv)
-		return rv;
-
-	if (mode & VIDEO_RECALC)
-		vga_recalc_vertical();
-
-	/* Save the canonical mode number for the kernel, not
-	   an alias, size specification or menu position */
-	boot_params.hdr.vid_mode = real_mode;
-	return 0;
-}
-
 static unsigned int get_entry(void)
 {
 	char entry_buf[4];
@@ -486,6 +330,7 @@ void set_video(void)
 		printf("Undefined video mode number: %x\n", mode);
 		mode = ASK_VGA;
 	}
+	boot_params.hdr.vid_mode = mode;
 	vesa_store_edid();
 	store_mode_params();
 

+ 1 - 1
arch/x86/ia32/ia32_signal.c

@@ -468,7 +468,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 		restorer = ka->sa.sa_restorer;
 	} else {
 		/* Return stub is in 32bit vsyscall page */
-		if (current->binfmt->hasvdso)
+		if (current->mm->context.vdso)
 			restorer = VDSO32_SYMBOL(current->mm->context.vdso,
 						 sigreturn);
 		else

+ 8 - 4
arch/x86/ia32/ia32entry.S

@@ -162,12 +162,14 @@ sysenter_tracesys:
 	SAVE_REST
 	CLEAR_RREGS
 	movq	%r9,R9(%rsp)
-	movq	$-ENOSYS,RAX(%rsp)	/* really needed? */
+	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
 	xchgl	%ebp,%r9d
+	cmpl	$(IA32_NR_syscalls-1),%eax
+	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
 	jmp	sysenter_do_call
 	CFI_ENDPROC
 ENDPROC(ia32_sysenter_target)
@@ -261,13 +263,15 @@ cstar_tracesys:
 	SAVE_REST
 	CLEAR_RREGS
 	movq %r9,R9(%rsp)
-	movq $-ENOSYS,RAX(%rsp)	/* really needed? */
+	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
 	xchgl %ebp,%r9d
 	movl RSP-ARGOFFSET(%rsp), %r8d
+	cmpl $(IA32_NR_syscalls-1),%eax
+	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
 	jmp cstar_do_call
 END(ia32_cstar_target)
 				
@@ -325,7 +329,7 @@ ENTRY(ia32_syscall)
 	jnz ia32_tracesys
 ia32_do_syscall:	
 	cmpl $(IA32_NR_syscalls-1),%eax
-	ja  ia32_badsys
+	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	IA32_ARG_FIXUP
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
@@ -335,7 +339,7 @@ ia32_sysret:
 ia32_tracesys:			 
 	SAVE_REST
 	CLEAR_RREGS
-	movq $-ENOSYS,RAX(%rsp)	/* really needed? */
+	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */

+ 1 - 30
arch/x86/ia32/sys_ia32.c

@@ -26,51 +26,27 @@
 #include <linux/file.h>
 #include <linux/signal.h>
 #include <linux/syscalls.h>
-#include <linux/resource.h>
 #include <linux/times.h>
 #include <linux/utsname.h>
-#include <linux/smp.h>
 #include <linux/smp_lock.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
 #include <linux/mm.h>
-#include <linux/shm.h>
-#include <linux/slab.h>
 #include <linux/uio.h>
-#include <linux/nfs_fs.h>
-#include <linux/quota.h>
-#include <linux/module.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
 #include <linux/poll.h>
 #include <linux/personality.h>
 #include <linux/stat.h>
-#include <linux/ipc.h>
 #include <linux/rwsem.h>
-#include <linux/binfmts.h>
-#include <linux/init.h>
-#include <linux/aio_abi.h>
-#include <linux/aio.h>
 #include <linux/compat.h>
 #include <linux/vfs.h>
 #include <linux/ptrace.h>
 #include <linux/highuid.h>
-#include <linux/vmalloc.h>
-#include <linux/fsnotify.h>
 #include <linux/sysctl.h>
 #include <asm/mman.h>
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 #include <asm/atomic.h>
-#include <asm/ldt.h>
-
-#include <net/scm.h>
-#include <net/sock.h>
 #include <asm/ia32.h>
+#include <asm/vgtod.h>
 
 #define AA(__x)		((unsigned long)(__x))
 
@@ -804,11 +780,6 @@ asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
 	if (IS_ERR(filename))
 		return error;
 	error = compat_do_execve(filename, argv, envp, regs);
-	if (error == 0) {
-		task_lock(current);
-		current->ptrace &= ~PT_DTRACE;
-		task_unlock(current);
-	}
 	putname(filename);
 	return error;
 }

+ 10 - 9
arch/x86/kernel/Makefile

@@ -2,8 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y                := head_$(BITS).o init_task.o vmlinux.lds
-extra-$(CONFIG_X86_64) += head64.o
+extra-y                := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
@@ -19,7 +18,7 @@ CFLAGS_tsc_64.o		:= $(nostackp)
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
-obj-y			+= setup_$(BITS).o i8259_$(BITS).o
+obj-y			+= setup_$(BITS).o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
@@ -29,6 +28,7 @@ obj-y			+= alternative.o i8253.o
 obj-$(CONFIG_X86_64)	+= pci-nommu_64.o bugs_64.o
 obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
 
+obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= i387.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
@@ -47,11 +47,12 @@ obj-$(CONFIG_MICROCODE)		+= microcode.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
-obj-$(CONFIG_X86_SMP)		+= smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
-obj-$(CONFIG_X86_32_SMP)	+= smpcommon_32.o
-obj-$(CONFIG_X86_64_SMP)	+= smp_64.o smpboot_64.o tsc_sync.o
+obj-$(CONFIG_X86_SMP)		+= smp.o
+obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
+obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
+obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
-obj-$(CONFIG_X86_MPPARSE)	+= mpparse_$(BITS).o
+obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi_$(BITS).o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
@@ -60,7 +61,7 @@ obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
-obj-$(CONFIG_X86_VSMP)		+= vsmp_64.o
+obj-y				+= vsmp_64.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_MODULES)		+= module_$(BITS).o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
@@ -89,7 +90,7 @@ scx200-y			+= scx200_32.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y				+= genapic_64.o genapic_flat_64.o
+        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
 

+ 8 - 1
arch/x86/kernel/acpi/Makefile

@@ -1,7 +1,14 @@
+subdir-				:= realmode
+
 obj-$(CONFIG_ACPI)		+= boot.o
-obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
+obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_rm.o wakeup_$(BITS).o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y				+= cstate.o processor.o
 endif
 
+$(obj)/wakeup_rm.o:    $(obj)/realmode/wakeup.bin
+
+$(obj)/realmode/wakeup.bin: FORCE
+	$(Q)$(MAKE) $(build)=$(obj)/realmode $@
+

+ 57 - 10
arch/x86/kernel/acpi/boot.c

@@ -39,6 +39,11 @@
 #include <asm/apic.h>
 #include <asm/io.h>
 #include <asm/mpspec.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+# include <mach_apic.h>
+#endif
 
 static int __initdata acpi_force = 0;
 
@@ -52,9 +57,7 @@ EXPORT_SYMBOL(acpi_disabled);
 #ifdef	CONFIG_X86_64
 
 #include <asm/proto.h>
-
-static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
-
+#include <asm/genapic.h>
 
 #else				/* X86 */
 
@@ -111,7 +114,7 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
 	if (!phys_addr || !size)
 		return NULL;
 
-	if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE)
+	if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
 		return __va(phys_addr);
 
 	return NULL;
@@ -237,6 +240,16 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 	return 0;
 }
 
+static void __cpuinit acpi_register_lapic(int id, u8 enabled)
+{
+	if (!enabled) {
+		++disabled_cpus;
+		return;
+	}
+
+	generic_processor_info(id, 0);
+}
+
 static int __init
 acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -256,8 +269,26 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
 	 * to not preallocating memory for all NR_CPUS
 	 * when we use CPU hotplug.
 	 */
-	mp_register_lapic(processor->id,	/* APIC ID */
-			  processor->lapic_flags & ACPI_MADT_ENABLED);	/* Enabled? */
+	acpi_register_lapic(processor->id,	/* APIC ID */
+			    processor->lapic_flags & ACPI_MADT_ENABLED);
+
+	return 0;
+}
+
+static int __init
+acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
+{
+	struct acpi_madt_local_sapic *processor = NULL;
+
+	processor = (struct acpi_madt_local_sapic *)header;
+
+	if (BAD_MADT_ENTRY(processor, end))
+		return -EINVAL;
+
+	acpi_table_print_madt_entry(header);
+
+	acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
+			    processor->lapic_flags & ACPI_MADT_ENABLED);
 
 	return 0;
 }
@@ -300,6 +331,8 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 
 #ifdef CONFIG_X86_IO_APIC
 
+struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
+
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -532,7 +565,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	buffer.pointer = NULL;
 
 	tmp_map = cpu_present_map;
-	mp_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
+	acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
 
 	/*
 	 * If mp_register_lapic successfully generates a new logical cpu
@@ -732,6 +765,16 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
  * Parse LAPIC entries in MADT
  * returns 0 on success, < 0 on error
  */
+
+static void __init acpi_register_lapic_address(unsigned long address)
+{
+	mp_lapic_addr = address;
+
+	set_fixmap_nocache(FIX_APIC_BASE, address);
+	if (boot_cpu_physical_apicid == -1U)
+		boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
+}
+
 static int __init acpi_parse_madt_lapic_entries(void)
 {
 	int count;
@@ -753,10 +796,14 @@ static int __init acpi_parse_madt_lapic_entries(void)
 		return count;
 	}
 
-	mp_register_lapic_address(acpi_lapic_addr);
+	acpi_register_lapic_address(acpi_lapic_addr);
+
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
+				      acpi_parse_sapic, MAX_APICS);
 
-	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic,
-				      MAX_APICS);
+	if (!count)
+		count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+					      acpi_parse_lapic, MAX_APICS);
 	if (!count) {
 		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
 		/* TBD: Cleanup to allow fallback to MPS */

+ 57 - 0
arch/x86/kernel/acpi/realmode/Makefile

@@ -0,0 +1,57 @@
+#
+# arch/x86/kernel/acpi/realmode/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+
+targets		:= wakeup.bin wakeup.elf
+
+wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
+
+# The link order of the video-*.o modules can matter.  In particular,
+# video-vga.o *must* be listed first, followed by video-vesa.o.
+# Hardware-specific drivers should follow in the order they should be
+# probed, and video-bios.o should typically be last.
+wakeup-y	+= video-vga.o
+wakeup-y	+= video-vesa.o
+wakeup-y	+= video-bios.o
+
+targets		+= $(wakeup-y)
+
+bootsrc		:= $(src)/../../../boot
+
+# ---------------------------------------------------------------------------
+
+# How to compile the 16-bit code.  Note we always compile for -march=i386,
+# that way we can complain to the user if the CPU is insufficient.
+# Compile with _SETUP since this is similar to the boot-time setup code.
+KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \
+		   -I$(srctree)/$(bootsrc) \
+		   $(cflags-y) \
+		   -Wall -Wstrict-prototypes \
+		   -march=i386 -mregparm=3 \
+		   -include $(srctree)/$(bootsrc)/code16gcc.h \
+		   -fno-strict-aliasing -fomit-frame-pointer \
+		   $(call cc-option, -ffreestanding) \
+		   $(call cc-option, -fno-toplevel-reorder,\
+			$(call cc-option, -fno-unit-at-a-time)) \
+		   $(call cc-option, -fno-stack-protector) \
+		   $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS	+= $(call cc-option, -m32)
+KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
+
+WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y))
+
+LDFLAGS_wakeup.elf	:= -T
+
+CPPFLAGS_wakeup.lds += -P -C
+
+# wakeup.lds is generated from wakeup.lds.S, so it lives in the object
+# directory: depend on $(obj)/wakeup.lds, since $(src)/wakeup.lds does not
+# exist when building into a separate output directory.  Keep it as the
+# first prerequisite (LDFLAGS_wakeup.elf ends in -T, which presumably
+# consumes the next word on the link line), and list it in targets so
+# kbuild tracks its saved command line.
+targets		+= wakeup.lds
+
+$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
+	$(call if_changed,ld)
+
+OBJCOPYFLAGS_wakeup.bin	:= -O binary
+
+$(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE
+	$(call if_changed,objcopy)

+ 1 - 0
arch/x86/kernel/acpi/realmode/copy.S

@@ -0,0 +1 @@
+#include "../../../boot/copy.S"

+ 1 - 0
arch/x86/kernel/acpi/realmode/video-bios.c

@@ -0,0 +1 @@
+#include "../../../boot/video-bios.c"

+ 1 - 0
arch/x86/kernel/acpi/realmode/video-mode.c

@@ -0,0 +1 @@
+#include "../../../boot/video-mode.c"

+ 1 - 0
arch/x86/kernel/acpi/realmode/video-vesa.c

@@ -0,0 +1 @@
+#include "../../../boot/video-vesa.c"

+ 1 - 0
arch/x86/kernel/acpi/realmode/video-vga.c

@@ -0,0 +1 @@
+#include "../../../boot/video-vga.c"

+ 81 - 0
arch/x86/kernel/acpi/realmode/wakemain.c

@@ -0,0 +1,81 @@
+#include "wakeup.h"
+#include "boot.h"
+
+/*
+ * Crude busy-wait: issue one io_delay() per requested iteration.  Callers
+ * pass a microsecond count, relying on io_delay() lasting about 1 us.
+ */
+static void udelay(int loops)
+{
+	for (; loops; loops--)
+		io_delay();	/* Approximately 1 us */
+}
+
+/*
+ * Start a tone of @hz Hertz on the PC speaker, or silence the speaker
+ * when @hz is 0.
+ *
+ * The counter reload value is 1193181 / @hz (1193181 is presumably the
+ * timer input clock in Hz) and is stored in 16 bits, so very low
+ * frequencies (quotient above 65535) get truncated.
+ */
+static void beep(unsigned int hz)
+{
+	u8 gate = 0x00;			/* Speaker stays off unless hz != 0 */
+
+	if (hz) {
+		u16 divisor = 1193181/hz;
+
+		outb(0xb6, 0x43);		/* Ctr 2, squarewave, load, binary */
+		io_delay();
+		outb(divisor, 0x42);		/* LSB of counter */
+		io_delay();
+		outb(divisor >> 8, 0x42);	/* MSB of counter */
+		io_delay();
+
+		gate = 0x03;			/* Turn on speaker */
+	}
+
+	inb(0x61);		/* Dummy read of System Control Port B */
+	io_delay();
+	outb(gate, 0x61);	/* Enable (or disable) timer 2 output to speaker */
+	io_delay();
+}
+
+#define DOT_HZ		880
+#define DASH_HZ		587
+#define US_PER_DOT	125000
+
+/*
+ * Okay, this is totally silly, but it's kind of fun.
+ *
+ * Play @pattern on the speaker: '.' is a DOT_HZ tone lasting one dot time,
+ * '-' is a DASH_HZ tone lasting three dot times, each followed by one dot
+ * time of silence.  Any other character is a pause of three dot times.
+ */
+static void send_morse(const char *pattern)
+{
+	for (; *pattern; pattern++) {
+		unsigned int tone;
+		int on_time;
+
+		if (*pattern == '.') {
+			tone = DOT_HZ;
+			on_time = US_PER_DOT;
+		} else if (*pattern == '-') {
+			tone = DASH_HZ;
+			on_time = US_PER_DOT * 3;
+		} else {
+			/* Assume it's a space */
+			udelay(US_PER_DOT * 3);
+			continue;
+		}
+
+		beep(tone);
+		udelay(on_time);
+		beep(0);
+		udelay(US_PER_DOT);
+	}
+}
+
+/*
+ * C entry point of the real-mode wakeup code (called from wakeup.S).
+ * Validates the header magic, then performs the optional steps selected
+ * by wakeup_header.realmode_flags.
+ */
+void main(void)
+{
+	/* Meaning of the realmode_flags bits as used below */
+	enum {
+		RMF_CALL_C000	= 1,	/* far-call c000:3 */
+		RMF_SET_MODE	= 2,	/* probe cards and restore video mode */
+		RMF_BEEP	= 4,	/* announce wakeup on the speaker */
+	};
+
+	/* Kill machine if structures are wrong */
+	if (wakeup_header.real_magic != 0x12345678) {
+		for (;;)
+			;
+	}
+
+	if (wakeup_header.realmode_flags & RMF_BEEP)
+		send_morse("...-");
+
+	/*
+	 * NOTE(review): c000:3 is presumably the video BIOS entry point.
+	 * The asm statement declares no clobbers, and the assembly this
+	 * replaces reloaded %ds/%ss after the call -- confirm that the
+	 * callee preserves everything the compiler relies on.
+	 */
+	if (wakeup_header.realmode_flags & RMF_CALL_C000)
+		asm volatile("lcallw   $0xc000,$3");
+
+	if (!(wakeup_header.realmode_flags & RMF_SET_MODE))
+		return;
+
+	/* Need to call BIOS */
+	probe_cards(0);
+	set_mode(wakeup_header.video_mode);
+}

+ 113 - 0
arch/x86/kernel/acpi/realmode/wakeup.S

@@ -0,0 +1,113 @@
+/*
+ * ACPI wakeup real mode startup stub
+ */
+#include <asm/segment.h>
+#include <asm/msr-index.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+	.code16
+	.section ".header", "a"
+
+/* This should match the structure in wakeup.h */
+		.globl	wakeup_header
+wakeup_header:
+video_mode:	.short	0	/* Video mode number */
+pmode_return:	.byte	0x66, 0xea	/* ljmpl */
+		.long	0	/* offset goes here */
+		.short	__KERNEL_CS
+pmode_cr0:	.long	0	/* Saved %cr0 */
+pmode_cr3:	.long	0	/* Saved %cr3 */
+pmode_cr4:	.long	0	/* Saved %cr4 */
+pmode_efer:	.quad	0	/* Saved EFER */
+pmode_gdt:	.quad	0
+realmode_flags:	.long	0
+real_magic:	.long	0
+trampoline_segment:	.word 0
+signature:	.long	0x51ee1111
+
+	.text
+	.globl	_start
+	.code16
+wakeup_code:
+_start:
+	cli
+	cld
+
+	/* Set up segments */
+	movw	%cs, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %ss
+
+	movl	$wakeup_stack_end, %esp
+
+	/* Clear the EFLAGS */
+	pushl	$0
+	popfl
+
+	/* Check header signature... */
+	movl	signature, %eax
+	cmpl	$0x51ee1111, %eax
+	jne	bogus_real_magic
+
+	/* Check we really have everything... */
+	movl	end_signature, %eax
+	cmpl	$0x65a22c82, %eax
+	jne	bogus_real_magic
+
+	/* Call the C code */
+	calll	main
+
+	/* Do any other stuff... */
+
+#ifndef CONFIG_64BIT
+	/* This could also be done in C code... */
+	movl	pmode_cr3, %eax
+	movl	%eax, %cr3
+
+	movl	pmode_cr4, %ecx
+	jecxz	1f
+	movl	%ecx, %cr4
+1:
+	movl	pmode_efer, %eax
+	movl	pmode_efer + 4, %edx
+	movl	%eax, %ecx
+	orl	%edx, %ecx
+	jz	1f
+	movl	$0xc0000080, %ecx
+	wrmsr
+1:
+
+	lgdtl	pmode_gdt
+
+	/* This really couldn't... */
+	movl	pmode_cr0, %eax
+	movl	%eax, %cr0
+	jmp	pmode_return
+#else
+	pushw	$0
+	pushw	trampoline_segment
+	pushw	$0
+	lret
+#endif
+
+bogus_real_magic:
+1:
+	hlt
+	jmp	1b
+
+	.data
+	.balign	4
+	.globl	HEAP, heap_end
+HEAP:
+	.long	wakeup_heap
+heap_end:
+	.long	wakeup_stack
+
+	.bss
+wakeup_heap:
+	.space	2048
+wakeup_stack:
+	.space	2048
+wakeup_stack_end:

+ 36 - 0
arch/x86/kernel/acpi/realmode/wakeup.h

@@ -0,0 +1,36 @@
+/*
+ * Definitions for the wakeup data structure at the head of the
+ * wakeup code.
+ */
+
+#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H
+#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+/* This must match data at wakeup.S */
+struct wakeup_header {
+	u16 video_mode;		/* Video mode number */
+	u16 _jmp1;		/* ljmpl opcode, 32-bit only */
+	u32 pmode_entry;	/* Protected mode resume point, 32-bit only */
+	u16 _jmp2;		/* CS value, 32-bit only */
+	u32 pmode_cr0;		/* Protected mode cr0 */
+	u32 pmode_cr3;		/* Protected mode cr3 */
+	u32 pmode_cr4;		/* Protected mode cr4 */
+	u32 pmode_efer_low;	/* Protected mode EFER */
+	u32 pmode_efer_high;	/* Upper 32 bits of the saved EFER */
+	u64 pmode_gdt;		/* GDT descriptor for lgdtl, filled in on 32-bit only */
+	u32 realmode_flags;	/* Bit 0: far-call c000:3, bit 1: set video mode, bit 2: beep */
+	u32 real_magic;		/* Must be 0x12345678, otherwise main() hangs */
+	u16 trampoline_segment;	/* segment with trampoline code, 64-bit only */
+	u32 signature;		/* To check we have correct structure */
+} __attribute__((__packed__));	/* packed: layout must equal the .header data in wakeup.S */
+
+extern struct wakeup_header wakeup_header;
+#endif
+
+#define HEADER_OFFSET 0x3f00
+#define WAKEUP_SIZE   0x4000
+
+#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */

+ 61 - 0
arch/x86/kernel/acpi/realmode/wakeup.lds.S

@@ -0,0 +1,61 @@
+/*
+ * wakeup.lds.S
+ *
+ * Linker script for the real-mode wakeup code
+ */
+#undef i386
+#include "wakeup.h"
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+
+SECTIONS
+{
+	. = HEADER_OFFSET;
+	.header : {
+		 *(.header)
+	}
+
+	. = 0;
+	.text : {
+		 *(.text*)
+	}
+
+	. = ALIGN(16);
+	.rodata : {
+		*(.rodata*)
+	}
+
+	.videocards : {
+		video_cards = .;
+		*(.videocards)
+		video_cards_end = .;
+	}
+
+	. = ALIGN(16);
+	.data : {
+		 *(.data*)
+	}
+
+	.signature : {
+		end_signature = .;
+		LONG(0x65a22c82)
+	}
+
+	. = ALIGN(16);
+	.bss :	{
+		__bss_start = .;
+		*(.bss)
+		__bss_end = .;
+	}
+
+	. = ALIGN(16);
+	_end = .;
+
+	/DISCARD/ : {
+		*(.note*)
+	}
+
+	. = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
+}

+ 60 - 13
arch/x86/kernel/acpi/sleep.c

@@ -10,30 +10,72 @@
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
 
-#include <asm/smp.h>
+#include "realmode/wakeup.h"
+#include "sleep.h"
 
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
+unsigned long acpi_wakeup_address;
 unsigned long acpi_realmode_flags;
-extern char wakeup_start, wakeup_end;
 
-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
+/* address in low memory of the wakeup routine. */
+static unsigned long acpi_realmode;
+
+#ifdef CONFIG_64BIT
+static char temp_stack[10240];
+#endif
 
 /**
  * acpi_save_state_mem - save kernel state
  *
  * Create an identity mapped page table and copy the wakeup routine to
  * low memory.
+ *
+ * Note that this is too late to change acpi_wakeup_address.
+ *
+ * Returns 0 on success, -ENOMEM if no low memory was reserved at boot,
+ * or -EINVAL if the copied wakeup blob does not carry the expected
+ * header signature.
  */
 int acpi_save_state_mem(void)
 {
-	if (!acpi_wakeup_address) {
-		printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
+	struct wakeup_header *header;
+
+	if (!acpi_realmode) {
+		printk(KERN_ERR "Could not allocate memory during boot, "
+		       "S3 disabled\n");
 		return -ENOMEM;
 	}
-	memcpy((void *)acpi_wakeup_address, &wakeup_start,
-	       &wakeup_end - &wakeup_start);
-	acpi_copy_wakeup_routine(acpi_wakeup_address);
+
+	/*
+	 * Copy exactly the embedded blob.  acpi_reserve_bootmem() only
+	 * guarantees that it is no larger than WAKEUP_SIZE, so copying a
+	 * fixed WAKEUP_SIZE bytes could read past wakeup_code_end.
+	 */
+	memcpy((void *)acpi_realmode, &wakeup_code_start,
+	       &wakeup_code_end - &wakeup_code_start);
+
+	/* The header sits at a fixed offset inside the low-memory copy. */
+	header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
+	if (header->signature != 0x51ee1111) {
+		printk(KERN_ERR "wakeup header does not match\n");
+		return -EINVAL;
+	}
+
+	header->video_mode = saved_video_mode;
+
+#ifndef CONFIG_64BIT
+	store_gdt((struct desc_ptr *)&header->pmode_gdt);
+
+	/* EFER is only saved when nx_enabled has bit 0 set. */
+	header->pmode_efer_low = nx_enabled;
+	if (header->pmode_efer_low & 1) {
+		/* This is strange, why not save efer, always? */
+		rdmsr(MSR_EFER, header->pmode_efer_low,
+			header->pmode_efer_high);
+	}
+#endif /* !CONFIG_64BIT */
+
+	header->pmode_cr0 = read_cr0();
+	header->pmode_cr4 = read_cr4();
+	header->realmode_flags = acpi_realmode_flags;
+	/* Checked by the real-mode main(); it hangs on a mismatch. */
+	header->real_magic = 0x12345678;
+
+#ifndef CONFIG_64BIT
+	header->pmode_entry = (u32)&wakeup_pmode_return;
+	header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
+	saved_magic = 0x12345678;
+#else /* CONFIG_64BIT */
+	header->trampoline_segment = setup_trampoline() >> 4;
+	init_rsp = (unsigned long)temp_stack + 4096;
+	initial_code = (unsigned long)wakeup_long64;
+	saved_magic = 0x123456789abcdef0;
+#endif /* CONFIG_64BIT */
 
 	return 0;
 }
@@ -56,15 +98,20 @@ void acpi_restore_state_mem(void)
  */
 void __init acpi_reserve_bootmem(void)
 {
-	if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
+	if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
 		printk(KERN_ERR
 		       "ACPI: Wakeup code way too big, S3 disabled.\n");
 		return;
 	}
 
-	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
-	if (!acpi_wakeup_address)
+	acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
+
+	if (!acpi_realmode) {
 		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
+		return;
+	}
+
+	acpi_wakeup_address = acpi_realmode;
 }
 
 

+ 16 - 0
arch/x86/kernel/acpi/sleep.h

@@ -0,0 +1,16 @@
+/*
+ *	Variables and functions used by the code in sleep.c
+ */
+
+#include <asm/trampoline.h>
+
+extern char wakeup_code_start, wakeup_code_end;
+
+extern unsigned long saved_video_mode;
+extern long saved_magic;
+
+extern int wakeup_pmode_return;
+extern char swsusp_pg_dir[PAGE_SIZE];
+
+extern unsigned long acpi_copy_wakeup_routine(unsigned long);
+extern void wakeup_long64(void);

+ 0 - 40
arch/x86/kernel/acpi/sleep_32.c

@@ -1,40 +0,0 @@
-/*
- * sleep.c - x86-specific ACPI sleep support.
- *
- *  Copyright (C) 2001-2003 Patrick Mochel
- *  Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
- */
-
-#include <linux/acpi.h>
-#include <linux/bootmem.h>
-#include <linux/dmi.h>
-#include <linux/cpumask.h>
-
-#include <asm/smp.h>
-
-/* Ouch, we want to delete this. We already have better version in userspace, in
-   s2ram from suspend.sf.net project */
-static __init int reset_videomode_after_s3(const struct dmi_system_id *d)
-{
-	acpi_realmode_flags |= 2;
-	return 0;
-}
-
-static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
-	{			/* Reset video mode after returning from ACPI S3 sleep */
-	 .callback = reset_videomode_after_s3,
-	 .ident = "Toshiba Satellite 4030cdt",
-	 .matches = {
-		     DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
-		     },
-	 },
-	{}
-};
-
-static int __init acpisleep_dmi_init(void)
-{
-	dmi_check_system(acpisleep_dmi_table);
-	return 0;
-}
-
-core_initcall(acpisleep_dmi_init);

+ 20 - 227
arch/x86/kernel/acpi/wakeup_32.S

@@ -3,178 +3,12 @@
 #include <asm/segment.h>
 #include <asm/page.h>
 
-#
-# wakeup_code runs in real mode, and at unknown address (determined at run-time).
-# Therefore it must only use relative jumps/calls. 
-#
-# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled
-#
-# If physical address of wakeup_code is 0x12345, BIOS should call us with
-# cs = 0x1234, eip = 0x05
-#
-
-#define BEEP \
-	inb	$97, %al; 	\
-	outb	%al, $0x80; 	\
-	movb	$3, %al; 	\
-	outb	%al, $97; 	\
-	outb	%al, $0x80; 	\
-	movb	$-74, %al; 	\
-	outb	%al, $67; 	\
-	outb	%al, $0x80; 	\
-	movb	$-119, %al; 	\
-	outb	%al, $66; 	\
-	outb	%al, $0x80; 	\
-	movb	$15, %al; 	\
-	outb	%al, $66;
-
-ALIGN
-	.align	4096
-ENTRY(wakeup_start)
-wakeup_code:
-	wakeup_code_start = .
-	.code16
-
-	cli
-	cld
-
-	# setup data segment
-	movw	%cs, %ax
-	movw	%ax, %ds					# Make ds:0 point to wakeup_start
-	movw	%ax, %ss
-
-	testl   $4, realmode_flags - wakeup_code
-	jz      1f
-	BEEP
-1:
-	mov	$(wakeup_stack - wakeup_code), %sp		# Private stack is needed for ASUS board
-
-	pushl	$0						# Kill any dangerous flags
-	popfl
-
-	movl	real_magic - wakeup_code, %eax
-	cmpl	$0x12345678, %eax
-	jne	bogus_real_magic
-
-	testl	$1, realmode_flags - wakeup_code
-	jz	1f
-	lcall   $0xc000,$3
-	movw	%cs, %ax
-	movw	%ax, %ds					# Bios might have played with that
-	movw	%ax, %ss
-1:
-
-	testl	$2, realmode_flags - wakeup_code
-	jz	1f
-	mov	video_mode - wakeup_code, %ax
-	call	mode_set
-1:
-
-	# set up page table
-	movl	$swsusp_pg_dir-__PAGE_OFFSET, %eax
-	movl	%eax, %cr3
-
-	testl	$1, real_efer_save_restore - wakeup_code
-	jz	4f
-	# restore efer setting
-	movl	real_save_efer_edx - wakeup_code, %edx
-	movl	real_save_efer_eax - wakeup_code, %eax
-	mov     $0xc0000080, %ecx
-	wrmsr
-4:
-	# make sure %cr4 is set correctly (features, etc)
-	movl	real_save_cr4 - wakeup_code, %eax
-	movl	%eax, %cr4
-	
-	# need a gdt -- use lgdtl to force 32-bit operands, in case
-	# the GDT is located past 16 megabytes.
-	lgdtl	real_save_gdt - wakeup_code
-
-	movl	real_save_cr0 - wakeup_code, %eax
-	movl	%eax, %cr0
-	jmp 1f
-1:
-	movl	real_magic - wakeup_code, %eax
-	cmpl	$0x12345678, %eax
-	jne	bogus_real_magic
-
-	testl   $8, realmode_flags - wakeup_code
-	jz      1f
-	BEEP
-1:
-	ljmpl	$__KERNEL_CS, $wakeup_pmode_return
-
-real_save_gdt:	.word 0
-		.long 0
-real_save_cr0:	.long 0
-real_save_cr3:	.long 0
-real_save_cr4:	.long 0
-real_magic:	.long 0
-video_mode:	.long 0
-realmode_flags:	.long 0
-real_efer_save_restore:	.long 0
-real_save_efer_edx: 	.long 0
-real_save_efer_eax: 	.long 0
-
-bogus_real_magic:
-	jmp bogus_real_magic
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- *	NORMAL_VGA (-1)
- *	EXTENDED_VGA (-2)
- *	ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-# Setting of user mode (AX=mode ID) => CF=success
-
-# For now, we only handle VESA modes (0x0200..0x03ff).  To handle other
-# modes, we should probably compile in the video code from the boot
-# directory.
-mode_set:
-	movw	%ax, %bx
-	subb	$VIDEO_FIRST_VESA>>8, %bh
-	cmpb	$2, %bh
-	jb	check_vesa
-
-setbad:
-	clc
-	ret
-
-check_vesa:
-	orw	$0x4000, %bx			# Use linear frame buffer
-	movw	$0x4f02, %ax			# VESA BIOS mode set call
-	int	$0x10
-	cmpw	$0x004f, %ax			# AL=4f if implemented
-	jnz	setbad				# AH=0 if OK
-
-	stc
-	ret
+# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
 
 	.code32
 	ALIGN
 
-.org	0x800
-wakeup_stack_begin:	# Stack grows down
-
-.org	0xff0		# Just below end of page
-wakeup_stack:
-ENTRY(wakeup_end)
-	
-.org	0x1000
-
+ENTRY(wakeup_pmode_return)
 wakeup_pmode_return:
 	movw	$__KERNEL_DS, %ax
 	movw	%ax, %ss
@@ -187,7 +21,7 @@ wakeup_pmode_return:
 	lgdt	saved_gdt
 	lidt	saved_idt
 	lldt	saved_ldt
-	ljmp	$(__KERNEL_CS),$1f
+	ljmp	$(__KERNEL_CS), $1f
 1:
 	movl	%cr3, %eax
 	movl	%eax, %cr3
@@ -201,82 +35,41 @@ wakeup_pmode_return:
 	jne	bogus_magic
 
 	# jump to place where we left off
-	movl	saved_eip,%eax
+	movl	saved_eip, %eax
 	jmp	*%eax
 
 bogus_magic:
 	jmp	bogus_magic
 
 
-##
-# acpi_copy_wakeup_routine
-#
-# Copy the above routine to low memory.
-#
-# Parameters:
-# %eax:	place to copy wakeup routine to
-#
-# Returned address is location of code in low memory (past data and stack)
-#
-ENTRY(acpi_copy_wakeup_routine)
 
-	pushl	%ebx
+save_registers:
 	sgdt	saved_gdt
 	sidt	saved_idt
 	sldt	saved_ldt
 	str	saved_tss
 
-	movl	nx_enabled, %edx
-	movl	%edx, real_efer_save_restore - wakeup_start (%eax)
-	testl	$1, real_efer_save_restore - wakeup_start (%eax)
-	jz	2f
-	# save efer setting
-	pushl	%eax
-	movl	%eax, %ebx
-	mov     $0xc0000080, %ecx
-	rdmsr
-	movl	%edx, real_save_efer_edx - wakeup_start (%ebx)
-	movl	%eax, real_save_efer_eax - wakeup_start (%ebx)
-	popl	%eax
-2:
-
-	movl    %cr3, %edx
-	movl    %edx, real_save_cr3 - wakeup_start (%eax)
-	movl    %cr4, %edx
-	movl    %edx, real_save_cr4 - wakeup_start (%eax)
-	movl	%cr0, %edx
-	movl	%edx, real_save_cr0 - wakeup_start (%eax)
-	sgdt    real_save_gdt - wakeup_start (%eax)
-
-	movl	saved_videomode, %edx
-	movl	%edx, video_mode - wakeup_start (%eax)
-	movl	acpi_realmode_flags, %edx
-	movl	%edx, realmode_flags - wakeup_start (%eax)
-	movl	$0x12345678, real_magic - wakeup_start (%eax)
-	movl	$0x12345678, saved_magic
-	popl	%ebx
-	ret
-
-save_registers:
 	leal	4(%esp), %eax
 	movl	%eax, saved_context_esp
-	movl %ebx, saved_context_ebx
-	movl %ebp, saved_context_ebp
-	movl %esi, saved_context_esi
-	movl %edi, saved_context_edi
-	pushfl ; popl saved_context_eflags
-
-	movl $ret_point, saved_eip
+	movl	%ebx, saved_context_ebx
+	movl	%ebp, saved_context_ebp
+	movl	%esi, saved_context_esi
+	movl	%edi, saved_context_edi
+	pushfl
+	popl	saved_context_eflags
+
+	movl	$ret_point, saved_eip
 	ret
 
 
 restore_registers:
-	movl saved_context_ebp, %ebp
-	movl saved_context_ebx, %ebx
-	movl saved_context_esi, %esi
-	movl saved_context_edi, %edi
-	pushl saved_context_eflags ; popfl
-	ret	
+	movl	saved_context_ebp, %ebp
+	movl	saved_context_ebx, %ebx
+	movl	saved_context_esi, %esi
+	movl	saved_context_edi, %edi
+	pushl	saved_context_eflags
+	popfl
+	ret
 
 ENTRY(do_suspend_lowlevel)
 	call	save_processor_state

+ 9 - 304
arch/x86/kernel/acpi/wakeup_64.S

@@ -7,191 +7,18 @@
 #include <asm/asm-offsets.h>
 
 # Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
-#
-# wakeup_code runs in real mode, and at unknown address (determined at run-time).
-# Therefore it must only use relative jumps/calls. 
-#
-# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled
-#
-# If physical address of wakeup_code is 0x12345, BIOS should call us with
-# cs = 0x1234, eip = 0x05
-#
-
-#define BEEP \
-	inb	$97, %al; 	\
-	outb	%al, $0x80; 	\
-	movb	$3, %al; 	\
-	outb	%al, $97; 	\
-	outb	%al, $0x80; 	\
-	movb	$-74, %al; 	\
-	outb	%al, $67; 	\
-	outb	%al, $0x80; 	\
-	movb	$-119, %al; 	\
-	outb	%al, $66; 	\
-	outb	%al, $0x80; 	\
-	movb	$15, %al; 	\
-	outb	%al, $66;
-
-
-ALIGN
-	.align	16
-ENTRY(wakeup_start)
-wakeup_code:
-	wakeup_code_start = .
-	.code16
-
-# Running in *copy* of this code, somewhere in low 1MB.
-
-	cli
-	cld
-	# setup data segment
-	movw	%cs, %ax
-	movw	%ax, %ds		# Make ds:0 point to wakeup_start
-	movw	%ax, %ss
-
-	# Data segment must be set up before we can see whether to beep.
-	testl   $4, realmode_flags - wakeup_code
-	jz      1f
-	BEEP
-1:
-
-					# Private stack is needed for ASUS board
-	mov	$(wakeup_stack - wakeup_code), %sp
-
-	pushl	$0			# Kill any dangerous flags
-	popfl
-
-	movl	real_magic - wakeup_code, %eax
-	cmpl	$0x12345678, %eax
-	jne	bogus_real_magic
-
-	testl	$1, realmode_flags - wakeup_code
-	jz	1f
-	lcall   $0xc000,$3
-	movw	%cs, %ax
-	movw	%ax, %ds		# Bios might have played with that
-	movw	%ax, %ss
-1:
-
-	testl	$2, realmode_flags - wakeup_code
-	jz	1f
-	mov	video_mode - wakeup_code, %ax
-	call	mode_set
-1:
-
-	mov	%ds, %ax			# Find 32bit wakeup_code addr
-	movzx   %ax, %esi			# (Convert %ds:gdt to a liner ptr)
-	shll    $4, %esi
-						# Fix up the vectors
-	addl    %esi, wakeup_32_vector - wakeup_code
-	addl    %esi, wakeup_long64_vector - wakeup_code
-	addl    %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
-
-	lidtl	%ds:idt_48a - wakeup_code
-	lgdtl	%ds:gdt_48a - wakeup_code	# load gdt with whatever is
-						# appropriate
-
-	movl	$1, %eax			# protected mode (PE) bit
-	lmsw	%ax				# This is it!
-	jmp	1f
-1:
-
-	ljmpl   *(wakeup_32_vector - wakeup_code)
-
-	.balign 4
-wakeup_32_vector:
-	.long   wakeup_32 - wakeup_code
-	.word   __KERNEL32_CS, 0
-
-	.code32
-wakeup_32:
-# Running in this code, but at low address; paging is not yet turned on.
-
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-
-	/*
-	 * Prepare for entering 64bits mode
-	 */
-
-	/* Enable PAE */
-	xorl	%eax, %eax
-	btsl	$5, %eax
-	movl	%eax, %cr4
-
-	/* Setup early boot stage 4 level pagetables */
-	leal    (wakeup_level4_pgt - wakeup_code)(%esi), %eax
-	movl	%eax, %cr3
-
-        /* Check if nx is implemented */
-        movl    $0x80000001, %eax
-        cpuid
-        movl    %edx,%edi
-
-	/* Enable Long Mode */
-	xorl    %eax, %eax
-	btsl	$_EFER_LME, %eax
-
-	/* No Execute supported? */
-	btl	$20,%edi
-	jnc     1f
-	btsl	$_EFER_NX, %eax
-				
-	/* Make changes effective */
-1:	movl    $MSR_EFER, %ecx
-	xorl    %edx, %edx
-	wrmsr
-
-	xorl	%eax, %eax
-	btsl	$31, %eax			/* Enable paging and in turn activate Long Mode */
-	btsl	$0, %eax			/* Enable protected mode */
-
-	/* Make changes effective */
-	movl	%eax, %cr0
-
-	/* At this point:
-		CR4.PAE must be 1
-		CS.L must be 0
-		CR3 must point to PML4
-		Next instruction must be a branch
-		This must be on identity-mapped page
-	*/
-	/*
-	 * At this point we're in long mode but in 32bit compatibility mode
-	 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
-	 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we load
-	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
-	 */
-
-	/* Finally jump in 64bit mode */
-        ljmp    *(wakeup_long64_vector - wakeup_code)(%esi)
-
-	.balign 4
-wakeup_long64_vector:
-	.long   wakeup_long64 - wakeup_code
-	.word   __KERNEL_CS, 0
 
 .code64
-
-	/* Hooray, we are in Long 64-bit mode (but still running in
-	 * low memory)
-	 */
-wakeup_long64:
 	/*
-	 * We must switch to a new descriptor in kernel space for the GDT
-	 * because soon the kernel won't have access anymore to the userspace
-	 * addresses where we're currently running on. We have to do that here
-	 * because in 32bit we couldn't load a 64bit linear address.
+	 * Hooray, we are in Long 64-bit mode (but still running in low memory)
 	 */
-	lgdt	cpu_gdt_descr
-
-	movq    saved_magic, %rax
-	movq    $0x123456789abcdef0, %rdx
-	cmpq    %rdx, %rax
-	jne     bogus_64_magic
+ENTRY(wakeup_long64)
+wakeup_long64:
+	movq	saved_magic, %rax
+	movq	$0x123456789abcdef0, %rdx
+	cmpq	%rdx, %rax
+	jne	bogus_64_magic
 
-	nop
-	nop
 	movw	$__KERNEL_DS, %ax
 	movw	%ax, %ss	
 	movw	%ax, %ds
@@ -208,130 +35,8 @@ wakeup_long64:
 	movq	saved_rip, %rax
 	jmp	*%rax
 
-.code32
-
-	.align	64	
-gdta:
-	/* Its good to keep gdt in sync with one in trampoline.S */
-	.word	0, 0, 0, 0			# dummy
-	/* ??? Why I need the accessed bit set in order for this to work? */
-	.quad   0x00cf9b000000ffff              # __KERNEL32_CS
-	.quad   0x00af9b000000ffff              # __KERNEL_CS
-	.quad   0x00cf93000000ffff              # __KERNEL_DS
-
-idt_48a:
-	.word	0				# idt limit = 0
-	.word	0, 0				# idt base = 0L
-
-gdt_48a:
-	.word	0x800				# gdt limit=2048,
-						#  256 GDT entries
-	.long   gdta - wakeup_code              # gdt base (relocated in later)
-	
-real_magic:	.quad 0
-video_mode:	.quad 0
-realmode_flags:	.quad 0
-
-.code16
-bogus_real_magic:
-	jmp bogus_real_magic
-
-.code64
 bogus_64_magic:
-	jmp bogus_64_magic
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- *	NORMAL_VGA (-1)
- *	EXTENDED_VGA (-2)
- *	ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-# Setting of user mode (AX=mode ID) => CF=success
-
-# For now, we only handle VESA modes (0x0200..0x03ff).  To handle other
-# modes, we should probably compile in the video code from the boot
-# directory.
-.code16
-mode_set:
-	movw	%ax, %bx
-	subb	$VIDEO_FIRST_VESA>>8, %bh
-	cmpb	$2, %bh
-	jb	check_vesa
-
-setbad:
-	clc
-	ret
-
-check_vesa:
-	orw	$0x4000, %bx			# Use linear frame buffer
-	movw	$0x4f02, %ax			# VESA BIOS mode set call
-	int	$0x10
-	cmpw	$0x004f, %ax			# AL=4f if implemented
-	jnz	setbad				# AH=0 if OK
-
-	stc
-	ret
-
-wakeup_stack_begin:	# Stack grows down
-
-.org	0xff0
-wakeup_stack:		# Just below end of page
-
-.org   0x1000
-ENTRY(wakeup_level4_pgt)
-	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.fill   510,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad   level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-
-ENTRY(wakeup_end)
-	
-##
-# acpi_copy_wakeup_routine
-#
-# Copy the above routine to low memory.
-#
-# Parameters:
-# %rdi:	place to copy wakeup routine to
-#
-# Returned address is location of code in low memory (past data and stack)
-#
-	.code64
-ENTRY(acpi_copy_wakeup_routine)
-	pushq	%rax
-	pushq	%rdx
-
-	movl	saved_video_mode, %edx
-	movl	%edx, video_mode - wakeup_start (,%rdi)
-	movl	acpi_realmode_flags, %edx
-	movl	%edx, realmode_flags - wakeup_start (,%rdi)
-	movq	$0x12345678, real_magic - wakeup_start (,%rdi)
-	movq	$0x123456789abcdef0, %rdx
-	movq	%rdx, saved_magic
-
-	movq    saved_magic, %rax
-	movq    $0x123456789abcdef0, %rdx
-	cmpq    %rdx, %rax
-	jne     bogus_64_magic
-
-	# restore the regs we used
-	popq	%rdx
-	popq	%rax
-ENTRY(do_suspend_lowlevel_s4bios)
-	ret
+	jmp	bogus_64_magic
 
 	.align 2
 	.p2align 4,,15
@@ -414,7 +119,7 @@ do_suspend_lowlevel:
 	jmp	restore_processor_state
 .LFE5:
 .Lfe5:
-	.size	do_suspend_lowlevel,.Lfe5-do_suspend_lowlevel
+	.size	do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel
 	
 .data
 ALIGN

+ 10 - 0
arch/x86/kernel/acpi/wakeup_rm.S

@@ -0,0 +1,10 @@
+/*
+ * Wrapper script for the realmode binary as a transport object
+ * before copying to low memory.
+ */
+	.section ".rodata","a"
+	.globl	wakeup_code_start, wakeup_code_end
+wakeup_code_start:
+	.incbin	"arch/x86/kernel/acpi/realmode/wakeup.bin"
+wakeup_code_end:
+	.size	wakeup_code_start, .-wakeup_code_start

+ 76 - 27
arch/x86/kernel/alternative.c

@@ -11,6 +11,8 @@
 #include <asm/mce.h>
 #include <asm/nmi.h>
 #include <asm/vsyscall.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
 
 #define MAX_PATCH_LEN (255-1)
 
@@ -177,7 +179,7 @@ static const unsigned char*const * find_nop_table(void)
 #endif /* CONFIG_X86_64 */
 
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void add_nops(void *insns, unsigned int len)
+void add_nops(void *insns, unsigned int len)
 {
 	const unsigned char *const *noptable = find_nop_table();
 
@@ -190,6 +192,7 @@ static void add_nops(void *insns, unsigned int len)
 		len -= noplen;
 	}
 }
+EXPORT_SYMBOL_GPL(add_nops);
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -205,7 +208,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 	struct alt_instr *a;
 	char insnbuf[MAX_PATCH_LEN];
 
-	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
+	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
 	for (a = start; a < end; a++) {
 		u8 *instr = a->instr;
 		BUG_ON(a->replacementlen > a->instrlen);
@@ -217,13 +220,13 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
 			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
 			DPRINTK("%s: vsyscall fixup: %p => %p\n",
-				__FUNCTION__, a->instr, instr);
+				__func__, a->instr, instr);
 		}
 #endif
 		memcpy(insnbuf, a->replacement, a->replacementlen);
 		add_nops(insnbuf + a->replacementlen,
 			 a->instrlen - a->replacementlen);
-		text_poke(instr, insnbuf, a->instrlen);
+		text_poke_early(instr, insnbuf, a->instrlen);
 	}
 }
 
@@ -284,7 +287,6 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 				 void *text,  void *text_end)
 {
 	struct smp_alt_module *smp;
-	unsigned long flags;
 
 	if (noreplace_smp)
 		return;
@@ -307,42 +309,40 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 	smp->text	= text;
 	smp->text_end	= text_end;
 	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
-		__FUNCTION__, smp->locks, smp->locks_end,
+		__func__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
-	spin_lock_irqsave(&smp_alt, flags);
+	spin_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
 	if (boot_cpu_has(X86_FEATURE_UP))
 		alternatives_smp_unlock(smp->locks, smp->locks_end,
 					smp->text, smp->text_end);
-	spin_unlock_irqrestore(&smp_alt, flags);
+	spin_unlock(&smp_alt);
 }
 
 void alternatives_smp_module_del(struct module *mod)
 {
 	struct smp_alt_module *item;
-	unsigned long flags;
 
 	if (smp_alt_once || noreplace_smp)
 		return;
 
-	spin_lock_irqsave(&smp_alt, flags);
+	spin_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		spin_unlock_irqrestore(&smp_alt, flags);
-		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
+		spin_unlock(&smp_alt);
+		DPRINTK("%s: %s\n", __func__, item->name);
 		kfree(item);
 		return;
 	}
-	spin_unlock_irqrestore(&smp_alt, flags);
+	spin_unlock(&smp_alt);
 }
 
 void alternatives_smp_switch(int smp)
 {
 	struct smp_alt_module *mod;
-	unsigned long flags;
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
-	spin_lock_irqsave(&smp_alt, flags);
+	spin_lock(&smp_alt);
 
 	/*
 	 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
 						mod->text, mod->text_end);
 	}
 	smp_mode = smp;
-	spin_unlock_irqrestore(&smp_alt, flags);
+	spin_unlock(&smp_alt);
 }
 
 #endif
@@ -411,7 +411,7 @@ void apply_paravirt(struct paravirt_patch_site *start,
 
 		/* Pad the rest with nops */
 		add_nops(insnbuf + used, p->len - used);
-		text_poke(p->instr, insnbuf, p->len);
+		text_poke_early(p->instr, insnbuf, p->len);
 	}
 }
 extern struct paravirt_patch_site __start_parainstructions[],
@@ -420,8 +420,6 @@ extern struct paravirt_patch_site __start_parainstructions[],
 
 void __init alternative_instructions(void)
 {
-	unsigned long flags;
-
 	/* The patching is not fully atomic, so try to avoid local interruptions
 	   that might execute the to be patched code.
 	   Other CPUs are not running. */
@@ -430,7 +428,6 @@ void __init alternative_instructions(void)
 	stop_mce();
 #endif
 
-	local_irq_save(flags);
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 	/* switch to patch-once-at-boottime-only mode and free the
@@ -462,7 +459,6 @@ void __init alternative_instructions(void)
 	}
 #endif
  	apply_paravirt(__parainstructions, __parainstructions_end);
-	local_irq_restore(flags);
 
 	if (smp_alt_once)
 		free_init_pages("SMP alternatives",
@@ -475,18 +471,71 @@ void __init alternative_instructions(void)
 #endif
 }
 
-/*
- * Warning:
+/**
+ * text_poke_early - Update instructions on a live kernel at boot time
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
  * When you use this code to patch more than one byte of an instruction
  * you need to make sure that other CPUs cannot execute this code in parallel.
- * Also no thread must be currently preempted in the middle of these instructions.
- * And on the local CPU you need to be protected again NMI or MCE handlers
- * seeing an inconsistent instruction while you patch.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions. And on the local CPU you need to be protected against NMI or
+ * MCE handlers seeing an inconsistent instruction while you patch.
  */
-void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
+void *text_poke_early(void *addr, const void *opcode, size_t len)
 {
+	unsigned long flags;
+	local_irq_save(flags);
 	memcpy(addr, opcode, len);
+	local_irq_restore(flags);
+	sync_core();
+	/* Could also do a CLFLUSH here to speed up CPU recovery; but
+	   that causes hangs on some VIA CPUs. */
+	return addr;
+}
+
+/**
+ * text_poke - Update instructions on a live kernel
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Only atomic text poke/set should be allowed when not doing early patching.
+ * It means the size must be writable atomically and the address must be aligned
+ * in a way that permits an atomic write. It also makes sure we fit on a single
+ * page.
+ */
+void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+{
+	unsigned long flags;
+	char *vaddr;
+	int nr_pages = 2;
+
+	BUG_ON(len > sizeof(long));
+	BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
+		- ((long)addr & ~(sizeof(long) - 1)));
+	if (kernel_text_address((unsigned long)addr)) {
+		struct page *pages[2] = { virt_to_page(addr),
+			virt_to_page(addr + PAGE_SIZE) };
+		if (!pages[1])
+			nr_pages = 1;
+		vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+		BUG_ON(!vaddr);
+		local_irq_save(flags);
+		memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
+		local_irq_restore(flags);
+		vunmap(vaddr);
+	} else {
+		/*
+		 * modules are in vmalloc'ed memory, always writable.
+		 */
+		local_irq_save(flags);
+		memcpy(addr, opcode, len);
+		local_irq_restore(flags);
+	}
 	sync_core();
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
+	return addr;
 }

+ 3 - 3
arch/x86/kernel/aperture_64.c

@@ -27,11 +27,11 @@
 #include <asm/k8.h>
 
 int gart_iommu_aperture;
-int gart_iommu_aperture_disabled __initdata = 0;
-int gart_iommu_aperture_allowed __initdata = 0;
+int gart_iommu_aperture_disabled __initdata;
+int gart_iommu_aperture_allowed __initdata;
 
 int fallback_aper_order __initdata = 1; /* 64MB */
-int fallback_aper_force __initdata = 0;
+int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 

+ 189 - 35
arch/x86/kernel/apic_32.c

@@ -50,6 +50,11 @@
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
+unsigned long mp_lapic_addr;
+
+DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
 /*
  * Knob to control our willingness to enable the local APIC.
  *
@@ -620,6 +625,35 @@ int setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 }
 
+/*
+ * Setup extended LVT, AMD specific (K8, family 10h)
+ *
+ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+ * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ */
+
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+{
+	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
+	apic_write(reg, v);
+}
+
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+	return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+	return APIC_EILVT_LVTOFF_IBS;
+}
+
 /*
  * Local APIC start and shutdown
  */
@@ -868,12 +902,50 @@ void __init init_bsp_APIC(void)
 	apic_write_around(APIC_LVT1, value);
 }
 
+void __cpuinit lapic_setup_esr(void)
+{
+	unsigned long oldvalue, value, maxlvt;
+	if (lapic_is_integrated() && !esr_disable) {
+		/* !82489DX */
+		maxlvt = lapic_get_maxlvt();
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+		oldvalue = apic_read(APIC_ESR);
+
+		/* enables sending errors */
+		value = ERROR_APIC_VECTOR;
+		apic_write_around(APIC_LVTERR, value);
+		/*
+		 * spec says clear errors after enabling vector.
+		 */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+		value = apic_read(APIC_ESR);
+		if (value != oldvalue)
+			apic_printk(APIC_VERBOSE, "ESR value before enabling "
+				"vector: 0x%08lx  after: 0x%08lx\n",
+				oldvalue, value);
+	} else {
+		if (esr_disable)
+			/*
+			 * Something untraceable is creating bad interrupts on
+			 * secondary quads ... for the moment, just leave the
+			 * ESR disabled - we can't do anything useful with the
+			 * errors anyway - mbligh
+			 */
+			printk(KERN_INFO "Leaving ESR disabled.\n");
+		else
+			printk(KERN_INFO "No ESR for 82489DX.\n");
+	}
+}
+
+
 /**
  * setup_local_APIC - setup the local APIC
  */
 void __cpuinit setup_local_APIC(void)
 {
-	unsigned long oldvalue, value, maxlvt, integrated;
+	unsigned long value, integrated;
 	int i, j;
 
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
@@ -997,40 +1069,13 @@ void __cpuinit setup_local_APIC(void)
 	if (!integrated)		/* 82489DX */
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write_around(APIC_LVT1, value);
+}
 
-	if (integrated && !esr_disable) {
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
-
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write_around(APIC_LVTERR, value);
-		/*
-		 * spec says clear errors after enabling vector.
-		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx  after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		if (esr_disable)
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-		else
-			printk(KERN_INFO "No ESR for 82489DX.\n");
-	}
+void __cpuinit end_local_APIC_setup(void)
+{
+	unsigned long value;
 
+	lapic_setup_esr();
 	/* Disable the local apic timer */
 	value = apic_read(APIC_LVTT);
 	value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
@@ -1147,7 +1192,7 @@ void __init init_apic_mappings(void)
 	 * default configuration (or the MP table is broken).
 	 */
 	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 
 #ifdef CONFIG_X86_IO_APIC
 	{
@@ -1185,6 +1230,9 @@ fake_ioapic_page:
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
+
+int apic_version[MAX_APICS];
+
 int __init APIC_init_uniprocessor(void)
 {
 	if (enable_local_apic < 0)
@@ -1214,12 +1262,13 @@ int __init APIC_init_uniprocessor(void)
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
 #ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 #endif
 	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
 
 	setup_local_APIC();
 
+	end_local_APIC_setup();
 #ifdef CONFIG_X86_IO_APIC
 	if (smp_found_config)
 		if (!skip_ioapic_setup && nr_ioapics)
@@ -1288,6 +1337,29 @@ void smp_error_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
+#ifdef CONFIG_SMP
+void __init smp_intr_init(void)
+{
+	/*
+	 * IRQ0 must be given a fixed assignment and initialized,
+	 * because it's used before the IO-APIC is set up.
+	 */
+	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+	/*
+	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+	 * IPI, driven by wakeup.
+	 */
+	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+	/* IPI for invalidation */
+	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+	/* IPI for generic function call */
+	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+}
+#endif
+
 /*
  * Initialize APIC interrupts
  */
@@ -1394,6 +1466,88 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 	}
 }
 
+unsigned int __cpuinitdata maxcpus = NR_CPUS;
+
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+	int cpu;
+	cpumask_t tmp_map;
+	physid_mask_t phys_cpu;
+
+	/*
+	 * A zero APIC version is a BIOS bug: warn and substitute 0x10.
+	 */
+	if (version == 0x0) {
+		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+				"fixing up to 0x10. (tell your hw vendor)\n",
+				apicid);
+		version = 0x10;
+	}
+	apic_version[apicid] = version;
+
+	phys_cpu = apicid_to_cpu_present(apicid);
+	physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+	if (num_processors >= NR_CPUS) {
+		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+			"  Processor ignored.\n", NR_CPUS);
+		return;
+	}
+
+	if (num_processors >= maxcpus) {
+		printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+			" Processor ignored.\n", maxcpus);
+		return;
+	}
+
+	num_processors++;
+	cpus_complement(tmp_map, cpu_present_map);
+	cpu = first_cpu(tmp_map);
+
+	if (apicid == boot_cpu_physical_apicid)
+		/*
+		 * x86_bios_cpu_apicid is required to have processors listed
+		 * in same order as logical cpu numbers. Hence the first
+		 * entry is BSP, and so on.
+		 */
+		cpu = 0;
+
+	/*
+	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+	 * but we need to work other dependencies like SMP_SUSPEND etc
+	 * before this can be done without some confusion.
+	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+	 *       - Ashok Raj <ashok.raj@intel.com>
+	 */
+	if (num_processors > 8) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			if (!APIC_XAPIC(version)) {
+				def_to_bigsmp = 0;
+				break;
+			}
+			/* If P4 and above fall through */
+		case X86_VENDOR_AMD:
+			def_to_bigsmp = 1;
+		}
+	}
+#ifdef CONFIG_SMP
+	/* are we being called early in kernel startup? */
+	if (x86_cpu_to_apicid_early_ptr) {
+		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+
+		cpu_to_apicid[cpu] = apicid;
+		bios_cpu_apicid[cpu] = apicid;
+	} else {
+		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+	}
+#endif
+	cpu_set(cpu, cpu_possible_map);
+	cpu_set(cpu, cpu_present_map);
+}
+
 /*
  * Power management
  */

+ 125 - 15
arch/x86/kernel/apic_64.c

@@ -34,13 +34,15 @@
 #include <asm/mpspec.h>
 #include <asm/hpet.h>
 #include <asm/pgalloc.h>
-#include <asm/mach_apic.h>
 #include <asm/nmi.h>
 #include <asm/idle.h>
 #include <asm/proto.h>
 #include <asm/timex.h>
 #include <asm/apic.h>
 
+#include <mach_ipi.h>
+#include <mach_apic.h>
+
 int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
@@ -83,6 +85,12 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
 static unsigned long apic_phys;
 
+unsigned long mp_lapic_addr;
+
+DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+unsigned int __cpuinitdata maxcpus = NR_CPUS;
 /*
  * Get the LAPIC version
  */
@@ -431,7 +439,8 @@ void __cpuinit check_boot_apic_timer_broadcast(void)
 	lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
 
 	local_irq_enable();
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
+			   &boot_cpu_physical_apicid);
 	local_irq_disable();
 }
 
@@ -640,10 +649,10 @@ int __init verify_local_APIC(void)
 	/*
 	 * The ID register is read/write in a real APIC.
 	 */
-	reg0 = apic_read(APIC_ID);
+	reg0 = read_apic_id();
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
 	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = apic_read(APIC_ID);
+	reg1 = read_apic_id();
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
 	apic_write(APIC_ID, reg0);
 	if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -728,6 +737,7 @@ void __cpuinit setup_local_APIC(void)
 	unsigned int value;
 	int i, j;
 
+	preempt_disable();
 	value = apic_read(APIC_LVR);
 
 	BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
@@ -821,6 +831,7 @@ void __cpuinit setup_local_APIC(void)
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
 	apic_write(APIC_LVT1, value);
+	preempt_enable();
 }
 
 void __cpuinit lapic_setup_esr(void)
@@ -857,10 +868,34 @@ static int __init detect_init_APIC(void)
 	}
 
 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_id = 0;
+	boot_cpu_physical_apicid = 0;
 	return 0;
 }
 
+void __init early_init_lapic_mapping(void)
+{
+	unsigned long apic_phys;
+
+	/*
+	 * If no local APIC can be found then bail out:
+	 * it means there is neither an mptable nor a MADT.
+	 */
+	if (!smp_found_config)
+		return;
+
+	apic_phys = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+				 APIC_BASE, apic_phys);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+}
+
 /**
  * init_apic_mappings - initialize APIC mappings
  */
@@ -881,16 +916,11 @@ void __init init_apic_mappings(void)
 	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
 				APIC_BASE, apic_phys);
 
-	/* Put local APIC into the resource map. */
-	lapic_resource.start = apic_phys;
-	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-	insert_resource(&iomem_resource, &lapic_resource);
-
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 }
 
 /*
@@ -911,8 +941,8 @@ int __init APIC_init_uniprocessor(void)
 
 	verify_local_APIC();
 
-	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
+	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
 
 	setup_local_APIC();
 
@@ -1029,6 +1059,52 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 	apic_write(APIC_LVT1, value);
 }
 
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+	int cpu;
+	cpumask_t tmp_map;
+
+	if (num_processors >= NR_CPUS) {
+		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+		       " Processor ignored.\n", NR_CPUS);
+		return;
+	}
+
+	if (num_processors >= maxcpus) {
+		printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+		       " Processor ignored.\n", maxcpus);
+		return;
+	}
+
+	num_processors++;
+	cpus_complement(tmp_map, cpu_present_map);
+	cpu = first_cpu(tmp_map);
+
+	physid_set(apicid, phys_cpu_present_map);
+	if (apicid == boot_cpu_physical_apicid) {
+		/*
+		 * x86_bios_cpu_apicid is required to have processors listed
+		 * in same order as logical cpu numbers. Hence the first
+		 * entry is BSP, and so on.
+		 */
+		cpu = 0;
+	}
+	/* are we being called early in kernel startup? */
+	if (x86_cpu_to_apicid_early_ptr) {
+		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+
+		cpu_to_apicid[cpu] = apicid;
+		bios_cpu_apicid[cpu] = apicid;
+	} else {
+		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+	}
+
+	cpu_set(cpu, cpu_possible_map);
+	cpu_set(cpu, cpu_present_map);
+}
+
 /*
  * Power management
  */
@@ -1065,7 +1141,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	maxlvt = lapic_get_maxlvt();
 
-	apic_pm_state.apic_id = apic_read(APIC_ID);
+	apic_pm_state.apic_id = read_apic_id();
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1180,9 +1256,19 @@ __cpuinit int apic_is_clustered_box(void)
 {
 	int i, clusters, zeros;
 	unsigned id;
-	u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	u16 *bios_cpu_apicid;
 	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 
+	/*
+	 * There is no clustered box of this kind with AMD CPUs yet.
+	 * Some AMD boxes with quad-core CPUs and 8 sockets have APIC IDs
+	 * in [4, 0x23] or [8, 0x27] and could be mistaken for one;
+	 * vSMP boxes still need checking...
+	 */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+		return 0;
+
+	bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
 	for (i = 0; i < NR_CPUS; i++) {
@@ -1219,6 +1305,12 @@ __cpuinit int apic_is_clustered_box(void)
 			++zeros;
 	}
 
+	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
+	 */
+	if (is_vsmp_box() && clusters > 1)
+		return 1;
+
 	/*
 	 * If clusters > 2, then should be multi-chassis.
 	 * May have to revisit this when multi-core + hyperthreaded CPUs come
@@ -1290,3 +1382,21 @@ static __init int setup_apicpmtimer(char *s)
 }
 __setup("apicpmtimer", setup_apicpmtimer);
 
+static int __init lapic_insert_resource(void)
+{
+	if (!apic_phys)
+		return -1;
+
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
+	return 0;
+}
+
+/*
+ * This must be inserted after e820_reserve_resources(),
+ * which uses request_resource().
+ */
+late_initcall(lapic_insert_resource);

+ 1 - 4
arch/x86/kernel/apm_32.c

@@ -2217,7 +2217,6 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
  */
 static int __init apm_init(void)
 {
-	struct proc_dir_entry *apm_proc;
 	struct desc_struct *gdt;
 	int err;
 
@@ -2322,9 +2321,7 @@ static int __init apm_init(void)
 	set_base(gdt[APM_DS >> 3],
 		 __va((unsigned long)apm_info.bios.dseg << 4));
 
-	apm_proc = create_proc_entry("apm", 0, NULL);
-	if (apm_proc)
-		apm_proc->proc_fops = &apm_file_ops;
+	proc_create("apm", 0, NULL, &apm_file_ops);
 
 	kapmd_task = kthread_create(apm, NULL, "kapmd");
 	if (IS_ERR(kapmd_task)) {

+ 1 - 1
arch/x86/kernel/asm-offsets_32.c

@@ -10,7 +10,7 @@
 #include <linux/personality.h>
 #include <linux/suspend.h>
 #include <asm/ucontext.h>
-#include "sigframe_32.h"
+#include "sigframe.h"
 #include <asm/pgtable.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>

+ 13 - 1
arch/x86/kernel/bugs_64.c

@@ -9,13 +9,25 @@
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/mtrr.h>
+#include <asm/cacheflush.h>
 
 void __init check_bugs(void)
 {
-	identify_cpu(&boot_cpu_data);
+	identify_boot_cpu();
 #if !defined(CONFIG_SMP)
 	printk("CPU: ");
 	print_cpu_info(&boot_cpu_data);
 #endif
 	alternative_instructions();
+
+	/*
+	 * Make sure the first 2MB area is not mapped by huge pages
+	 * There are typically fixed size MTRRs in there and overlapping
+	 * MTRRs into large pages causes slow downs.
+	 *
+	 * Right now we don't do that with gbpages because there seems
+	 * very little benefit for that case.
+	 */
+	if (!direct_gbpages)
+		set_memory_4k((unsigned long)__va(0), 1);
 }

+ 2 - 2
arch/x86/kernel/cpu/Makefile

@@ -3,9 +3,9 @@
 #
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
-obj-y			+= feature_names.o
+obj-y			+= proc.o feature_names.o
 
-obj-$(CONFIG_X86_32)	+= common.o proc.o bugs.o
+obj-$(CONFIG_X86_32)	+= common.o bugs.o
 obj-$(CONFIG_X86_32)	+= amd.o
 obj-$(CONFIG_X86_32)	+= cyrix.o
 obj-$(CONFIG_X86_32)	+= centaur.o

+ 63 - 58
arch/x86/kernel/cpu/amd.c

@@ -4,8 +4,8 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
-#include <asm/mach_apic.h>
 
+#include <mach_apic.h>
 #include "cpu.h"
 
 /*
@@ -20,7 +20,7 @@
  *	the chip setting when fixing the bug but they also tweaked some
  *	performance at the same time..
  */
- 
+
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
@@ -63,12 +63,12 @@ static __cpuinit int amd_apic_timer_broken(void)
 
 int force_mwait __cpuinitdata;
 
-void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
 	if (cpuid_eax(0x80000000) >= 0x80000007) {
 		c->x86_power = cpuid_edx(0x80000007);
 		if (c->x86_power & (1<<8))
-			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 	}
 }
 
@@ -81,7 +81,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
 	unsigned long long value;
 
-	/* Disable TLB flush filter by setting HWCR.FFDIS on K8
+	/*
+	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
 	 * bit 6 of msr C001_0015
 	 *
 	 * Errata 63 for SH-B3 steppings
@@ -102,15 +103,16 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	 *	no bus pipeline)
 	 */
 
-	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
-	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-	clear_bit(0*32+31, c->x86_capability);
-	
+	/*
+	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+	 */
+	clear_cpu_cap(c, 0*32+31);
+
 	r = get_model_name(c);
 
-	switch(c->x86)
-	{
-		case 4:
+	switch (c->x86) {
+	case 4:
 		/*
 		 * General Systems BIOSen alias the cpu frequency registers
 		 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
@@ -120,61 +122,60 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #define CBAR		(0xfffc) /* Configuration Base Address  (32-bit) */
 #define CBAR_ENB	(0x80000000)
 #define CBAR_KEY	(0X000000CB)
-			if (c->x86_model==9 || c->x86_model == 10) {
+			if (c->x86_model == 9 || c->x86_model == 10) {
 				if (inl (CBAR) & CBAR_ENB)
 					outl (0 | CBAR_KEY, CBAR);
 			}
 			break;
-		case 5:
-			if( c->x86_model < 6 )
-			{
+	case 5:
+			if (c->x86_model < 6) {
 				/* Based on AMD doc 20734R - June 2000 */
-				if ( c->x86_model == 0 ) {
-					clear_bit(X86_FEATURE_APIC, c->x86_capability);
-					set_bit(X86_FEATURE_PGE, c->x86_capability);
+				if (c->x86_model == 0) {
+					clear_cpu_cap(c, X86_FEATURE_APIC);
+					set_cpu_cap(c, X86_FEATURE_PGE);
 				}
 				break;
 			}
-			
-			if ( c->x86_model == 6 && c->x86_mask == 1 ) {
+
+			if (c->x86_model == 6 && c->x86_mask == 1) {
 				const int K6_BUG_LOOP = 1000000;
 				int n;
 				void (*f_vide)(void);
 				unsigned long d, d2;
-				
+
 				printk(KERN_INFO "AMD K6 stepping B detected - ");
-				
+
 				/*
-				 * It looks like AMD fixed the 2.6.2 bug and improved indirect 
+				 * It looks like AMD fixed the 2.6.2 bug and improved indirect
 				 * calls at the same time.
 				 */
 
 				n = K6_BUG_LOOP;
 				f_vide = vide;
 				rdtscl(d);
-				while (n--) 
+				while (n--)
 					f_vide();
 				rdtscl(d2);
 				d = d2-d;
 
-				if (d > 20*K6_BUG_LOOP) 
+				if (d > 20*K6_BUG_LOOP)
 					printk("system stability may be impaired when more than 32 MB are used.\n");
-				else 
+				else
 					printk("probably OK (after B9730xxxx).\n");
 				printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
 			}
 
 			/* K6 with old style WHCR */
 			if (c->x86_model < 8 ||
-			   (c->x86_model== 8 && c->x86_mask < 8)) {
+			   (c->x86_model == 8 && c->x86_mask < 8)) {
 				/* We can only write allocate on the low 508Mb */
-				if(mbytes>508)
-					mbytes=508;
+				if (mbytes > 508)
+					mbytes = 508;
 
 				rdmsr(MSR_K6_WHCR, l, h);
-				if ((l&0x0000FFFF)==0) {
+				if ((l&0x0000FFFF) == 0) {
 					unsigned long flags;
-					l=(1<<0)|((mbytes/4)<<1);
+					l = (1<<0)|((mbytes/4)<<1);
 					local_irq_save(flags);
 					wbinvd();
 					wrmsr(MSR_K6_WHCR, l, h);
@@ -185,17 +186,17 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 				break;
 			}
 
-			if ((c->x86_model == 8 && c->x86_mask >7) ||
+			if ((c->x86_model == 8 && c->x86_mask > 7) ||
 			     c->x86_model == 9 || c->x86_model == 13) {
 				/* The more serious chips .. */
 
-				if(mbytes>4092)
-					mbytes=4092;
+				if (mbytes > 4092)
+					mbytes = 4092;
 
 				rdmsr(MSR_K6_WHCR, l, h);
-				if ((l&0xFFFF0000)==0) {
+				if ((l&0xFFFF0000) == 0) {
 					unsigned long flags;
-					l=((mbytes>>2)<<22)|(1<<16);
+					l = ((mbytes>>2)<<22)|(1<<16);
 					local_irq_save(flags);
 					wbinvd();
 					wrmsr(MSR_K6_WHCR, l, h);
@@ -207,7 +208,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 				/*  Set MTRR capability flag if appropriate */
 				if (c->x86_model == 13 || c->x86_model == 9 ||
 				   (c->x86_model == 8 && c->x86_mask >= 8))
-					set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
+					set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 				break;
 			}
 
@@ -217,10 +218,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 				break;
 			}
 			break;
-		case 6: /* An Athlon/Duron */
- 
-			/* Bit 15 of Athlon specific MSR 15, needs to be 0
- 			 * to enable SSE on Palomino/Morgan/Barton CPU's.
+	case 6: /* An Athlon/Duron */
+
+			/*
+			 * Bit 15 of Athlon specific MSR 15, needs to be 0
+			 * to enable SSE on Palomino/Morgan/Barton CPU's.
 			 * If the BIOS didn't enable it already, enable it here.
 			 */
 			if (c->x86_model >= 6 && c->x86_model <= 10) {
@@ -229,15 +231,16 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 					rdmsr(MSR_K7_HWCR, l, h);
 					l &= ~0x00008000;
 					wrmsr(MSR_K7_HWCR, l, h);
-					set_bit(X86_FEATURE_XMM, c->x86_capability);
+					set_cpu_cap(c, X86_FEATURE_XMM);
 				}
 			}
 
-			/* It's been determined by AMD that Athlons since model 8 stepping 1
+			/*
+			 * It's been determined by AMD that Athlons since model 8 stepping 1
 			 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
 			 * As per AMD technical note 27212 0.2
 			 */
-			if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
+			if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
 				rdmsr(MSR_K7_CLK_CTL, l, h);
 				if ((l & 0xfff00000) != 0x20000000) {
 					printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
@@ -253,20 +256,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* Use K8 tuning for Fam10h and Fam11h */
 	case 0x10:
 	case 0x11:
-		set_bit(X86_FEATURE_K8, c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_K8);
 		break;
 	case 6:
-		set_bit(X86_FEATURE_K7, c->x86_capability); 
+		set_cpu_cap(c, X86_FEATURE_K7);
 		break;
 	}
 	if (c->x86 >= 6)
-		set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
 
 	display_cacheinfo(c);
 
-	if (cpuid_eax(0x80000000) >= 0x80000008) {
+	if (cpuid_eax(0x80000000) >= 0x80000008)
 		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-	}
 
 #ifdef CONFIG_X86_HT
 	/*
@@ -302,20 +304,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	/* K6s reports MCEs but don't actually have all the MSRs */
 	if (c->x86 < 6)
-		clear_bit(X86_FEATURE_MCE, c->x86_capability);
+		clear_cpu_cap(c, X86_FEATURE_MCE);
 
 	if (cpu_has_xmm2)
-		set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
 }
 
-static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 {
 	/* AMD errata T13 (order #21922) */
 	if ((c->x86 == 6)) {
 		if (c->x86_model == 3 && c->x86_mask == 0)	/* Duron Rev A0 */
 			size = 64;
 		if (c->x86_model == 4 &&
-		    (c->x86_mask==0 || c->x86_mask==1))	/* Tbird rev A1/A2 */
+		    (c->x86_mask == 0 || c->x86_mask == 1))	/* Tbird rev A1/A2 */
 			size = 256;
 	}
 	return size;
@@ -323,19 +325,20 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned in
 
 static struct cpu_dev amd_cpu_dev __cpuinitdata = {
 	.c_vendor	= "AMD",
-	.c_ident 	= { "AuthenticAMD" },
+	.c_ident	= { "AuthenticAMD" },
 	.c_models = {
 		{ .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
 		  {
 			  [3] = "486 DX/2",
 			  [7] = "486 DX/2-WB",
-			  [8] = "486 DX/4", 
-			  [9] = "486 DX/4-WB", 
+			  [8] = "486 DX/4",
+			  [9] = "486 DX/4-WB",
 			  [14] = "Am5x86-WT",
-			  [15] = "Am5x86-WB" 
+			  [15] = "Am5x86-WB"
 		  }
 		},
 	},
+	.c_early_init   = early_init_amd,
 	.c_init		= init_amd,
 	.c_size_cache	= amd_size_cache,
 };
@@ -345,3 +348,5 @@ int __init amd_init_cpu(void)
 	cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
 	return 0;
 }
+
+cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);

+ 244 - 246
arch/x86/kernel/cpu/centaur.c

@@ -1,31 +1,34 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/bitops.h>
+
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
+
 #include "cpu.h"
 
 #ifdef CONFIG_X86_OOSTORE
 
 static u32 __cpuinit power2(u32 x)
 {
-	u32 s=1;
-	while(s<=x)
-		s<<=1;
-	return s>>=1;
+	u32 s = 1;
+
+	while (s <= x)
+		s <<= 1;
+
+	return s >>= 1;
 }
 
 
 /*
- *	Set up an actual MCR
+ * Set up an actual MCR
  */
- 
 static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
 {
 	u32 lo, hi;
-	
+
 	hi = base & ~0xFFF;
 	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
 	lo &= ~0xFFF;		/* Remove the ctrl value bits */
@@ -35,30 +38,28 @@ static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
 }
 
 /*
- *	Figure what we can cover with MCR's
+ * Figure what we can cover with MCR's
  *
- *	Shortcut: We know you can't put 4Gig of RAM on a winchip
+ * Shortcut: We know you can't put 4Gig of RAM on a winchip
  */
-
-static u32 __cpuinit ramtop(void)		/* 16388 */
+static u32 __cpuinit ramtop(void)
 {
-	int i;
-	u32 top = 0;
 	u32 clip = 0xFFFFFFFFUL;
-	
+	u32 top = 0;
+	int i;
+
 	for (i = 0; i < e820.nr_map; i++) {
 		unsigned long start, end;
 
 		if (e820.map[i].addr > 0xFFFFFFFFUL)
 			continue;
 		/*
-		 *	Don't MCR over reserved space. Ignore the ISA hole
-		 *	we frob around that catastrophe already
+		 * Don't MCR over reserved space. Ignore the ISA hole
+		 * we frob around that catastrophe already
 		 */
-		 			
-		if (e820.map[i].type == E820_RESERVED)
-		{
-			if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip)
+		if (e820.map[i].type == E820_RESERVED) {
+			if (e820.map[i].addr >= 0x100000UL &&
+			    e820.map[i].addr < clip)
 				clip = e820.map[i].addr;
 			continue;
 		}
@@ -69,28 +70,27 @@ static u32 __cpuinit ramtop(void)		/* 16388 */
 		if (end > top)
 			top = end;
 	}
-	/* Everything below 'top' should be RAM except for the ISA hole.
-	   Because of the limited MCR's we want to map NV/ACPI into our
-	   MCR range for gunk in RAM 
-	   
-	   Clip might cause us to MCR insufficient RAM but that is an
-	   acceptable failure mode and should only bite obscure boxes with
-	   a VESA hole at 15Mb
-	   
-	   The second case Clip sometimes kicks in is when the EBDA is marked
-	   as reserved. Again we fail safe with reasonable results
-	*/
-	
-	if(top>clip)
-		top=clip;
-		
+	/*
+	 * Everything below 'top' should be RAM except for the ISA hole.
+	 * Because of the limited MCR's we want to map NV/ACPI into our
+	 * MCR range for gunk in RAM
+	 *
+	 * Clip might cause us to MCR insufficient RAM but that is an
+	 * acceptable failure mode and should only bite obscure boxes with
+	 * a VESA hole at 15Mb
+	 *
+	 * The second case Clip sometimes kicks in is when the EBDA is marked
+	 * as reserved. Again we fail safe with reasonable results
+	 */
+	if (top > clip)
+		top = clip;
+
 	return top;
 }
 
 /*
- *	Compute a set of MCR's to give maximum coverage
+ * Compute a set of MCR's to give maximum coverage
  */
-
 static int __cpuinit centaur_mcr_compute(int nr, int key)
 {
 	u32 mem = ramtop();
@@ -99,141 +99,131 @@ static int __cpuinit centaur_mcr_compute(int nr, int key)
 	u32 top = root;
 	u32 floor = 0;
 	int ct = 0;
-	
-	while(ct<nr)
-	{
+
+	while (ct < nr) {
 		u32 fspace = 0;
+		u32 high;
+		u32 low;
 
 		/*
-		 *	Find the largest block we will fill going upwards
+		 * Find the largest block we will fill going upwards
 		 */
-
-		u32 high = power2(mem-top);	
+		high = power2(mem-top);
 
 		/*
-		 *	Find the largest block we will fill going downwards
+		 * Find the largest block we will fill going downwards
 		 */
-
-		u32 low = base/2;
+		low = base/2;
 
 		/*
-		 *	Don't fill below 1Mb going downwards as there
-		 *	is an ISA hole in the way.
-		 */		
-		 
-		if(base <= 1024*1024)
+		 * Don't fill below 1Mb going downwards as there
+		 * is an ISA hole in the way.
+		 */
+		if (base <= 1024*1024)
 			low = 0;
-			
+
 		/*
-		 *	See how much space we could cover by filling below
-		 *	the ISA hole
+		 * See how much space we could cover by filling below
+		 * the ISA hole
 		 */
-		 
-		if(floor == 0)
+
+		if (floor == 0)
 			fspace = 512*1024;
-		else if(floor ==512*1024)
+		else if (floor == 512*1024)
 			fspace = 128*1024;
 
 		/* And forget ROM space */
-		
+
 		/*
-		 *	Now install the largest coverage we get
+		 * Now install the largest coverage we get
 		 */
-		 
-		if(fspace > high && fspace > low)
-		{
+		if (fspace > high && fspace > low) {
 			centaur_mcr_insert(ct, floor, fspace, key);
 			floor += fspace;
-		}
-		else if(high > low)
-		{
+		} else if (high > low) {
 			centaur_mcr_insert(ct, top, high, key);
 			top += high;
-		}
-		else if(low > 0)
-		{
+		} else if (low > 0) {
 			base -= low;
 			centaur_mcr_insert(ct, base, low, key);
-		}
-		else break;
+		} else
+			break;
 		ct++;
 	}
 	/*
-	 *	We loaded ct values. We now need to set the mask. The caller
-	 *	must do this bit.
+	 * We loaded ct values. We now need to set the mask. The caller
+	 * must do this bit.
 	 */
-	 
 	return ct;
 }
 
 static void __cpuinit centaur_create_optimal_mcr(void)
 {
+	int used;
 	int i;
+
 	/*
-	 *	Allocate up to 6 mcrs to mark as much of ram as possible
-	 *	as write combining and weak write ordered.
+	 * Allocate up to 6 mcrs to mark as much of ram as possible
+	 * as write combining and weak write ordered.
 	 *
-	 *	To experiment with: Linux never uses stack operations for 
-	 *	mmio spaces so we could globally enable stack operation wc
+	 * To experiment with: Linux never uses stack operations for
+	 * mmio spaces so we could globally enable stack operation wc
 	 *
-	 *	Load the registers with type 31 - full write combining, all
-	 *	writes weakly ordered.
+	 * Load the registers with type 31 - full write combining, all
+	 * writes weakly ordered.
 	 */
-	int used = centaur_mcr_compute(6, 31);
+	used = centaur_mcr_compute(6, 31);
 
 	/*
-	 *	Wipe unused MCRs
+	 * Wipe unused MCRs
 	 */
-	 
-	for(i=used;i<8;i++)
+	for (i = used; i < 8; i++)
 		wrmsr(MSR_IDT_MCR0+i, 0, 0);
 }
 
 static void __cpuinit winchip2_create_optimal_mcr(void)
 {
 	u32 lo, hi;
+	int used;
 	int i;
 
 	/*
-	 *	Allocate up to 6 mcrs to mark as much of ram as possible
-	 *	as write combining, weak store ordered.
+	 * Allocate up to 6 mcrs to mark as much of ram as possible
+	 * as write combining, weak store ordered.
 	 *
-	 *	Load the registers with type 25
-	 *		8	-	weak write ordering
-	 *		16	-	weak read ordering
-	 *		1	-	write combining
+	 * Load the registers with type 25
+	 *	8	-	weak write ordering
+	 *	16	-	weak read ordering
+	 *	1	-	write combining
 	 */
+	used = centaur_mcr_compute(6, 25);
 
-	int used = centaur_mcr_compute(6, 25);
-	
 	/*
-	 *	Mark the registers we are using.
+	 * Mark the registers we are using.
 	 */
-	 
 	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-	for(i=0;i<used;i++)
-		lo|=1<<(9+i);
+	for (i = 0; i < used; i++)
+		lo |= 1<<(9+i);
 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-	
+
 	/*
-	 *	Wipe unused MCRs
+	 * Wipe unused MCRs
 	 */
-	 
-	for(i=used;i<8;i++)
+
+	for (i = used; i < 8; i++)
 		wrmsr(MSR_IDT_MCR0+i, 0, 0);
 }
 
 /*
- *	Handle the MCR key on the Winchip 2.
+ * Handle the MCR key on the Winchip 2.
  */
-
 static void __cpuinit winchip2_unprotect_mcr(void)
 {
 	u32 lo, hi;
 	u32 key;
-	
+
 	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-	lo&=~0x1C0;	/* blank bits 8-6 */
+	lo &= ~0x1C0;	/* blank bits 8-6 */
 	key = (lo>>17) & 7;
 	lo |= key<<6;	/* replace with unlock key */
 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
@@ -242,9 +232,9 @@ static void __cpuinit winchip2_unprotect_mcr(void)
 static void __cpuinit winchip2_protect_mcr(void)
 {
 	u32 lo, hi;
-	
+
 	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-	lo&=~0x1C0;	/* blank bits 8-6 */
+	lo &= ~0x1C0;	/* blank bits 8-6 */
 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
 }
 #endif /* CONFIG_X86_OOSTORE */
@@ -267,17 +257,17 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 
 		/* enable ACE unit, if present and disabled */
 		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
-			rdmsr (MSR_VIA_FCR, lo, hi);
+			rdmsr(MSR_VIA_FCR, lo, hi);
 			lo |= ACE_FCR;		/* enable ACE unit */
-			wrmsr (MSR_VIA_FCR, lo, hi);
+			wrmsr(MSR_VIA_FCR, lo, hi);
 			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
 		}
 
 		/* enable RNG unit, if present and disabled */
 		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
-			rdmsr (MSR_VIA_RNG, lo, hi);
+			rdmsr(MSR_VIA_RNG, lo, hi);
 			lo |= RNG_ENABLE;	/* enable RNG unit */
-			wrmsr (MSR_VIA_RNG, lo, hi);
+			wrmsr(MSR_VIA_RNG, lo, hi);
 			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
 		}
 
@@ -288,171 +278,183 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 	}
 
 	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
-	if (c->x86_model >=6 && c->x86_model <= 9) {
-		rdmsr (MSR_VIA_FCR, lo, hi);
+	if (c->x86_model >= 6 && c->x86_model <= 9) {
+		rdmsr(MSR_VIA_FCR, lo, hi);
 		lo |= (1<<1 | 1<<7);
-		wrmsr (MSR_VIA_FCR, lo, hi);
-		set_bit(X86_FEATURE_CX8, c->x86_capability);
+		wrmsr(MSR_VIA_FCR, lo, hi);
+		set_cpu_cap(c, X86_FEATURE_CX8);
 	}
 
 	/* Before Nehemiah, the C3's had 3dNOW! */
-	if (c->x86_model >=6 && c->x86_model <9)
-		set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+	if (c->x86_model >= 6 && c->x86_model < 9)
+		set_cpu_cap(c, X86_FEATURE_3DNOW);
 
 	get_model_name(c);
 	display_cacheinfo(c);
 }
 
+enum {
+		ECX8		= 1<<1,
+		EIERRINT	= 1<<2,
+		DPM		= 1<<3,
+		DMCE		= 1<<4,
+		DSTPCLK		= 1<<5,
+		ELINEAR		= 1<<6,
+		DSMC		= 1<<7,
+		DTLOCK		= 1<<8,
+		EDCTLB		= 1<<8,
+		EMMX		= 1<<9,
+		DPDC		= 1<<11,
+		EBRPRED		= 1<<12,
+		DIC		= 1<<13,
+		DDC		= 1<<14,
+		DNA		= 1<<15,
+		ERETSTK		= 1<<16,
+		E2MMX		= 1<<19,
+		EAMD3D		= 1<<20,
+};
+
 static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 {
-	enum {
-		ECX8=1<<1,
-		EIERRINT=1<<2,
-		DPM=1<<3,
-		DMCE=1<<4,
-		DSTPCLK=1<<5,
-		ELINEAR=1<<6,
-		DSMC=1<<7,
-		DTLOCK=1<<8,
-		EDCTLB=1<<8,
-		EMMX=1<<9,
-		DPDC=1<<11,
-		EBRPRED=1<<12,
-		DIC=1<<13,
-		DDC=1<<14,
-		DNA=1<<15,
-		ERETSTK=1<<16,
-		E2MMX=1<<19,
-		EAMD3D=1<<20,
-	};
 
 	char *name;
-	u32  fcr_set=0;
-	u32  fcr_clr=0;
-	u32  lo,hi,newlo;
-	u32  aa,bb,cc,dd;
+	u32  fcr_set = 0;
+	u32  fcr_clr = 0;
+	u32  lo, hi, newlo;
+	u32  aa, bb, cc, dd;
 
-	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
-	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-	clear_bit(0*32+31, c->x86_capability);
+	/*
+	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+	 */
+	clear_cpu_cap(c, 0*32+31);
 
 	switch (c->x86) {
-
-		case 5:
-			switch(c->x86_model) {
-			case 4:
-				name="C6";
-				fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
-				fcr_clr=DPDC;
-				printk(KERN_NOTICE "Disabling bugged TSC.\n");
-				clear_bit(X86_FEATURE_TSC, c->x86_capability);
+	case 5:
+		switch (c->x86_model) {
+		case 4:
+			name = "C6";
+			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
+			fcr_clr = DPDC;
+			printk(KERN_NOTICE "Disabling bugged TSC.\n");
+			clear_cpu_cap(c, X86_FEATURE_TSC);
 #ifdef CONFIG_X86_OOSTORE
-				centaur_create_optimal_mcr();
-				/* Enable
-					write combining on non-stack, non-string
-					write combining on string, all types
-					weak write ordering 
-					
-				   The C6 original lacks weak read order 
-				   
-				   Note 0x120 is write only on Winchip 1 */
-				   
-				wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
-#endif				
+			centaur_create_optimal_mcr();
+			/*
+			 * Enable:
+			 *	write combining on non-stack, non-string
+			 *	write combining on string, all types
+			 *	weak write ordering
+			 *
+			 * The C6 original lacks weak read order
+			 *
+			 * Note 0x120 is write only on Winchip 1
+			 */
+			wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
+#endif
+			break;
+		case 8:
+			switch (c->x86_mask) {
+			default:
+			name = "2";
+				break;
+			case 7 ... 9:
+				name = "2A";
 				break;
-			case 8:
-				switch(c->x86_mask) {
-				default:
-					name="2";
-					break;
-				case 7 ... 9:
-					name="2A";
-					break;
-				case 10 ... 15:
-					name="2B";
-					break;
-				}
-				fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
-				fcr_clr=DPDC;
+			case 10 ... 15:
+				name = "2B";
+				break;
+			}
+			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
+				  E2MMX|EAMD3D;
+			fcr_clr = DPDC;
 #ifdef CONFIG_X86_OOSTORE
-				winchip2_unprotect_mcr();
-				winchip2_create_optimal_mcr();
-				rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-				/* Enable
-					write combining on non-stack, non-string
-					write combining on string, all types
-					weak write ordering 
-				*/
-				lo|=31;				
-				wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-				winchip2_protect_mcr();
+			winchip2_unprotect_mcr();
+			winchip2_create_optimal_mcr();
+			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+			/*
+			 * Enable:
+			 *	write combining on non-stack, non-string
+			 *	write combining on string, all types
+			 *	weak write ordering
+			 */
+			lo |= 31;
+			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+			winchip2_protect_mcr();
 #endif
-				break;
-			case 9:
-				name="3";
-				fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
-				fcr_clr=DPDC;
+			break;
+		case 9:
+			name = "3";
+			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
+				  E2MMX|EAMD3D;
+			fcr_clr = DPDC;
 #ifdef CONFIG_X86_OOSTORE
-				winchip2_unprotect_mcr();
-				winchip2_create_optimal_mcr();
-				rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-				/* Enable
-					write combining on non-stack, non-string
-					write combining on string, all types
-					weak write ordering 
-				*/
-				lo|=31;				
-				wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-				winchip2_protect_mcr();
+			winchip2_unprotect_mcr();
+			winchip2_create_optimal_mcr();
+			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+			/*
+			 * Enable:
+			 *	write combining on non-stack, non-string
+			 *	write combining on string, all types
+			 *	weak write ordering
+			 */
+			lo |= 31;
+			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+			winchip2_protect_mcr();
 #endif
-				break;
-			default:
-				name="??";
-			}
+			break;
+		default:
+			name = "??";
+		}
 
-			rdmsr(MSR_IDT_FCR1, lo, hi);
-			newlo=(lo|fcr_set) & (~fcr_clr);
+		rdmsr(MSR_IDT_FCR1, lo, hi);
+		newlo = (lo|fcr_set) & (~fcr_clr);
 
-			if (newlo!=lo) {
-				printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
-				wrmsr(MSR_IDT_FCR1, newlo, hi );
-			} else {
-				printk(KERN_INFO "Centaur FCR is 0x%X\n",lo);
-			}
-			/* Emulate MTRRs using Centaur's MCR. */
-			set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability);
-			/* Report CX8 */
-			set_bit(X86_FEATURE_CX8, c->x86_capability);
-			/* Set 3DNow! on Winchip 2 and above. */
-			if (c->x86_model >=8)
-				set_bit(X86_FEATURE_3DNOW, c->x86_capability);
-			/* See if we can find out some more. */
-			if ( cpuid_eax(0x80000000) >= 0x80000005 ) {
-				/* Yes, we can. */
-				cpuid(0x80000005,&aa,&bb,&cc,&dd);
-				/* Add L1 data and code cache sizes. */
-				c->x86_cache_size = (cc>>24)+(dd>>24);
-			}
-			sprintf( c->x86_model_id, "WinChip %s", name );
-			break;
+		if (newlo != lo) {
+			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+				lo, newlo);
+			wrmsr(MSR_IDT_FCR1, newlo, hi);
+		} else {
+			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+		}
+		/* Emulate MTRRs using Centaur's MCR. */
+		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
+		/* Report CX8 */
+		set_cpu_cap(c, X86_FEATURE_CX8);
+		/* Set 3DNow! on Winchip 2 and above. */
+		if (c->x86_model >= 8)
+			set_cpu_cap(c, X86_FEATURE_3DNOW);
+		/* See if we can find out some more. */
+		if (cpuid_eax(0x80000000) >= 0x80000005) {
+			/* Yes, we can. */
+			cpuid(0x80000005, &aa, &bb, &cc, &dd);
+			/* Add L1 data and code cache sizes. */
+			c->x86_cache_size = (cc>>24)+(dd>>24);
+		}
+		sprintf(c->x86_model_id, "WinChip %s", name);
+		break;
 
-		case 6:
-			init_c3(c);
-			break;
+	case 6:
+		init_c3(c);
+		break;
 	}
 }
 
-static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit
+centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 {
 	/* VIA C3 CPUs (670-68F) need further shifting. */
 	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
 		size >>= 8;
 
-	/* VIA also screwed up Nehemiah stepping 1, and made
-	   it return '65KB' instead of '64KB'
-	   - Note, it seems this may only be in engineering samples. */
-	if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65))
-		size -=1;
+	/*
+	 * There's also an erratum in Nehemiah stepping 1, which
+	 * returns '65KB' instead of '64KB'
+	 *  - Note, it seems this may only be in engineering samples.
+	 */
+	if ((c->x86 == 6) && (c->x86_model == 9) &&
+				(c->x86_mask == 1) && (size == 65))
+		size -= 1;
 
 	return size;
 }
@@ -464,8 +466,4 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
 	.c_size_cache	= centaur_size_cache,
 };
 
-int __init centaur_init_cpu(void)
-{
-	cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
-	return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);

+ 99 - 81
arch/x86/kernel/cpu/common.c

@@ -62,9 +62,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_serial_nr __cpuinitdata = 1;
 
-struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
 
-static void __cpuinit default_init(struct cpuinfo_x86 * c)
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
 {
 	/* Not much we can do here... */
 	/* Check if at least it has cpuid */
@@ -81,11 +81,11 @@ static struct cpu_dev __cpuinitdata default_cpu = {
 	.c_init	= default_init,
 	.c_vendor = "Unknown",
 };
-static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
+static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
 
 static int __init cachesize_setup(char *str)
 {
-	get_option (&str, &cachesize_override);
+	get_option(&str, &cachesize_override);
 	return 1;
 }
 __setup("cachesize=", cachesize_setup);
@@ -107,12 +107,12 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
 	/* Intel chips right-justify this string for some dumb reason;
 	   undo that brain damage */
 	p = q = &c->x86_model_id[0];
-	while ( *p == ' ' )
+	while (*p == ' ')
 	     p++;
-	if ( p != q ) {
-	     while ( *p )
+	if (p != q) {
+	     while (*p)
 		  *q++ = *p++;
-	     while ( q <= &c->x86_model_id[48] )
+	     while (q <= &c->x86_model_id[48])
 		  *q++ = '\0';	/* Zero-pad the rest */
 	}
 
@@ -130,7 +130,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
 		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
 			edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
-		c->x86_cache_size=(ecx>>24)+(edx>>24);	
+		c->x86_cache_size = (ecx>>24)+(edx>>24);
 	}
 
 	if (n < 0x80000006)	/* Some chips just has a large L1. */
@@ -138,16 +138,16 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 
 	ecx = cpuid_ecx(0x80000006);
 	l2size = ecx >> 16;
-	
+
 	/* do processor-specific cache resizing */
 	if (this_cpu->c_size_cache)
-		l2size = this_cpu->c_size_cache(c,l2size);
+		l2size = this_cpu->c_size_cache(c, l2size);
 
 	/* Allow user to override all this if necessary. */
 	if (cachesize_override != -1)
 		l2size = cachesize_override;
 
-	if ( l2size == 0 )
+	if (l2size == 0)
 		return;		/* Again, no L2 cache is possible */
 
 	c->x86_cache_size = l2size;
@@ -156,16 +156,19 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 	       l2size, ecx & 0xFF);
 }
 
-/* Naming convention should be: <Name> [(<Codename>)] */
-/* This table only is used unless init_<vendor>() below doesn't set it; */
-/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+/*
+ * Naming convention should be: <Name> [(<Codename>)]
+ * This table is only used if init_<vendor>() below doesn't set the name;
+ * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
+ *
+ */
 
 /* Look up CPU names by table lookup. */
 static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
 {
 	struct cpu_model_info *info;
 
-	if ( c->x86_model >= 16 )
+	if (c->x86_model >= 16)
 		return NULL;	/* Range check */
 
 	if (!this_cpu)
@@ -190,9 +193,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
 
 	for (i = 0; i < X86_VENDOR_NUM; i++) {
 		if (cpu_devs[i]) {
-			if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
-			    (cpu_devs[i]->c_ident[1] && 
-			     !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+			if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
+			    (cpu_devs[i]->c_ident[1] &&
+			     !strcmp(v, cpu_devs[i]->c_ident[1]))) {
 				c->x86_vendor = i;
 				if (!early)
 					this_cpu = cpu_devs[i];
@@ -210,7 +213,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
 }
 
 
-static int __init x86_fxsr_setup(char * s)
+static int __init x86_fxsr_setup(char *s)
 {
 	setup_clear_cpu_cap(X86_FEATURE_FXSR);
 	setup_clear_cpu_cap(X86_FEATURE_XMM);
@@ -219,7 +222,7 @@ static int __init x86_fxsr_setup(char * s)
 __setup("nofxsr", x86_fxsr_setup);
 
 
-static int __init x86_sep_setup(char * s)
+static int __init x86_sep_setup(char *s)
 {
 	setup_clear_cpu_cap(X86_FEATURE_SEP);
 	return 1;
@@ -306,14 +309,30 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
 
 	}
 
-}
+	clear_cpu_cap(c, X86_FEATURE_PAT);
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		if (c->x86 >= 0xf && c->x86 <= 0x11)
+			set_cpu_cap(c, X86_FEATURE_PAT);
+		break;
+	case X86_VENDOR_INTEL:
+		if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+			set_cpu_cap(c, X86_FEATURE_PAT);
+		break;
+	}
 
-/* Do minimum CPU detection early.
-   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
-   The others are not touched to avoid unwanted side effects.
+}
 
-   WARNING: this function is only called on the BP.  Don't add code here
-   that is supposed to run on all CPUs. */
+/*
+ * Do minimum CPU detection early.
+ * Fields really needed: vendor, cpuid_level, family, model, mask,
+ * cache alignment.
+ * The others are not touched to avoid unwanted side effects.
+ *
+ * WARNING: this function is only called on the boot processor (BP).
+ * Don't add code here that is supposed to run on all CPUs.
+ */
 static void __init early_cpu_detect(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -328,19 +347,14 @@ static void __init early_cpu_detect(void)
 
 	get_cpu_vendor(c, 1);
 
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		early_init_amd(c);
-		break;
-	case X86_VENDOR_INTEL:
-		early_init_intel(c);
-		break;
-	}
+	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
+	    cpu_devs[c->x86_vendor]->c_early_init)
+		cpu_devs[c->x86_vendor]->c_early_init(c);
 
 	early_get_cap(c);
 }
 
-static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 {
 	u32 tfms, xlvl;
 	unsigned int ebx;
@@ -351,13 +365,12 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 		      (unsigned int *)&c->x86_vendor_id[0],
 		      (unsigned int *)&c->x86_vendor_id[8],
 		      (unsigned int *)&c->x86_vendor_id[4]);
-		
+
 		get_cpu_vendor(c, 0);
 		/* Initialize the standard set of capabilities */
 		/* Note that the vendor-specific code below might override */
-	
 		/* Intel-defined flags: level 0x00000001 */
-		if ( c->cpuid_level >= 0x00000001 ) {
+		if (c->cpuid_level >= 0x00000001) {
 			u32 capability, excap;
 			cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
 			c->x86_capability[0] = capability;
@@ -369,12 +382,14 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 			if (c->x86 >= 0x6)
 				c->x86_model += ((tfms >> 16) & 0xF) << 4;
 			c->x86_mask = tfms & 15;
+			c->initial_apicid = (ebx >> 24) & 0xFF;
 #ifdef CONFIG_X86_HT
-			c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+			c->apicid = phys_pkg_id(c->initial_apicid, 0);
+			c->phys_proc_id = c->initial_apicid;
 #else
-			c->apicid = (ebx >> 24) & 0xFF;
+			c->apicid = c->initial_apicid;
 #endif
-			if (c->x86_capability[0] & (1<<19))
+			if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
 				c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
 		} else {
 			/* Have CPUID level 0 only - unheard of */
@@ -383,33 +398,42 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 
 		/* AMD-defined flags: level 0x80000001 */
 		xlvl = cpuid_eax(0x80000000);
-		if ( (xlvl & 0xffff0000) == 0x80000000 ) {
-			if ( xlvl >= 0x80000001 ) {
+		if ((xlvl & 0xffff0000) == 0x80000000) {
+			if (xlvl >= 0x80000001) {
 				c->x86_capability[1] = cpuid_edx(0x80000001);
 				c->x86_capability[6] = cpuid_ecx(0x80000001);
 			}
-			if ( xlvl >= 0x80000004 )
+			if (xlvl >= 0x80000004)
 				get_model_name(c); /* Default name */
 		}
 
 		init_scattered_cpuid_features(c);
 	}
 
-#ifdef CONFIG_X86_HT
-	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
-#endif
+	clear_cpu_cap(c, X86_FEATURE_PAT);
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		if (c->x86 >= 0xf && c->x86 <= 0x11)
+			set_cpu_cap(c, X86_FEATURE_PAT);
+		break;
+	case X86_VENDOR_INTEL:
+		if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+			set_cpu_cap(c, X86_FEATURE_PAT);
+		break;
+	}
 }
 
 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
 		/* Disable processor serial number */
-		unsigned long lo,hi;
-		rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+		unsigned long lo, hi;
+		rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
 		lo |= 0x200000;
-		wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+		wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
 		printk(KERN_NOTICE "CPU serial number disabled.\n");
-		clear_bit(X86_FEATURE_PN, c->x86_capability);
+		clear_cpu_cap(c, X86_FEATURE_PN);
 
 		/* Disabling the serial number may affect the cpuid level */
 		c->cpuid_level = cpuid_eax(0);
@@ -444,9 +468,11 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
 	if (!have_cpuid_p()) {
-		/* First of all, decide if this is a 486 or higher */
-		/* It's a 486 if we can modify the AC flag */
-		if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+		/*
+		 * First of all, decide if this is a 486 or higher
+		 * It's a 486 if we can modify the AC flag
+		 */
+		if (flag_is_changeable_p(X86_EFLAGS_AC))
 			c->x86 = 4;
 		else
 			c->x86 = 3;
@@ -479,10 +505,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	 */
 
 	/* If the model name is still unset, do table lookup. */
-	if ( !c->x86_model_id[0] ) {
+	if (!c->x86_model_id[0]) {
 		char *p;
 		p = table_lookup_model(c);
-		if ( p )
+		if (p)
 			strcpy(c->x86_model_id, p);
 		else
 			/* Last resort... */
@@ -496,9 +522,9 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	 * common between the CPUs.  The first time this routine gets
 	 * executed, c == &boot_cpu_data.
 	 */
-	if ( c != &boot_cpu_data ) {
+	if (c != &boot_cpu_data) {
 		/* AND the already accumulated flags with these */
-		for ( i = 0 ; i < NCAPINTS ; i++ )
+		for (i = 0 ; i < NCAPINTS ; i++)
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
@@ -542,7 +568,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-	} else if (smp_num_siblings > 1 ) {
+	} else if (smp_num_siblings > 1) {
 
 		if (smp_num_siblings > NR_CPUS) {
 			printk(KERN_WARNING "CPU: Unsupported number of the "
@@ -552,7 +578,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 		}
 
 		index_msb = get_count_order(smp_num_siblings);
-		c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+		c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
 
 		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 		       c->phys_proc_id);
@@ -563,7 +589,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 		core_bits = get_count_order(c->x86_max_cores);
 
-		c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+		c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
 					       ((1 << core_bits) - 1);
 
 		if (c->x86_max_cores > 1)
@@ -597,7 +623,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 	else
 		printk("%s", c->x86_model_id);
 
-	if (c->x86_mask || c->cpuid_level >= 0) 
+	if (c->x86_mask || c->cpuid_level >= 0)
 		printk(" stepping %02x\n", c->x86_mask);
 	else
 		printk("\n");
@@ -616,23 +642,15 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
-/* This is hacky. :)
- * We're emulating future behavior.
- * In the future, the cpu-specific init functions will be called implicitly
- * via the magic of initcalls.
- * They will insert themselves into the cpu_devs structure.
- * Then, when cpu_init() is called, we can just iterate over that array.
- */
 void __init early_cpu_init(void)
 {
-	intel_cpu_init();
-	cyrix_init_cpu();
-	nsc_init_cpu();
-	amd_init_cpu();
-	centaur_init_cpu();
-	transmeta_init_cpu();
-	nexgen_init_cpu();
-	umc_init_cpu();
+	struct cpu_vendor_dev *cvdev;	/* cursor over the vendor records */
+
+	for (cvdev = __x86cpuvendor_start ;	/* presumably the .x86cpuvendor.init entries; linker script not shown */
+	     cvdev < __x86cpuvendor_end   ;
+	     cvdev++)
+		cpu_devs[cvdev->vendor] = cvdev->cpu_dev;	/* install this vendor's cpu_dev */
+
 	early_cpu_detect();
 }
 
@@ -666,7 +684,7 @@ void __cpuinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct * t = &per_cpu(init_tss, cpu);
+	struct tss_struct *t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
@@ -692,7 +710,7 @@ void __cpuinit cpu_init(void)
 	enter_lazy_tlb(&init_mm, curr);
 
 	load_sp0(t, thread);
-	set_tss_desc(cpu,t);
+	set_tss_desc(cpu, t);
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 

+ 13 - 13
arch/x86/kernel/cpu/cpu.h

@@ -14,6 +14,7 @@ struct cpu_dev {
 
 	struct		cpu_model_info c_models[4];
 
+	void            (*c_early_init)(struct cpuinfo_x86 *c);
 	void		(*c_init)(struct cpuinfo_x86 * c);
 	void		(*c_identify)(struct cpuinfo_x86 * c);
 	unsigned int	(*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
@@ -21,18 +22,17 @@ struct cpu_dev {
 
 extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
 
+struct cpu_vendor_dev {
+	int vendor;			/* slot in cpu_devs[]; callers pass an X86_VENDOR_* value */
+	struct cpu_dev *cpu_dev;	/* stored into cpu_devs[vendor] by early_cpu_init() */
+};
+
+#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \
+	static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \
+	__attribute__((__section__(".x86cpuvendor.init"))) = \
+	{ cpu_vendor_id, cpu_dev }
+
+extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
+
 extern int get_model_name(struct cpuinfo_x86 *c);
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
-
-extern void early_init_intel(struct cpuinfo_x86 *c);
-extern void early_init_amd(struct cpuinfo_x86 *c);
-
-/* Specific CPU type init functions */
-int intel_cpu_init(void);
-int amd_init_cpu(void);
-int cyrix_init_cpu(void);
-int nsc_init_cpu(void);
-int centaur_init_cpu(void);
-int transmeta_init_cpu(void);
-int nexgen_init_cpu(void);
-int umc_init_cpu(void);

+ 63 - 73
arch/x86/kernel/cpu/cyrix.c

@@ -19,7 +19,7 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
 {
 	unsigned char ccr2, ccr3;
 	unsigned long flags;
-	
+
 	/* we test for DEVID by checking whether CCR3 is writable */
 	local_irq_save(flags);
 	ccr3 = getCx86(CX86_CCR3);
@@ -37,8 +37,7 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
 			setCx86(CX86_CCR2, ccr2);
 			*dir0 = 0xfe;
 		}
-	}
-	else {
+	} else {
 		setCx86(CX86_CCR3, ccr3);  /* restore CCR3 */
 
 		/* read DIR0 and DIR1 CPU registers */
@@ -86,7 +85,7 @@ static char cyrix_model_mult2[] __cpuinitdata = "12233445";
 static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
 {
 	unsigned long flags;
-	
+
 	if (Cx86_dir0_msb == 3) {
 		unsigned char ccr3, ccr5;
 
@@ -132,7 +131,7 @@ static void __cpuinit set_cx86_memwb(void)
 	/* set 'Not Write-through' */
 	write_cr0(read_cr0() | X86_CR0_NW);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
-	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
 }
 
 static void __cpuinit set_cx86_inc(void)
@@ -148,7 +147,7 @@ static void __cpuinit set_cx86_inc(void)
 	setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
 	/* PCR0 -- Performance Control */
 	/* Incrementor Margin 10 */
-	setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); 
+	setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
 	setCx86(CX86_CCR3, ccr3);	/* disable MAPEN */
 }
 
@@ -167,16 +166,16 @@ static void __cpuinit geode_configure(void)
 
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
-	
+
 
 	/* FPU fast, DTE cache, Mem bypass */
 	setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
 	setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
-	
+
 	set_cx86_memwb();
-	set_cx86_reorder();	
+	set_cx86_reorder();
 	set_cx86_inc();
-	
+
 	local_irq_restore(flags);
 }
 
@@ -187,14 +186,16 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 	char *buf = c->x86_model_id;
 	const char *p = NULL;
 
-	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
-	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-	clear_bit(0*32+31, c->x86_capability);
+	/*
+	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+	 */
+	clear_cpu_cap(c, 0*32+31);
 
 	/* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
-	if ( test_bit(1*32+24, c->x86_capability) ) {
-		clear_bit(1*32+24, c->x86_capability);
-		set_bit(X86_FEATURE_CXMMX, c->x86_capability);
+	if (test_cpu_cap(c, 1*32+24)) {
+		clear_cpu_cap(c, 1*32+24);
+		set_cpu_cap(c, X86_FEATURE_CXMMX);
 	}
 
 	do_cyrix_devid(&dir0, &dir1);
@@ -213,7 +214,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 	 * the model, multiplier and stepping.  Black magic included,
 	 * to make the silicon step/rev numbers match the printed ones.
 	 */
-	 
+
 	switch (dir0_msn) {
 		unsigned char tmp;
 
@@ -241,7 +242,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 		} else             /* 686 */
 			p = Cx86_cb+1;
 		/* Emulate MTRRs using Cyrix's ARRs. */
-		set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
 		/* 6x86's contain this bug */
 		c->coma_bug = 1;
 		break;
@@ -250,17 +251,18 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 #ifdef CONFIG_PCI
 	{
 		u32 vendor, device;
-		/* It isn't really a PCI quirk directly, but the cure is the
-		   same. The MediaGX has deep magic SMM stuff that handles the
-		   SB emulation. It throws away the fifo on disable_dma() which
-		   is wrong and ruins the audio. 
-
-		   Bug2: VSA1 has a wrap bug so that using maximum sized DMA 
-		   causes bad things. According to NatSemi VSA2 has another
-		   bug to do with 'hlt'. I've not seen any boards using VSA2
-		   and X doesn't seem to support it either so who cares 8).
-		   VSA1 we work around however.
-		*/
+		/*
+		 * It isn't really a PCI quirk directly, but the cure is the
+		 * same. The MediaGX has deep magic SMM stuff that handles the
+		 * SB emulation. It throws away the fifo on disable_dma() which
+		 * is wrong and ruins the audio.
+		 *
+		 *  Bug2: VSA1 has a wrap bug so that using maximum sized DMA
+		 *  causes bad things. According to NatSemi VSA2 has another
+		 *  bug to do with 'hlt'. I've not seen any boards using VSA2
+		 *  and X doesn't seem to support it either so who cares 8).
+		 *  VSA1 we work around however.
+		 */
 
 		printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
 		isa_dma_bridge_buggy = 2;
@@ -273,55 +275,51 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 
 		/*
 		 *  The 5510/5520 companion chips have a funky PIT.
-		 */  
+		 */
 		if (vendor == PCI_VENDOR_ID_CYRIX &&
 	 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
 			mark_tsc_unstable("cyrix 5510/5520 detected");
 	}
 #endif
-		c->x86_cache_size=16;	/* Yep 16K integrated cache thats it */
+		c->x86_cache_size = 16;	/* Yep 16K integrated cache thats it */
 
 		/* GXm supports extended cpuid levels 'ala' AMD */
 		if (c->cpuid_level == 2) {
 			/* Enable cxMMX extensions (GX1 Datasheet 54) */
 			setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
-			
+
 			/*
 			 * GXm : 0x30 ... 0x5f GXm  datasheet 51
 			 * GXlv: 0x6x          GXlv datasheet 54
 			 *  ?  : 0x7x
 			 * GX1 : 0x8x          GX1  datasheet 56
 			 */
-			if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f))
+			if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
 				geode_configure();
 			get_model_name(c);  /* get CPU marketing name */
 			return;
-		}
-		else {  /* MediaGX */
+		} else { /* MediaGX */
 			Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
 			p = Cx86_cb+2;
 			c->x86_model = (dir1 & 0x20) ? 1 : 2;
 		}
 		break;
 
-        case 5: /* 6x86MX/M II */
-		if (dir1 > 7)
-		{
+	case 5: /* 6x86MX/M II */
+		if (dir1 > 7) {
 			dir0_msn++;  /* M II */
 			/* Enable MMX extensions (App note 108) */
 			setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
-		}
-		else
-		{
+		} else {
 			c->coma_bug = 1;      /* 6x86MX, it has the bug. */
 		}
 		tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
 		Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
 		p = Cx86_cb+tmp;
-        	if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
+		if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
 			(c->x86_model)++;
 		/* Emulate MTRRs using Cyrix's ARRs. */
-		set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
 		break;
 
 	case 0xf:  /* Cyrix 486 without DEVID registers */
@@ -343,7 +341,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 		break;
 	}
 	strcpy(buf, Cx86_model[dir0_msn & 7]);
-	if (p) strcat(buf, p);
+	if (p)
+		strcat(buf, p);
 	return;
 }
 
@@ -352,7 +351,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
  */
 static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
 {
-	/* There may be GX1 processors in the wild that are branded
+	/*
+	 * There may be GX1 processors in the wild that are branded
 	 * NSC and not Cyrix.
 	 *
 	 * This function only handles the GX processor, and kicks every
@@ -377,7 +377,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
  * by the fact that they preserve the flags across the division of 5/2.
  * PII and PPro exhibit this behavior too, but they have cpuid available.
  */
- 
+
 /*
  * Perform the Cyrix 5/2 test. A Cyrix won't change
  * the flags, while other 486 chips will.
@@ -398,27 +398,26 @@ static inline int test_cyrix_52div(void)
 	return (unsigned char) (test >> 8) == 0x02;
 }
 
-static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
+static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
 {
 	/* Detect Cyrix with disabled CPUID */
-	if ( c->x86 == 4 && test_cyrix_52div() ) {
+	if (c->x86 == 4 && test_cyrix_52div()) {
 		unsigned char dir0, dir1;
-		
+
 		strcpy(c->x86_vendor_id, "CyrixInstead");
-	        c->x86_vendor = X86_VENDOR_CYRIX;
-	        
-	        /* Actually enable cpuid on the older cyrix */
-	    
-	    	/* Retrieve CPU revisions */
-	    	
+		c->x86_vendor = X86_VENDOR_CYRIX;
+
+		/* Actually enable cpuid on the older cyrix */
+
+		/* Retrieve CPU revisions */
+
 		do_cyrix_devid(&dir0, &dir1);
 
-		dir0>>=4;		
-		
+		dir0 >>= 4;
+
 		/* Check it is an affected model */
-		
-   	        if (dir0 == 5 || dir0 == 3)
-   	        {
+
+		if (dir0 == 5 || dir0 == 3) {
 			unsigned char ccr3;
 			unsigned long flags;
 			printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
@@ -434,26 +433,17 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
 
 static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Cyrix",
-	.c_ident 	= { "CyrixInstead" },
+	.c_ident	= { "CyrixInstead" },
 	.c_init		= init_cyrix,
 	.c_identify	= cyrix_identify,
 };
 
-int __init cyrix_init_cpu(void)
-{
-	cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev;
-	return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev);
 
 static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
 	.c_vendor	= "NSC",
-	.c_ident 	= { "Geode by NSC" },
+	.c_ident	= { "Geode by NSC" },
 	.c_init		= init_nsc,
 };
 
-int __init nsc_init_cpu(void)
-{
-	cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev;
-	return 0;
-}
-
+cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev);

+ 1 - 1
arch/x86/kernel/cpu/feature_names.c

@@ -4,7 +4,7 @@
  * This file must not contain any executable code.
  */
 
-#include "asm/cpufeature.h"
+#include <asm/cpufeature.h>
 
 /*
  * These flag bits must match the definitions in <asm/cpufeature.h>.

+ 53 - 53
arch/x86/kernel/cpu/intel.c

@@ -30,7 +30,7 @@
 struct movsl_mask movsl_mask __read_mostly;
 #endif
 
-void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
 	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
 	if (c->x86 == 15 && c->x86_cache_alignment == 64)
@@ -45,7 +45,7 @@ void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
  *
  *	This is called before we do cpu ident work
  */
- 
+
 int __cpuinit ppro_with_ram_bug(void)
 {
 	/* Uses data from early_cpu_detect now */
@@ -58,7 +58,7 @@ int __cpuinit ppro_with_ram_bug(void)
 	}
 	return 0;
 }
-	
+
 
 /*
  * P4 Xeon errata 037 workaround.
@@ -69,7 +69,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 	unsigned long lo, hi;
 
 	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
-		rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+		rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
 		if ((lo & (1<<9)) == 0) {
 			printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
 			printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
@@ -127,10 +127,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	 */
 	c->f00f_bug = 0;
 	if (!paravirt_enabled() && c->x86 == 5) {
-		static int f00f_workaround_enabled = 0;
+		static int f00f_workaround_enabled;
 
 		c->f00f_bug = 1;
-		if ( !f00f_workaround_enabled ) {
+		if (!f00f_workaround_enabled) {
 			trap_init_f00f_bug();
 			printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
 			f00f_workaround_enabled = 1;
@@ -139,20 +139,22 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 #endif
 
 	l2 = init_intel_cacheinfo(c);
-	if (c->cpuid_level > 9 ) {
+	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
 		/* Check for version and the number of counters */
 		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
-			set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
 	/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
 	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
-		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+		clear_cpu_cap(c, X86_FEATURE_SEP);
 
-	/* Names for the Pentium II/Celeron processors 
-	   detectable only by also checking the cache size.
-	   Dixon is NOT a Celeron. */
+	/*
+	 * Names for the Pentium II/Celeron processors
+	 * detectable only by also checking the cache size.
+	 * Dixon is NOT a Celeron.
+	 */
 	if (c->x86 == 6) {
 		switch (c->x86_model) {
 		case 5:
@@ -163,14 +165,14 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 					p = "Mobile Pentium II (Dixon)";
 			}
 			break;
-			
+
 		case 6:
 			if (l2 == 128)
 				p = "Celeron (Mendocino)";
 			else if (c->x86_mask == 0 || c->x86_mask == 5)
 				p = "Celeron-A";
 			break;
-			
+
 		case 8:
 			if (l2 == 128)
 				p = "Celeron (Coppermine)";
@@ -178,9 +180,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		}
 	}
 
-	if ( p )
+	if (p)
 		strcpy(c->x86_model_id, p);
-	
+
 	c->x86_max_cores = num_cpu_cores(c);
 
 	detect_ht(c);
@@ -207,28 +209,29 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 #endif
 
 	if (cpu_has_xmm2)
-		set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 	if (c->x86 == 15) {
-		set_bit(X86_FEATURE_P4, c->x86_capability);
+		set_cpu_cap(c, X86_FEATURE_P4);
 	}
-	if (c->x86 == 6) 
-		set_bit(X86_FEATURE_P3, c->x86_capability);
+	if (c->x86 == 6)
+		set_cpu_cap(c, X86_FEATURE_P3);
 	if (cpu_has_ds) {
 		unsigned int l1;
 		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
 		if (!(l1 & (1<<11)))
-			set_bit(X86_FEATURE_BTS, c->x86_capability);
+			set_cpu_cap(c, X86_FEATURE_BTS);
 		if (!(l1 & (1<<12)))
-			set_bit(X86_FEATURE_PEBS, c->x86_capability);
+			set_cpu_cap(c, X86_FEATURE_PEBS);
 	}
 
 	if (cpu_has_bts)
 		ds_init_intel(c);
 }
 
-static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 {
-	/* Intel PIII Tualatin. This comes in two flavours.
+	/*
+	 * Intel PIII Tualatin. This comes in two flavours.
 	 * One has 256kb of cache, the other 512. We have no way
 	 * to determine which, so we use a boottime override
 	 * for the 512kb model, and assume 256 otherwise.
@@ -240,42 +243,42 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned
 
 static struct cpu_dev intel_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Intel",
-	.c_ident 	= { "GenuineIntel" },
+	.c_ident	= { "GenuineIntel" },
 	.c_models = {
-		{ .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = 
-		  { 
-			  [0] = "486 DX-25/33", 
-			  [1] = "486 DX-50", 
-			  [2] = "486 SX", 
-			  [3] = "486 DX/2", 
-			  [4] = "486 SL", 
-			  [5] = "486 SX/2", 
-			  [7] = "486 DX/2-WB", 
-			  [8] = "486 DX/4", 
+		{ .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
+		  {
+			  [0] = "486 DX-25/33",
+			  [1] = "486 DX-50",
+			  [2] = "486 SX",
+			  [3] = "486 DX/2",
+			  [4] = "486 SL",
+			  [5] = "486 SX/2",
+			  [7] = "486 DX/2-WB",
+			  [8] = "486 DX/4",
 			  [9] = "486 DX/4-WB"
 		  }
 		},
 		{ .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
-		  { 
-			  [0] = "Pentium 60/66 A-step", 
-			  [1] = "Pentium 60/66", 
+		  {
+			  [0] = "Pentium 60/66 A-step",
+			  [1] = "Pentium 60/66",
 			  [2] = "Pentium 75 - 200",
-			  [3] = "OverDrive PODP5V83", 
+			  [3] = "OverDrive PODP5V83",
 			  [4] = "Pentium MMX",
-			  [7] = "Mobile Pentium 75 - 200", 
+			  [7] = "Mobile Pentium 75 - 200",
 			  [8] = "Mobile Pentium MMX"
 		  }
 		},
 		{ .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
-		  { 
+		  {
 			  [0] = "Pentium Pro A-step",
-			  [1] = "Pentium Pro", 
-			  [3] = "Pentium II (Klamath)", 
-			  [4] = "Pentium II (Deschutes)", 
-			  [5] = "Pentium II (Deschutes)", 
+			  [1] = "Pentium Pro",
+			  [3] = "Pentium II (Klamath)",
+			  [4] = "Pentium II (Deschutes)",
+			  [5] = "Pentium II (Deschutes)",
 			  [6] = "Mobile Pentium II",
-			  [7] = "Pentium III (Katmai)", 
-			  [8] = "Pentium III (Coppermine)", 
+			  [7] = "Pentium III (Katmai)",
+			  [8] = "Pentium III (Coppermine)",
 			  [10] = "Pentium III (Cascades)",
 			  [11] = "Pentium III (Tualatin)",
 		  }
@@ -290,15 +293,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
 		  }
 		},
 	},
+	.c_early_init   = early_init_intel,
 	.c_init		= init_intel,
 	.c_size_cache	= intel_size_cache,
 };
 
-__init int intel_cpu_init(void)
-{
-	cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
-	return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
 
 #ifndef CONFIG_X86_CMPXCHG
 unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
@@ -364,5 +364,5 @@ unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
 EXPORT_SYMBOL(cmpxchg_486_u64);
 #endif
 
-// arch_initcall(intel_cpu_init);
+/* arch_initcall(intel_cpu_init); */
 

+ 25 - 25
arch/x86/kernel/cpu/mcheck/mce_32.c

@@ -10,20 +10,20 @@
 #include <linux/smp.h>
 #include <linux/thread_info.h>
 
-#include <asm/processor.h> 
+#include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/mce.h>
 
 #include "mce.h"
 
-int mce_disabled = 0;
+int mce_disabled;
 int nr_mce_banks;
 
 EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
 
 /* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs * regs, long error_code)
-{	
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
 	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
 }
 
@@ -33,30 +33,30 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_mac
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
 {
-	if (mce_disabled==1)
+	if (mce_disabled == 1)
 		return;
 
 	switch (c->x86_vendor) {
-		case X86_VENDOR_AMD:
-			amd_mcheck_init(c);
-			break;
-
-		case X86_VENDOR_INTEL:
-			if (c->x86==5)
-				intel_p5_mcheck_init(c);
-			if (c->x86==6)
-				intel_p6_mcheck_init(c);
-			if (c->x86==15)
-				intel_p4_mcheck_init(c);
-			break;
-
-		case X86_VENDOR_CENTAUR:
-			if (c->x86==5)
-				winchip_mcheck_init(c);
-			break;
-
-		default:
-			break;
+	case X86_VENDOR_AMD:
+		amd_mcheck_init(c);
+		break;
+
+	case X86_VENDOR_INTEL:
+		if (c->x86 == 5)
+			intel_p5_mcheck_init(c);
+		if (c->x86 == 6)
+			intel_p6_mcheck_init(c);
+		if (c->x86 == 15)
+			intel_p4_mcheck_init(c);
+		break;
+
+	case X86_VENDOR_CENTAUR:
+		if (c->x86 == 5)
+			winchip_mcheck_init(c);
+		break;
+
+	default:
+		break;
 	}
 }
 

+ 12 - 9
arch/x86/kernel/cpu/mcheck/non-fatal.c

@@ -16,7 +16,7 @@
 #include <linux/smp.h>
 #include <linux/module.h>
 
-#include <asm/processor.h> 
+#include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/msr.h>
 
@@ -26,23 +26,26 @@ static int firstbank;
 
 #define MCE_RATE	15*HZ	/* timer rate is 15s */
 
-static void mce_checkregs (void *info)
+static void mce_checkregs(void *info)
 {
 	u32 low, high;
 	int i;
 
-	for (i=firstbank; i<nr_mce_banks; i++) {
-		rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
+	for (i = firstbank; i < nr_mce_banks; i++) {
+		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 
 		if (high & (1<<31)) {
 			printk(KERN_INFO "MCE: The hardware reports a non "
 				"fatal, correctable incident occurred on "
 				"CPU %d.\n",
 				smp_processor_id());
-			printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+			printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
 
-			/* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
-			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+			/*
+			 * Scrub the error so we don't pick it up in MCE_RATE
+			 * seconds time.
+			 */
+			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
 
 			/* Serialize */
 			wmb();
@@ -55,10 +58,10 @@ static void mce_work_fn(struct work_struct *work);
 static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
 
 static void mce_work_fn(struct work_struct *work)
-{ 
+{
 	on_each_cpu(mce_checkregs, NULL, 1, 1);
 	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
-} 
+}
 
 static int __init init_nonfatal_mce_checker(void)
 {

+ 8 - 8
arch/x86/kernel/cpu/mcheck/p5.c

@@ -9,20 +9,20 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 
-#include <asm/processor.h> 
+#include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/msr.h>
 
 #include "mce.h"
 
 /* Machine check handler for Pentium class Intel */
-static void pentium_machine_check(struct pt_regs * regs, long error_code)
+static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
 	u32 loaddr, hi, lotype;
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
 	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
 	printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
-	if(lotype&(1<<5))
+	if (lotype&(1<<5))
 		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
 	add_taint(TAINT_MACHINE_CHECK);
 }
@@ -31,13 +31,13 @@ static void pentium_machine_check(struct pt_regs * regs, long error_code)
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
-	
+
 	/*Check for MCE support */
-	if( !cpu_has(c, X86_FEATURE_MCE) )
-		return;	
+	if (!cpu_has(c, X86_FEATURE_MCE))
+		return;
 
 	/* Default P5 to off as its often misconnected */
-	if(mce_disabled != -1)
+	if (mce_disabled != -1)
 		return;
 	machine_check_vector = pentium_machine_check;
 	wmb();
@@ -47,7 +47,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
 	printk(KERN_INFO "Intel old style machine check architecture supported.\n");
 
- 	/* Enable MCE */
+	/* Enable MCE */
 	set_in_cr4(X86_CR4_MCE);
 	printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
 }

+ 24 - 24
arch/x86/kernel/cpu/mcheck/p6.c

@@ -9,23 +9,23 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 
-#include <asm/processor.h> 
+#include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/msr.h>
 
 #include "mce.h"
 
 /* Machine Check Handler For PII/PIII */
-static void intel_machine_check(struct pt_regs * regs, long error_code)
+static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover=1;
+	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
 	int i;
 
-	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 	if (mcgstl & (1<<0))	/* Recoverable ? */
-		recover=0;
+		recover = 0;
 
 	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
 		smp_processor_id(), mcgsth, mcgstl);
@@ -55,30 +55,30 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
 	}
 
 	if (recover & 2)
-		panic ("CPU context corrupt");
+		panic("CPU context corrupt");
 	if (recover & 1)
-		panic ("Unable to continue");
+		panic("Unable to continue");
 
-	printk (KERN_EMERG "Attempting to continue.\n");
-	/* 
-	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+	printk(KERN_EMERG "Attempting to continue.\n");
+	/*
+	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
 	 * for errors if the OS could not log the error.
 	 */
-	for (i=0; i<nr_mce_banks; i++) {
+	for (i = 0; i < nr_mce_banks; i++) {
 		unsigned int msr;
 		msr = MSR_IA32_MC0_STATUS+i*4;
-		rdmsr (msr,low, high);
+		rdmsr(msr, low, high);
 		if (high & (1<<31)) {
 			/* Clear it */
-			wrmsr (msr, 0UL, 0UL);
+			wrmsr(msr, 0UL, 0UL);
 			/* Serialize */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
 	}
 	mcgstl &= ~(1<<2);
-	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
 /* Set up machine check reporting for processors with Intel style MCE */
@@ -86,21 +86,21 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	int i;
-	
+
 	/* Check for MCE support */
 	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
 
 	/* Check for PPro style MCA */
- 	if (!cpu_has(c, X86_FEATURE_MCA))
+	if (!cpu_has(c, X86_FEATURE_MCA))
 		return;
 
 	/* Ok machine check is available */
 	machine_check_vector = intel_machine_check;
 	wmb();
 
-	printk (KERN_INFO "Intel machine check architecture supported.\n");
-	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	printk(KERN_INFO "Intel machine check architecture supported.\n");
+	rdmsr(MSR_IA32_MCG_CAP, l, h);
 	if (l & (1<<8))	/* Control register present ? */
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 	nr_mce_banks = l & 0xff;
@@ -110,13 +110,13 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 	 * - MC0_CTL should not be written
 	 * - Status registers on all banks should be cleared on reset
 	 */
-	for (i=1; i<nr_mce_banks; i++)
-		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+	for (i = 1; i < nr_mce_banks; i++)
+		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
 
-	for (i=0; i<nr_mce_banks; i++)
-		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+	for (i = 0; i < nr_mce_banks; i++)
+		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
 
-	set_in_cr4 (X86_CR4_MCE);
-	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+	set_in_cr4(X86_CR4_MCE);
+	printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
 		smp_processor_id());
 }

+ 4 - 4
arch/x86/kernel/cpu/mcheck/winchip.c

@@ -8,14 +8,14 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 
-#include <asm/processor.h> 
+#include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/msr.h>
 
 #include "mce.h"
 
 /* Machine check handler for WinChip C6 */
-static void winchip_machine_check(struct pt_regs * regs, long error_code)
+static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
 	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
 	add_taint(TAINT_MACHINE_CHECK);
@@ -28,8 +28,8 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
 	machine_check_vector = winchip_machine_check;
 	wmb();
 	rdmsr(MSR_IDT_FCR1, lo, hi);
-	lo|= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
-	lo&= ~(1<<4);	/* Enable MCE */
+	lo |= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
+	lo &= ~(1<<4);	/* Enable MCE */
 	wrmsr(MSR_IDT_FCR1, lo, hi);
 	set_in_cr4(X86_CR4_MCE);
 	printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");

+ 139 - 0
arch/x86/kernel/cpu/mtrr/generic.c

@@ -11,6 +11,7 @@
 #include <asm/cpufeature.h>
 #include <asm/processor-flags.h>
 #include <asm/tlbflush.h>
+#include <asm/pat.h>
 #include "mtrr.h"
 
 struct mtrr_state {
@@ -35,6 +36,8 @@ static struct fixed_range_block fixed_range_blocks[] = {
 
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
+static int mtrr_state_set;
+static u64 tom2;
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
@@ -42,6 +45,111 @@ static struct mtrr_state mtrr_state = {};
 static int mtrr_show;
 module_param_named(show, mtrr_show, bool, 0);
 
+/*
+ * Returns the effective MTRR type for the region [start, end); 'end' is
+ * exclusive on entry. The fixed-range lookup considers 'start' only.
+ * Error returns:
+ * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
+ * - 0xFF - when MTRR is not enabled
+ */
+u8 mtrr_type_lookup(u64 start, u64 end)
+{
+	int i;
+	u64 base, mask;
+	u8 prev_match, curr_match;
+
+	if (!mtrr_state_set)
+		return 0xFF;
+
+	if (!mtrr_state.enabled)
+		return 0xFF;
+
+	/* Make end inclusive end, instead of exclusive */
+	end--;
+
+	/* Look in fixed ranges. Just return the type as per start */
+	if (mtrr_state.have_fixed && (start < 0x100000)) {
+		int idx;
+
+		if (start < 0x80000) {
+			idx = 0;
+			idx += (start >> 16);
+			return mtrr_state.fixed_ranges[idx];
+		} else if (start < 0xC0000) {
+			idx = 1 * 8;
+			idx += ((start - 0x80000) >> 14);
+			return mtrr_state.fixed_ranges[idx];
+		} else if (start < 0x100000) {
+			idx = 3 * 8;
+			idx += ((start - 0xC0000) >> 12);
+			return mtrr_state.fixed_ranges[idx];
+		}
+	}
+
+	/*
+	 * Look in variable ranges
+	 * Look for multiple ranges matching this address and pick type
+	 * as per MTRR precedence
+	 *
+	 * Only done when bit 1 of 'enabled' is set. Note the parentheses:
+	 * "!enabled & 2" would parse as "(!enabled) & 2", i.e. always 0.
+	 */
+	if (!(mtrr_state.enabled & 2))
+		return mtrr_state.def_type;
+
+	prev_match = 0xFF;
+	for (i = 0; i < num_var_ranges; ++i) {
+		unsigned short start_state, end_state;
+
+		if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
+			continue;
+
+		base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) +
+		       (mtrr_state.var_ranges[i].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) +
+		       (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK);
+
+		start_state = ((start & mask) == (base & mask));
+		end_state = ((end & mask) == (base & mask));
+		if (start_state != end_state)
+			return 0xFE;
+
+		if ((start & mask) != (base & mask))
+			continue;
+
+		curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
+		if (prev_match == 0xFF) {
+			prev_match = curr_match;
+			continue;
+		}
+
+		if (prev_match == MTRR_TYPE_UNCACHABLE ||
+		    curr_match == MTRR_TYPE_UNCACHABLE)
+			return MTRR_TYPE_UNCACHABLE;
+
+		if ((prev_match == MTRR_TYPE_WRBACK &&
+		     curr_match == MTRR_TYPE_WRTHROUGH) ||
+		    (prev_match == MTRR_TYPE_WRTHROUGH &&
+		     curr_match == MTRR_TYPE_WRBACK)) {
+			prev_match = MTRR_TYPE_WRTHROUGH;
+			curr_match = MTRR_TYPE_WRTHROUGH;
+		}
+
+		if (prev_match != curr_match)
+			return MTRR_TYPE_UNCACHABLE;
+	}
+
+	if (tom2) {
+		if (start >= (1ULL<<32) && (end < tom2))
+			return MTRR_TYPE_WRBACK;
+	}
+
+	if (prev_match != 0xFF)
+		return prev_match;
+
+	return mtrr_state.def_type;
+}
+
 /*  Get the MSR pair relating to a var range  */
 static void
 get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
@@ -79,12 +187,16 @@ static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
 			base, base + step - 1, mtrr_attrib_to_str(*types));
 }
 
+static void prepare_set(void);
+static void post_set(void);
+
 /*  Grab all of the MTRR state for this CPU into *state  */
 void __init get_mtrr_state(void)
 {
 	unsigned int i;
 	struct mtrr_var_range *vrs;
 	unsigned lo, dummy;
+	unsigned long flags;
 
 	vrs = mtrr_state.var_ranges;
 
@@ -100,6 +212,15 @@ void __init get_mtrr_state(void)
 	mtrr_state.def_type = (lo & 0xff);
 	mtrr_state.enabled = (lo & 0xc00) >> 10;
 
+	if (amd_special_default_mtrr()) {
+		unsigned lo, hi;
+		/* TOP_MEM2 */
+		rdmsr(MSR_K8_TOP_MEM2, lo, hi);
+		tom2 = hi;
+		tom2 <<= 32;
+		tom2 |= lo;
+		tom2 &= 0xffffff8000000ULL;
+	}
 	if (mtrr_show) {
 		int high_width;
 
@@ -130,7 +251,22 @@ void __init get_mtrr_state(void)
 			else
 				printk(KERN_INFO "MTRR %u disabled\n", i);
 		}
+		if (tom2) {
+			printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
+					  tom2, tom2>>20);
+		}
 	}
+	mtrr_state_set = 1;
+
+	/* PAT setup for BP. We need to go through sync steps here */
+	local_irq_save(flags);
+	prepare_set();
+
+	pat_init();
+
+	post_set();
+	local_irq_restore(flags);
+
 }
 
 /*  Some BIOS's are fucked and don't set all MTRRs the same!  */
@@ -397,6 +533,9 @@ static void generic_set_all(void)
 	/* Actually set the state */
 	mask = set_mtrr_state();
 
+	/* also set PAT */
+	pat_init();
+
 	post_set();
 	local_irq_restore(flags);
 

+ 3 - 4
arch/x86/kernel/cpu/mtrr/if.c

@@ -424,11 +424,10 @@ static int __init mtrr_if_init(void)
 		return -ENODEV;
 
 	proc_root_mtrr =
-	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
-	if (proc_root_mtrr) {
+		proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops);
+
+	if (proc_root_mtrr)
 		proc_root_mtrr->owner = THIS_MODULE;
-		proc_root_mtrr->proc_fops = &mtrr_fops;
-	}
 	return 0;
 }
 

+ 1 - 1
arch/x86/kernel/cpu/mtrr/main.c

@@ -627,7 +627,7 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
 #define Tom2Enabled (1U << 21)
 #define Tom2ForceMemTypeWB (1U << 22)
 
-static __init int amd_special_default_mtrr(void)
+int __init amd_special_default_mtrr(void)
 {
 	u32 l, h;
 

+ 8 - 6
arch/x86/kernel/cpu/mtrr/state.c

@@ -19,13 +19,15 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
 	if (use_intel() || is_cpu(CYRIX)) {
 
 		/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
-		if ( cpu_has_pge ) {
+		if (cpu_has_pge) {
 			ctxt->cr4val = read_cr4();
 			write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
 		}
 
-		/*  Disable and flush caches. Note that wbinvd flushes the TLBs as
-		    a side-effect  */
+		/*
+		 * Disable and flush caches. Note that wbinvd flushes the TLBs
+		 * as a side-effect
+		 */
 		cr0 = read_cr0() | X86_CR0_CD;
 		wbinvd();
 		write_cr0(cr0);
@@ -42,7 +44,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
 
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
 {
-	if (use_intel()) 
+	if (use_intel())
 		/*  Disable MTRRs, and set the default type to uncached  */
 		mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
 		      ctxt->deftype_hi);
@@ -66,12 +68,12 @@ void set_mtrr_done(struct set_mtrr_context *ctxt)
 		else
 			/* Cyrix ARRs - everything else was excluded at the top */
 			setCx86(CX86_CCR3, ctxt->ccr3);
-		
+
 		/*  Enable caches  */
 		write_cr0(read_cr0() & 0xbfffffff);
 
 		/*  Restore value of CR4  */
-		if ( cpu_has_pge )
+		if (cpu_has_pge)
 			write_cr4(ctxt->cr4val);
 	}
 	/*  Re-enable interrupts locally (if enabled previously)  */

+ 7 - 8
arch/x86/kernel/cpu/nexgen.c

@@ -9,11 +9,11 @@
  *	Detect a NexGen CPU running without BIOS hypercode new enough
  *	to have CPUID. (Thanks to Herbert Oppmann)
  */
- 
+
 static int __cpuinit deep_magic_nexgen_probe(void)
 {
 	int ret;
-	
+
 	__asm__ __volatile__ (
 		"	movw	$0x5555, %%ax\n"
 		"	xorw	%%dx,%%dx\n"
@@ -22,22 +22,21 @@ static int __cpuinit deep_magic_nexgen_probe(void)
 		"	movl	$0, %%eax\n"
 		"	jnz	1f\n"
 		"	movl	$1, %%eax\n"
-		"1:\n" 
-		: "=a" (ret) : : "cx", "dx" );
+		"1:\n"
+		: "=a" (ret) : : "cx", "dx");
 	return  ret;
 }
 
-static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
+static void __cpuinit init_nexgen(struct cpuinfo_x86 *c)
 {
 	c->x86_cache_size = 256; /* A few had 1 MB... */
 }
 
-static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
+static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c)
 {
 	/* Detect NexGen with old hypercode */
-	if ( deep_magic_nexgen_probe() ) {
+	if (deep_magic_nexgen_probe())
 		strcpy(c->x86_vendor_id, "NexGenDriven");
-	}
 }
 
 static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {

+ 116 - 54
arch/x86/kernel/cpu/proc.c

@@ -8,78 +8,139 @@
 /*
  *	Get CPU information for use by the procfs.
  */
+#ifdef CONFIG_X86_32
+static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
+			      unsigned int cpu)
+{
+#ifdef CONFIG_X86_HT
+	if (c->x86_max_cores * smp_num_siblings > 1) {
+		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+		seq_printf(m, "siblings\t: %d\n",
+			   cpus_weight(per_cpu(cpu_core_map, cpu)));
+		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+		seq_printf(m, "apicid\t\t: %d\n", c->apicid);
+		seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
+	}
+#endif
+}
+
+static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
+{
+	/*
+	 * We use exception 16 if we have hardware math and we've either seen
+	 * it or the CPU claims it is internal
+	 */
+	int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
+	seq_printf(m,
+		   "fdiv_bug\t: %s\n"
+		   "hlt_bug\t\t: %s\n"
+		   "f00f_bug\t: %s\n"
+		   "coma_bug\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "fpu_exception\t: %s\n"
+		   "cpuid level\t: %d\n"
+		   "wp\t\t: %s\n",
+		   c->fdiv_bug ? "yes" : "no",
+		   c->hlt_works_ok ? "no" : "yes",
+		   c->f00f_bug ? "yes" : "no",
+		   c->coma_bug ? "yes" : "no",
+		   c->hard_math ? "yes" : "no",
+		   fpu_exception ? "yes" : "no",
+		   c->cpuid_level,
+		   c->wp_works_ok ? "yes" : "no");
+}
+#else
+static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
+			      unsigned int cpu)
+{
+#ifdef CONFIG_SMP
+	if (c->x86_max_cores * smp_num_siblings > 1) {
+		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+		seq_printf(m, "siblings\t: %d\n",
+			   cpus_weight(per_cpu(cpu_core_map, cpu)));
+		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+		seq_printf(m, "apicid\t\t: %d\n", c->apicid);
+		seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
+	}
+#endif
+}
+
+static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
+{
+	seq_printf(m,
+		   "fpu\t\t: yes\n"
+		   "fpu_exception\t: yes\n"
+		   "cpuid level\t: %d\n"
+		   "wp\t\t: yes\n",
+		   c->cpuid_level);
+}
+#endif
+
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	struct cpuinfo_x86 *c = v;
-	int i, n = 0;
-	int fpu_exception;
+	unsigned int cpu = 0;
+	int i;
 
 #ifdef CONFIG_SMP
-	n = c->cpu_index;
+	cpu = c->cpu_index;
 #endif
-	seq_printf(m, "processor\t: %d\n"
-		"vendor_id\t: %s\n"
-		"cpu family\t: %d\n"
-		"model\t\t: %d\n"
-		"model name\t: %s\n",
-		n,
-		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
-		c->x86,
-		c->x86_model,
-		c->x86_model_id[0] ? c->x86_model_id : "unknown");
+	seq_printf(m, "processor\t: %u\n"
+		   "vendor_id\t: %s\n"
+		   "cpu family\t: %d\n"
+		   "model\t\t: %u\n"
+		   "model name\t: %s\n",
+		   cpu,
+		   c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+		   c->x86,
+		   c->x86_model,
+		   c->x86_model_id[0] ? c->x86_model_id : "unknown");
 
 	if (c->x86_mask || c->cpuid_level >= 0)
 		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
 	else
 		seq_printf(m, "stepping\t: unknown\n");
 
-	if ( cpu_has(c, X86_FEATURE_TSC) ) {
-		unsigned int freq = cpufreq_quick_get(n);
+	if (cpu_has(c, X86_FEATURE_TSC)) {
+		unsigned int freq = cpufreq_quick_get(cpu);
+
 		if (!freq)
 			freq = cpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-			freq / 1000, (freq % 1000));
+			   freq / 1000, (freq % 1000));
 	}
 
 	/* Cache size */
 	if (c->x86_cache_size >= 0)
 		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-#ifdef CONFIG_X86_HT
-	if (c->x86_max_cores * smp_num_siblings > 1) {
-		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-		seq_printf(m, "siblings\t: %d\n",
-				cpus_weight(per_cpu(cpu_core_map, n)));
-		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
-		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
-	}
-#endif
-	
-	/* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */
-	fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
-	seq_printf(m, "fdiv_bug\t: %s\n"
-			"hlt_bug\t\t: %s\n"
-			"f00f_bug\t: %s\n"
-			"coma_bug\t: %s\n"
-			"fpu\t\t: %s\n"
-			"fpu_exception\t: %s\n"
-			"cpuid level\t: %d\n"
-			"wp\t\t: %s\n"
-			"flags\t\t:",
-		     c->fdiv_bug ? "yes" : "no",
-		     c->hlt_works_ok ? "no" : "yes",
-		     c->f00f_bug ? "yes" : "no",
-		     c->coma_bug ? "yes" : "no",
-		     c->hard_math ? "yes" : "no",
-		     fpu_exception ? "yes" : "no",
-		     c->cpuid_level,
-		     c->wp_works_ok ? "yes" : "no");
-
-	for ( i = 0 ; i < 32*NCAPINTS ; i++ )
-		if ( test_bit(i, c->x86_capability) &&
-		     x86_cap_flags[i] != NULL )
+
+	show_cpuinfo_core(m, c, cpu);
+	show_cpuinfo_misc(m, c);
+
+	seq_printf(m, "flags\t\t:");
+	for (i = 0; i < 32*NCAPINTS; i++)
+		if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
 			seq_printf(m, " %s", x86_cap_flags[i]);
 
-	for (i = 0; i < 32; i++)
+	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+		   c->loops_per_jiffy/(500000/HZ),
+		   (c->loops_per_jiffy/(5000/HZ)) % 100);
+
+#ifdef CONFIG_X86_64
+	if (c->x86_tlbsize > 0)
+		seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
+#endif
+	seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
+#ifdef CONFIG_X86_64
+	seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
+	seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
+		   c->x86_phys_bits, c->x86_virt_bits);
+#endif
+
+	seq_printf(m, "power management:");
+	for (i = 0; i < 32; i++) {
 		if (c->x86_power & (1 << i)) {
 			if (i < ARRAY_SIZE(x86_power_flags) &&
 			    x86_power_flags[i])
@@ -89,11 +150,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 			else
 				seq_printf(m, " [%d]", i);
 		}
+	}
 
-	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
-		     c->loops_per_jiffy/(500000/HZ),
-		     (c->loops_per_jiffy/(5000/HZ)) % 100);
-	seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size);
+	seq_printf(m, "\n\n");
 
 	return 0;
 }
@@ -106,14 +165,17 @@ static void *c_start(struct seq_file *m, loff_t *pos)
 		return &cpu_data(*pos);
 	return NULL;
 }
+
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	*pos = next_cpu(*pos, cpu_online_map);
 	return c_start(m, pos);
 }
+
 static void c_stop(struct seq_file *m, void *v)
 {
 }
+
 const struct seq_operations cpuinfo_op = {
 	.start	= c_start,
 	.next	= c_next,

+ 14 - 16
arch/x86/kernel/cpu/transmeta.c

@@ -18,8 +18,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
 	/* Print CMS and CPU revision */
 	max = cpuid_eax(0x80860000);
 	cpu_rev = 0;
-	if ( max >= 0x80860001 ) {
-		cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); 
+	if (max >= 0x80860001) {
+		cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
 		if (cpu_rev != 0x02000000) {
 			printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
 				(cpu_rev >> 24) & 0xff,
@@ -29,7 +29,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
 				cpu_freq);
 		}
 	}
-	if ( max >= 0x80860002 ) {
+	if (max >= 0x80860002) {
 		cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
 		if (cpu_rev == 0x02000000) {
 			printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
@@ -42,7 +42,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
 		       cms_rev1 & 0xff,
 		       cms_rev2);
 	}
-	if ( max >= 0x80860006 ) {
+	if (max >= 0x80860006) {
 		cpuid(0x80860003,
 		      (void *)&cpu_info[0],
 		      (void *)&cpu_info[4],
@@ -74,23 +74,25 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
 	wrmsr(0x80860004, cap_mask, uk);
 
 	/* All Transmeta CPUs have a constant TSC */
-	set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-	
+	set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
 #ifdef CONFIG_SYSCTL
-	/* randomize_va_space slows us down enormously;
-	   it probably triggers retranslation of x86->native bytecode */
+	/*
+	 * randomize_va_space slows us down enormously;
+	 * it probably triggers retranslation of x86->native bytecode
+	 */
 	randomize_va_space = 0;
 #endif
 }
 
-static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
+static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c)
 {
 	u32 xlvl;
 
 	/* Transmeta-defined flags: level 0x80860001 */
 	xlvl = cpuid_eax(0x80860000);
-	if ( (xlvl & 0xffff0000) == 0x80860000 ) {
-		if (  xlvl >= 0x80860001 )
+	if ((xlvl & 0xffff0000) == 0x80860000) {
+		if (xlvl >= 0x80860001)
 			c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
 }
@@ -102,8 +104,4 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
 	.c_identify	= transmeta_identify,
 };
 
-int __init transmeta_init_cpu(void)
-{
-	cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
-	return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev);

+ 9 - 10
arch/x86/kernel/cpu/umc.c

@@ -3,24 +3,23 @@
 #include <asm/processor.h>
 #include "cpu.h"
 
-/* UMC chips appear to be only either 386 or 486, so no special init takes place.
+/*
+ * UMC chips appear to be only either 386 or 486,
+ * so no special init takes place.
  */
 
 static struct cpu_dev umc_cpu_dev __cpuinitdata = {
 	.c_vendor	= "UMC",
-	.c_ident 	= { "UMC UMC UMC" },
+	.c_ident	= { "UMC UMC UMC" },
 	.c_models = {
 		{ .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
-		  { 
-			  [1] = "U5D", 
-			  [2] = "U5S", 
+		  {
+			  [1] = "U5D",
+			  [2] = "U5S",
 		  }
 		},
 	},
 };
 
-int __init umc_init_cpu(void)
-{
-	cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
-	return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev);
+

+ 0 - 4
arch/x86/kernel/crash.c

@@ -26,11 +26,7 @@
 #include <linux/kdebug.h>
 #include <asm/smp.h>
 
-#ifdef CONFIG_X86_32
 #include <mach_ipi.h>
-#else
-#include <asm/mach_apic.h>
-#endif
 
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;

+ 4 - 4
arch/x86/kernel/ds.c

@@ -220,11 +220,11 @@ int ds_allocate(void **dsp, size_t bts_size_in_bytes)
 
 int ds_free(void **dsp)
 {
-	if (*dsp)
+	if (*dsp) {
 		kfree((void *)get_bts_buffer_base(*dsp));
-	kfree(*dsp);
-	*dsp = NULL;
-
+		kfree(*dsp);
+		*dsp = NULL;
+	}
 	return 0;
 }
 

+ 7 - 20
arch/x86/kernel/e820_32.c

@@ -450,38 +450,25 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
  * thinkpad 560x, for example, does not cooperate with the memory
  * detection code.)
  */
-int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 {
 	/* Only one memory region (or negative)? Ignore it */
 	if (nr_map < 2)
 		return -1;
 
 	do {
-		unsigned long long start = biosmap->addr;
-		unsigned long long size = biosmap->size;
-		unsigned long long end = start + size;
-		unsigned long type = biosmap->type;
+		u64 start = biosmap->addr;
+		u64 size = biosmap->size;
+		u64 end = start + size;
+		u32 type = biosmap->type;
 
 		/* Overflow in 64 bits? Ignore the memory map. */
 		if (start > end)
 			return -1;
 
-		/*
-		 * Some BIOSes claim RAM in the 640k - 1M region.
-		 * Not right. Fix it up.
-		 */
-		if (type == E820_RAM) {
-			if (start < 0x100000ULL && end > 0xA0000ULL) {
-				if (start < 0xA0000ULL)
-					add_memory_region(start, 0xA0000ULL-start, type);
-				if (end <= 0x100000ULL)
-					continue;
-				start = 0x100000ULL;
-				size = end - start;
-			}
-		}
 		add_memory_region(start, size, type);
-	} while (biosmap++,--nr_map);
+	} while (biosmap++, --nr_map);
+
 	return 0;
 }
 

+ 107 - 49
arch/x86/kernel/e820_64.c

@@ -27,6 +27,7 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/kdebug.h>
+#include <asm/trampoline.h>
 
 struct e820map e820;
 
@@ -36,11 +37,11 @@ struct e820map e820;
 unsigned long end_pfn;
 
 /*
- * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
- * The direct mapping extends to end_pfn_map, so that we can directly access
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */
-unsigned long end_pfn_map;
+unsigned long max_pfn_mapped;
 
 /*
  * Last pfn which the user wants to use.
@@ -58,8 +59,8 @@ struct early_res {
 };
 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
 	{ 0, PAGE_SIZE, "BIOS data page" },			/* BIOS data page */
-#ifdef CONFIG_SMP
-	{ SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" },
+#ifdef CONFIG_X86_TRAMPOLINE
+	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
 #endif
 	{}
 };
@@ -95,7 +96,8 @@ void __init early_res_to_bootmem(void)
 }
 
 /* Check for already reserved areas */
-static inline int bad_addr(unsigned long *addrp, unsigned long size)
+static inline int
+bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
 {
 	int i;
 	unsigned long addr = *addrp, last;
@@ -105,7 +107,7 @@ again:
 	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
 		struct early_res *r = &early_res[i];
 		if (last >= r->start && addr < r->end) {
-			*addrp = addr = r->end;
+			*addrp = addr = round_up(r->end, align);
 			changed = 1;
 			goto again;
 		}
@@ -113,6 +115,40 @@ again:
 	return changed;
 }
 
+/* Check for already reserved areas and shrink the range to avoid them */
+static inline int
+bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
+{
+	int i;
+	unsigned long addr = *addrp, last;
+	unsigned long size = *sizep;
+	int changed = 0;
+again:
+	last = addr + size;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		struct early_res *r = &early_res[i];
+		if (last > r->start && addr < r->start) {
+			size = r->start - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last > r->end && addr < r->end) {
+			addr = round_up(r->end, align);
+			size = last - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last <= r->end && addr >= r->start) {
+			(*sizep)++;
+			return 0;
+		}
+	}
+	if (changed) {
+		*addrp = addr;
+		*sizep = size;
+	}
+	return changed;
+}
 /*
  * This function checks if any part of the range <start,end> is mapped
  * with type.
@@ -174,26 +210,27 @@ int __init e820_all_mapped(unsigned long start, unsigned long end,
  * Find a free area with specified alignment in a specific range.
  */
 unsigned long __init find_e820_area(unsigned long start, unsigned long end,
-				    unsigned size, unsigned long align)
+				    unsigned long size, unsigned long align)
 {
 	int i;
-	unsigned long mask = ~(align - 1);
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
-		unsigned long addr = ei->addr, last;
+		unsigned long addr, last;
+		unsigned long ei_last;
 
 		if (ei->type != E820_RAM)
 			continue;
+		addr = round_up(ei->addr, align);
+		ei_last = ei->addr + ei->size;
 		if (addr < start)
-			addr = start;
-		if (addr > ei->addr + ei->size)
+			addr = round_up(start, align);
+		if (addr >= ei_last)
 			continue;
-		while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
+		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
 			;
-		addr = (addr + align - 1) & mask;
 		last = addr + size;
-		if (last > ei->addr + ei->size)
+		if (last > ei_last)
 			continue;
 		if (last > end)
 			continue;
@@ -202,6 +239,40 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end,
 	return -1UL;
 }
 
+/*
+ * Find the next free range at or after start; its size is returned in *sizep
+ */
+unsigned long __init find_e820_area_size(unsigned long start,
+					 unsigned long *sizep,
+					 unsigned long align)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		unsigned long addr, last;
+		unsigned long ei_last;
+
+		if (ei->type != E820_RAM)
+			continue;
+		addr = round_up(ei->addr, align);
+		ei_last = ei->addr + ei->size;
+		if (addr < start)
+			addr = round_up(start, align);
+		if (addr >= ei_last)
+			continue;
+		*sizep = ei_last - addr;
+		while (bad_addr_size(&addr, sizep, align) &&
+			addr + *sizep <= ei_last)
+			;
+		last = addr + *sizep;
+		if (last > ei_last)
+			continue;
+		return addr;
+	}
+	return -1UL;
+
+}
 /*
  * Find the highest page frame number we have available
  */
@@ -211,29 +282,29 @@ unsigned long __init e820_end_of_ram(void)
 
 	end_pfn = find_max_pfn_with_active_regions();
 
-	if (end_pfn > end_pfn_map)
-		end_pfn_map = end_pfn;
-	if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
-		end_pfn_map = MAXMEM>>PAGE_SHIFT;
+	if (end_pfn > max_pfn_mapped)
+		max_pfn_mapped = end_pfn;
+	if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
+		max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
 	if (end_pfn > end_user_pfn)
 		end_pfn = end_user_pfn;
-	if (end_pfn > end_pfn_map)
-		end_pfn = end_pfn_map;
+	if (end_pfn > max_pfn_mapped)
+		end_pfn = max_pfn_mapped;
 
-	printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
+	printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
 	return end_pfn;
 }
 
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
-void __init e820_reserve_resources(struct resource *code_resource,
-		struct resource *data_resource, struct resource *bss_resource)
+void __init e820_reserve_resources(void)
 {
 	int i;
+	struct resource *res;
+
+	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
 	for (i = 0; i < e820.nr_map; i++) {
-		struct resource *res;
-		res = alloc_bootmem_low(sizeof(struct resource));
 		switch (e820.map[i].type) {
 		case E820_RAM:	res->name = "System RAM"; break;
 		case E820_ACPI:	res->name = "ACPI Tables"; break;
@@ -243,21 +314,8 @@ void __init e820_reserve_resources(struct resource *code_resource,
 		res->start = e820.map[i].addr;
 		res->end = res->start + e820.map[i].size - 1;
 		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		request_resource(&iomem_resource, res);
-		if (e820.map[i].type == E820_RAM) {
-			/*
-			 * We don't know which RAM region contains kernel data,
-			 * so we try it repeatedly and let the resource manager
-			 * test it.
-			 */
-			request_resource(res, code_resource);
-			request_resource(res, data_resource);
-			request_resource(res, bss_resource);
-#ifdef CONFIG_KEXEC
-			if (crashk_res.start != crashk_res.end)
-				request_resource(res, &crashk_res);
-#endif
-		}
+		insert_resource(&iomem_resource, res);
+		res++;
 	}
 }
 
@@ -309,9 +367,9 @@ static int __init e820_find_active_region(const struct e820entry *ei,
 	if (*ei_startpfn >= *ei_endpfn)
 		return 0;
 
-	/* Check if end_pfn_map should be updated */
-	if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
-		end_pfn_map = *ei_endpfn;
+	/* Check if max_pfn_mapped should be updated */
+	if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
+		max_pfn_mapped = *ei_endpfn;
 
 	/* Skip if map is outside the node */
 	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
@@ -634,10 +692,10 @@ static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 		return -1;
 
 	do {
-		unsigned long start = biosmap->addr;
-		unsigned long size = biosmap->size;
-		unsigned long end = start + size;
-		unsigned long type = biosmap->type;
+		u64 start = biosmap->addr;
+		u64 size = biosmap->size;
+		u64 end = start + size;
+		u32 type = biosmap->type;
 
 		/* Overflow in 64 bits? Ignore the memory map. */
 		if (start > end)
@@ -702,7 +760,7 @@ static int __init parse_memmap_opt(char *p)
 		saved_max_pfn = e820_end_of_ram();
 		remove_all_active_ranges();
 #endif
-		end_pfn_map = 0;
+		max_pfn_mapped = 0;
 		e820.nr_map = 0;
 		userdef = 1;
 		return 0;

+ 12 - 12
arch/x86/kernel/early_printk.c

@@ -13,7 +13,7 @@
 #define VGABASE		(__ISA_IO_base + 0xb8000)
 
 static int max_ypos = 25, max_xpos = 80;
-static int current_ypos = 25, current_xpos = 0;
+static int current_ypos = 25, current_xpos;
 
 static void early_vga_write(struct console *con, const char *str, unsigned n)
 {
@@ -108,12 +108,12 @@ static __init void early_serial_init(char *s)
 
 	if (*s) {
 		unsigned port;
-		if (!strncmp(s,"0x",2)) {
+		if (!strncmp(s, "0x", 2)) {
 			early_serial_base = simple_strtoul(s, &e, 16);
 		} else {
 			static int bases[] = { 0x3f8, 0x2f8 };
 
-			if (!strncmp(s,"ttyS",4))
+			if (!strncmp(s, "ttyS", 4))
 				s += 4;
 			port = simple_strtoul(s, &e, 10);
 			if (port > 1 || s == e)
@@ -194,7 +194,7 @@ static struct console simnow_console = {
 
 /* Direct interface for emergencies */
 static struct console *early_console = &early_vga_console;
-static int early_console_initialized = 0;
+static int early_console_initialized;
 
 void early_printk(const char *fmt, ...)
 {
@@ -202,9 +202,9 @@ void early_printk(const char *fmt, ...)
 	int n;
 	va_list ap;
 
-	va_start(ap,fmt);
-	n = vscnprintf(buf,512,fmt,ap);
-	early_console->write(early_console,buf,n);
+	va_start(ap, fmt);
+	n = vscnprintf(buf, 512, fmt, ap);
+	early_console->write(early_console, buf, n);
 	va_end(ap);
 }
 
@@ -229,15 +229,15 @@ static int __init setup_early_printk(char *buf)
 		early_serial_init(buf);
 		early_console = &early_serial_console;
 	} else if (!strncmp(buf, "vga", 3)
-	           && boot_params.screen_info.orig_video_isVGA == 1) {
+		&& boot_params.screen_info.orig_video_isVGA == 1) {
 		max_xpos = boot_params.screen_info.orig_video_cols;
 		max_ypos = boot_params.screen_info.orig_video_lines;
 		current_ypos = boot_params.screen_info.orig_y;
 		early_console = &early_vga_console;
- 	} else if (!strncmp(buf, "simnow", 6)) {
- 		simnow_init(buf + 6);
- 		early_console = &simnow_console;
- 		keep_early = 1;
+	} else if (!strncmp(buf, "simnow", 6)) {
+		simnow_init(buf + 6);
+		early_console = &simnow_console;
+		keep_early = 1;
 #ifdef CONFIG_HVC_XEN
 	} else if (!strncmp(buf, "xen", 3)) {
 		early_console = &xenboot_console;

+ 17 - 22
arch/x86/kernel/entry_32.S

@@ -51,6 +51,7 @@
 #include <asm/desc.h>
 #include <asm/percpu.h>
 #include <asm/dwarf2.h>
+#include <asm/processor-flags.h>
 #include "irq_vectors.h"
 
 /*
@@ -68,13 +69,6 @@
 
 #define nr_syscalls ((syscall_table_size)/4)
 
-CF_MASK		= 0x00000001
-TF_MASK		= 0x00000100
-IF_MASK		= 0x00000200
-DF_MASK		= 0x00000400 
-NT_MASK		= 0x00004000
-VM_MASK		= 0x00020000
-
 #ifdef CONFIG_PREEMPT
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
@@ -84,7 +78,7 @@ VM_MASK		= 0x00020000
 
 .macro TRACE_IRQS_IRET
 #ifdef CONFIG_TRACE_IRQFLAGS
-	testl $IF_MASK,PT_EFLAGS(%esp)     # interrupts off?
+	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
 	jz 1f
 	TRACE_IRQS_ON
 1:
@@ -246,7 +240,7 @@ ret_from_intr:
 check_userspace:
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
 	movb PT_CS(%esp), %al
-	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
+	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 	cmpl $USER_RPL, %eax
 	jb resume_kernel		# not returning to v8086 or userspace
 
@@ -271,7 +265,7 @@ need_resched:
 	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
 	jz restore_all
-	testl $IF_MASK,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
+	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz restore_all
 	call preempt_schedule_irq
 	jmp need_resched
@@ -291,10 +285,10 @@ ENTRY(ia32_sysenter_target)
 	movl TSS_sysenter_sp0(%esp),%esp
 sysenter_past_esp:
 	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs and here we enable it straight after entry:
+	 * Interrupts are disabled here, but we can't trace that until
+	 * enough kernel state is set up to call TRACE_IRQS_OFF - but
+	 * we immediately enable interrupts at that point anyway.
 	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ss, 0*/
@@ -302,6 +296,7 @@ sysenter_past_esp:
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET esp, 0
 	pushfl
+	orl $X86_EFLAGS_IF, (%esp)
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl $(__USER_CS)
 	CFI_ADJUST_CFA_OFFSET 4
@@ -315,6 +310,11 @@ sysenter_past_esp:
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
 
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	ENABLE_INTERRUPTS(CLBR_NONE)
+
 /*
  * Load the potential sixth argument from user stack.
  * Careful about security.
@@ -322,14 +322,12 @@ sysenter_past_esp:
 	cmpl $__PAGE_OFFSET-3,%ebp
 	jae syscall_fault
 1:	movl (%ebp),%ebp
+	movl %ebp,PT_EBP(%esp)
 .section __ex_table,"a"
 	.align 4
 	.long 1b,syscall_fault
 .previous
 
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -384,7 +382,7 @@ syscall_exit:
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	TRACE_IRQS_OFF
-	testl $TF_MASK,PT_EFLAGS(%esp)	# If tracing set singlestep flag on exit
+	testl $X86_EFLAGS_TF,PT_EFLAGS(%esp)	# If tracing set singlestep flag on exit
 	jz no_singlestep
 	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
 no_singlestep:
@@ -399,7 +397,7 @@ restore_all:
 	# See comments in process.c:copy_thread() for details.
 	movb PT_OLDSS(%esp), %ah
 	movb PT_CS(%esp), %al
-	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
@@ -486,7 +484,7 @@ work_resched:
 work_notifysig:				# deal with pending signals and
 					# notify-resume requests
 #ifdef CONFIG_VM86
-	testl $VM_MASK, PT_EFLAGS(%esp)
+	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
@@ -543,9 +541,6 @@ END(syscall_exit_work)
 
 	RING0_INT_FRAME			# can't unwind into user space anyway
 syscall_fault:
-	pushl %eax			# save orig_eax
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 	movl $-EFAULT,PT_EAX(%esp)
 	jmp resume_userspace

+ 3 - 5
arch/x86/kernel/entry_64.S

@@ -319,19 +319,17 @@ badsys:
 	/* Do syscall tracing */
 tracesys:			 
 	SAVE_REST
-	movq $-ENOSYS,RAX(%rsp)
+	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
 	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp,%rdi
 	call syscall_trace_enter
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
 	cmpq $__NR_syscall_max,%rax
-	movq $-ENOSYS,%rcx
-	cmova %rcx,%rax
-	ja  1f
+	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
-1:	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX-ARGOFFSET(%rsp)
 	/* Use IRET because user could have changed frame */
 		
 /* 

+ 41 - 6
arch/x86/kernel/genapic_64.c

@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
+#include <linux/hardirq.h>
 
 #include <asm/smp.h>
 #include <asm/ipi.h>
@@ -24,20 +25,20 @@
 #include <acpi/acpi_bus.h>
 #endif
 
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
-					= { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
+static enum uv_system_type uv_system_type;
+
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
  */
 void __init setup_apic_routing(void)
 {
+	if (uv_system_type == UV_NON_UNIQUE_APIC)
+		genapic = &apic_x2apic_uv_x;
+	else
 #ifdef CONFIG_ACPI
 	/*
 	 * Quirk: some x86_64 machines can only use physical APIC mode
@@ -64,3 +65,37 @@ void send_IPI_self(int vector)
 {
 	__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
+
+int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (!strcmp(oem_id, "SGI")) {
+		if (!strcmp(oem_table_id, "UVL"))
+			uv_system_type = UV_LEGACY_APIC;
+		else if (!strcmp(oem_table_id, "UVX"))
+			uv_system_type = UV_X2APIC;
+		else if (!strcmp(oem_table_id, "UVH"))
+			uv_system_type = UV_NON_UNIQUE_APIC;
+	}
+	return 0;
+}
+
+unsigned int read_apic_id(void)
+{
+	unsigned int id;
+
+	WARN_ON(preemptible());
+	id = apic_read(APIC_ID);
+	if (uv_system_type >= UV_X2APIC)
+		id  |= __get_cpu_var(x2apic_extra_bits);
+	return id;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+	return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+	return uv_system_type != UV_NONE;
+}

+ 2 - 5
arch/x86/kernel/genapic_flat_64.c

@@ -97,7 +97,7 @@ static void flat_send_IPI_all(int vector)
 
 static int flat_apic_id_registered(void)
 {
-	return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map);
+	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
 }
 
 static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -138,12 +138,9 @@ static cpumask_t physflat_target_cpus(void)
 
 static cpumask_t physflat_vector_allocation_domain(int cpu)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	return cpumask_of_cpu(cpu);
 }
 
-
 static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
 {
 	send_IPI_mask_sequence(cpumask, vector);

+ 245 - 0
arch/x86/kernel/genx2apic_uv_x.c

@@ -0,0 +1,245 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV APIC functions (note: not an Intel compatible APIC)
+ *
+ * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+
+struct uv_blade_info *uv_blade_info;
+EXPORT_SYMBOL_GPL(uv_blade_info);
+
+short *uv_node_to_blade;
+EXPORT_SYMBOL_GPL(uv_node_to_blade);
+
+short *uv_cpu_to_blade;
+EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+
+short uv_possible_blades;
+EXPORT_SYMBOL_GPL(uv_possible_blades);
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+/* Return a mask that contains only cpu 0. */
+static cpumask_t uv_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+/*
+ * Vector allocation domain for @cpu: a mask containing just that cpu.
+ * Uses cpumask_of_cpu() like physflat_vector_allocation_domain() does,
+ * rather than building the one-bit mask by hand.
+ */
+static cpumask_t uv_vector_allocation_domain(int cpu)
+{
+	return cpumask_of_cpu(cpu);
+}
+
+/*
+ * Kick a secondary cpu by writing an IPI request to the UVH_IPI_INT
+ * register of the node that owns @phys_apicid.
+ *
+ * @phys_apicid: physical APIC id of the cpu to start.
+ * @start_rip:   start address; it is shifted right by 12 before being
+ *               placed in the vector field.
+ *
+ * Always returns 0.
+ */
+int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{
+	/*
+	 * Widen the APIC id before shifting it into the 64-bit register
+	 * value, as uv_send_IPI_one() does with its unsigned long apicid;
+	 * shifting it as a plain int could overflow for large ids.
+	 */
+	unsigned long apicid = (unsigned int)phys_apicid;
+	unsigned long val;
+	int nasid;
+
+	nasid = uv_apicid_to_nasid(phys_apicid);
+	/* NOTE(review): delivery mode 6 is presumably "Start Up" - confirm. */
+	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+	    (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+	    (6UL << UVH_IPI_INT_DELIVERY_MODE_SHFT);
+	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	return 0;
+}
+
+/*
+ * Send @vector to @cpu by writing an IPI request to the UVH_IPI_INT
+ * register of the node derived from the cpu's APIC id.
+ */
+static void uv_send_IPI_one(int cpu, int vector)
+{
+	unsigned long val, apicid;
+	int nasid;
+
+	apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
+	nasid = uv_apicid_to_nasid(apicid);
+	/* Every field is unsigned long before it is shifted into place. */
+	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+	    (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+	    ((unsigned long)vector << UVH_IPI_INT_VECTOR_SHFT);
+	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	/* Deliberately no printk() here: this runs for every single IPI. */
+}
+
+/* Send @vector to every cpu set in @mask, one cpu at a time. */
+static void uv_send_IPI_mask(cpumask_t mask, int vector)
+{
+	int cpu;
+
+	/* Visit only the set bits instead of testing all NR_CPUS slots. */
+	for_each_cpu_mask(cpu, mask)
+		uv_send_IPI_one(cpu, vector);
+}
+
+/* Send @vector to every online cpu except the one executing this code. */
+static void uv_send_IPI_allbutself(int vector)
+{
+	cpumask_t others = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), others);
+
+	/* No other cpu online: nothing to send. */
+	if (cpus_empty(others))
+		return;
+
+	uv_send_IPI_mask(others, vector);
+}
+
+/* Send @vector to every cpu in cpu_online_map, the caller included. */
+static void uv_send_IPI_all(int vector)
+{
+	uv_send_IPI_mask(cpu_online_map, vector);
+}
+
+/* Unconditionally reports the APIC id as registered (returns 1). */
+static int uv_apic_id_registered(void)
+{
+	return 1;
+}
+
+/*
+ * Map @cpumask to a single physical APIC id: that of the first cpu set
+ * in the mask, or BAD_APICID when the mask yields no valid cpu.
+ */
+static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	int first = first_cpu(cpumask);
+
+	if ((unsigned)first >= NR_CPUS)
+		return BAD_APICID;
+
+	return per_cpu(x86_cpu_to_apicid, first);
+}
+
+/*
+ * Return the executing cpu's APIC id (read_apic_id() passed through
+ * GET_APIC_ID) shifted right by @index_msb.
+ */
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return GET_APIC_ID(read_apic_id()) >> index_msb;
+}
+
+#ifdef ZZZ		/* Needs x2apic patch */
+/* Compiled out unless ZZZ is defined: writes @vector to APIC_SELF_IPI. */
+static void uv_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+#endif
+
+/*
+ * genapic operations for SGI UV.  setup_apic_routing() selects this
+ * table when the recorded UV system type is UV_NON_UNIQUE_APIC.
+ * .send_IPI_self is left unset while uv_send_IPI_self() is compiled out.
+ */
+struct genapic apic_x2apic_uv_x = {
+	.name = "UV large system",
+	.int_delivery_mode = dest_Fixed,
+	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+	.target_cpus = uv_target_cpus,
+	.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
+	.apic_id_registered = uv_apic_id_registered,
+	.send_IPI_all = uv_send_IPI_all,
+	.send_IPI_allbutself = uv_send_IPI_allbutself,
+	.send_IPI_mask = uv_send_IPI_mask,
+	/* ZZZ.send_IPI_self = uv_send_IPI_self, */
+	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,	/* Fixme ZZZ */
+};
+
+/*
+ * Store ((nasid >> 1) << 6) in the executing cpu's x2apic_extra_bits;
+ * read_apic_id() ORs that value into the APIC id it returns.
+ * NOTE(review): the shift amounts presumably mirror the UV hub's APIC id
+ * layout - confirm against the hardware documentation.
+ */
+static __cpuinit void set_x2apic_extra_bits(int nasid)
+{
+	__get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6);
+}
+
+/*
+ * Called on boot cpu.
+ *
+ * Counts the possible blades, allocates the blade/node/cpu lookup tables
+ * from bootmem and fills in the per-cpu hub info of every possible cpu.
+ * A new blade is taken to start whenever the nasid derived from a cpu's
+ * APIC id differs from that of the previously visited cpu.
+ */
+static __init void uv_system_init(void)
+{
+	union uvh_si_addr_map_config_u m_n_config;
+	int bytes, nid, cpu, lcpu, nasid, last_nasid, blade;
+	unsigned long mmr_base;
+
+	m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+	mmr_base =
+	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+	    ~UV_MMR_ENABLE;
+	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+	/* Pass 1: count nasid transitions so uv_blade_info can be sized. */
+	last_nasid = -1;
+	for_each_possible_cpu(cpu) {
+		nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+		if (nasid != last_nasid)
+			uv_possible_blades++;
+		last_nasid = nasid;
+	}
+	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
+	uv_blade_info = alloc_bootmem_pages(bytes);
+
+	/* Filling with 0xff bytes leaves every short entry at -1 (unset). */
+	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
+	uv_node_to_blade = alloc_bootmem_pages(bytes);
+	memset(uv_node_to_blade, 255, bytes);
+
+	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
+	uv_cpu_to_blade = alloc_bootmem_pages(bytes);
+	memset(uv_cpu_to_blade, 255, bytes);
+
+	/* Pass 2: fill in per-cpu hub info and the lookup tables. */
+	last_nasid = -1;
+	blade = -1;
+	lcpu = -1;
+	for_each_possible_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+		if (nasid != last_nasid) {
+			blade++;
+			lcpu = -1;
+			uv_blade_info[blade].nr_posible_cpus = 0;
+			uv_blade_info[blade].nr_online_cpus = 0;
+		}
+		last_nasid = nasid;
+		lcpu++;
+
+		uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt;
+		uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt;
+		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+		uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
+		uv_cpu_hub_info(cpu)->local_nasid = nasid;
+		/*
+		 * Use the n_val just stored for this cpu, not the running
+		 * cpu's uv_hub_info, so the result does not depend on the
+		 * boot cpu being the first one visited by the loop.
+		 */
+		uv_cpu_hub_info(cpu)->gnode_upper =
+		    nasid & ~((1 << uv_cpu_hub_info(cpu)->n_val) - 1);
+		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		uv_blade_info[blade].nasid = nasid;
+		uv_blade_info[blade].nr_posible_cpus++;
+		uv_node_to_blade[nid] = blade;
+		uv_cpu_to_blade[cpu] = blade;
+
+		printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n",
+		       cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid);
+		printk(KERN_DEBUG "UV   lcpu %d, blade %d\n", lcpu, blade);
+	}
+}
+
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ *
+ * NOTE(review): uv_system_init() is __init while this function is
+ * __cpuinit; the uv_node_to_blade check is what confines that call to
+ * the first invocation - confirm no later caller can see it NULL.
+ */
+void __cpuinit uv_cpu_init(void)
+{
+	/* Tables not allocated yet: the first caller builds them. */
+	if (!uv_node_to_blade)
+		uv_system_init();
+
+	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+	/* Only UV_NON_UNIQUE_APIC systems get the extra APIC id bits. */
+	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+		set_x2apic_extra_bits(uv_hub_info->local_nasid);
+}

+ 14 - 0
arch/x86/kernel/head32.c

@@ -0,0 +1,14 @@
+/*
+ *  linux/arch/x86/kernel/head32.c -- prepare to run common code
+ *
+ *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *  Copyright (C) 2007 Eric Biederman <ebiederm@xmission.com>
+ */
+
+#include <linux/init.h>
+#include <linux/start_kernel.h>
+
+/* 32-bit C entry point: currently just hands over to start_kernel(). */
+void __init i386_start_kernel(void)
+{
+	start_kernel();
+}

+ 57 - 21
arch/x86/kernel/head64.c

@@ -49,39 +49,75 @@ static void __init copy_bootdata(char *real_mode_data)
 	}
 }
 
-#define EBDA_ADDR_POINTER 0x40E
+#define BIOS_EBDA_SEGMENT 0x40E
+#define BIOS_LOWMEM_KILOBYTES 0x413
 
-static __init void reserve_ebda(void)
+/*
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyways,
+ * unless you have no PS/2 mouse plugged in.
+ */
+static void __init reserve_ebda_region(void)
 {
-	unsigned ebda_addr, ebda_size;
+	unsigned int lowmem, ebda_addr;
+
+	/* To determine the position of the EBDA and the */
+	/* end of conventional memory, we need to look at */
+	/* the BIOS data area. In a paravirtual environment */
+	/* that area is absent. We'll just have to assume */
+	/* that the paravirt case can handle memory setup */
+	/* correctly, without our help. */
+	if (paravirt_enabled())
+		return;
 
-	/*
-	 * there is a real-mode segmented pointer pointing to the
-	 * 4K EBDA area at 0x40E
-	 */
-	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+	/* end of low (conventional) memory */
+	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+	lowmem <<= 10;
+
+	/* start of EBDA area */
+	ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
 	ebda_addr <<= 4;
 
-	if (!ebda_addr)
-		return;
+	/* Fixup: bios puts an EBDA in the top 64K segment */
+	/* of conventional memory, but does not adjust lowmem. */
+	if ((lowmem - ebda_addr) <= 0x10000)
+		lowmem = ebda_addr;
 
-	ebda_size = *(unsigned short *)__va(ebda_addr);
+	/* Fixup: bios does not report an EBDA at all. */
+	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+		lowmem = 0x9f000;
 
-	/* Round EBDA up to pages */
-	if (ebda_size == 0)
-		ebda_size = 1;
-	ebda_size <<= 10;
-	ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
-	if (ebda_size > 64*1024)
-		ebda_size = 64*1024;
+	/* Paranoia: should never happen, but... */
+	if ((lowmem == 0) || (lowmem >= 0x100000))
+		lowmem = 0x9f000;
 
-	reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA");
+	/* reserve all memory between lowmem and the 1MB mark */
+	reserve_early(lowmem, 0x100000, "BIOS reserved");
 }
 
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	int i;
 
+	/*
+	 * Build-time sanity checks on the kernel image and module
+	 * area mappings. (these are purely build-time and produce no code)
+	 */
+	BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
+	BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
+	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
+	BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
+	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
+	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+				(__START_KERNEL & PGDIR_MASK)));
+
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
@@ -91,7 +127,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	/* Cleanup the over mapped high alias */
 	cleanup_highmap();
 
-	for (i = 0; i < IDT_ENTRIES; i++) {
+	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
 #ifdef CONFIG_EARLY_PRINTK
 		set_intr_gate(i, &early_idt_handlers[i]);
 #else
@@ -118,7 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 
-	reserve_ebda();
+	reserve_ebda_region();
 
 	/*
 	 * At this point everything still needed from the boot loader

+ 1 - 1
arch/x86/kernel/head_32.S

@@ -450,7 +450,7 @@ is386:	movl $2,%ecx		# set MP
 	jmp initialize_secondary # all other CPUs call initialize_secondary
 1:
 #endif /* CONFIG_SMP */
-	jmp start_kernel
+	jmp i386_start_kernel
 
 /*
  * We depend on ET to be correct. This checks for 287/387.

+ 10 - 18
arch/x86/kernel/head_64.S

@@ -132,10 +132,6 @@ ident_complete:
 	addq	%rbp, trampoline_level4_pgt + 0(%rip)
 	addq	%rbp, trampoline_level4_pgt + (511*8)(%rip)
 #endif
-#ifdef CONFIG_ACPI_SLEEP
-	addq	%rbp, wakeup_level4_pgt + 0(%rip)
-	addq	%rbp, wakeup_level4_pgt + (511*8)(%rip)
-#endif
 
 	/* Due to ENTRY(), sometimes the empty space gets filled with
 	 * zeros. Better take a jmp than relying on empty space being
@@ -267,21 +263,16 @@ ENTRY(secondary_startup_64)
 bad_address:
 	jmp bad_address
 
+	.section ".init.text","ax"
 #ifdef CONFIG_EARLY_PRINTK
-.macro early_idt_tramp first, last
-	.ifgt \last-\first
-	early_idt_tramp \first, \last-1
-	.endif
-	movl $\last,%esi
-	jmp early_idt_handler
-.endm
-
 	.globl early_idt_handlers
 early_idt_handlers:
-	early_idt_tramp 0, 63
-	early_idt_tramp 64, 127
-	early_idt_tramp 128, 191
-	early_idt_tramp 192, 255
+	i = 0
+	.rept NUM_EXCEPTION_VECTORS
+	movl $i, %esi
+	jmp early_idt_handler
+	i = i + 1
+	.endr
 #endif
 
 ENTRY(early_idt_handler)
@@ -327,6 +318,7 @@ early_idt_msg:
 early_idt_ripmsg:
 	.asciz "RIP %s\n"
 #endif /* CONFIG_EARLY_PRINTK */
+	.previous
 
 .balign PAGE_SIZE
 
@@ -383,12 +375,12 @@ NEXT_PAGE(level2_ident_pgt)
 
 NEXT_PAGE(level2_kernel_pgt)
 	/*
-	 * 128 MB kernel mapping. We spend a full page on this pagetable
+	 * 512 MB kernel mapping. We spend a full page on this pagetable
 	 * anyway.
 	 *
 	 * The kernel code+data+bss must not be bigger than that.
 	 *
-	 * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+	 * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
 	 *  If you want to increase this then increase MODULES_VADDR
 	 *  too.)
 	 */

+ 46 - 42
arch/x86/kernel/i387.c

@@ -5,45 +5,41 @@
  *  General FPU state handling cleanups
  *	Gareth Hughes <gareth@valinux.com>, May 2000
  */
-
-#include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/regset.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
 #include <asm/processor.h>
-#include <asm/i387.h>
 #include <asm/math_emu.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/ptrace.h>
 #include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/i387.h>
+#include <asm/user.h>
 
 #ifdef CONFIG_X86_64
-
-#include <asm/sigcontext32.h>
-#include <asm/user32.h>
-
+# include <asm/sigcontext32.h>
+# include <asm/user32.h>
 #else
-
-#define	save_i387_ia32		save_i387
-#define	restore_i387_ia32	restore_i387
-
-#define _fpstate_ia32 		_fpstate
-#define user_i387_ia32_struct	user_i387_struct
-#define user32_fxsr_struct	user_fxsr_struct
-
+# define save_i387_ia32		save_i387
+# define restore_i387_ia32	restore_i387
+# define _fpstate_ia32		_fpstate
+# define user_i387_ia32_struct	user_i387_struct
+# define user32_fxsr_struct	user_fxsr_struct
 #endif
 
 #ifdef CONFIG_MATH_EMULATION
-#define HAVE_HWFP (boot_cpu_data.hard_math)
+# define HAVE_HWFP		(boot_cpu_data.hard_math)
 #else
-#define HAVE_HWFP 1
+# define HAVE_HWFP		1
 #endif
 
-static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+static unsigned int		mxcsr_feature_mask __read_mostly = 0xffffffffu;
 
 void mxcsr_feature_mask_init(void)
 {
 	unsigned long mask = 0;
+
 	clts();
 	if (cpu_has_fxsr) {
 		memset(&current->thread.i387.fxsave, 0,
@@ -69,10 +65,11 @@ void __cpuinit fpu_init(void)
 
 	if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
 		__bad_fxsave_alignment();
+
 	set_in_cr4(X86_CR4_OSFXSR);
 	set_in_cr4(X86_CR4_OSXMMEXCPT);
 
-	write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */
+	write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
 
 	mxcsr_feature_mask_init();
 	/* clean state in init */
@@ -178,6 +175,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
 	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
 	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
 	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+
 	return tmp;
 }
 
@@ -232,8 +230,8 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
  * FXSR floating point environment conversions.
  */
 
-static void convert_from_fxsr(struct user_i387_ia32_struct *env,
-			      struct task_struct *tsk)
+static void
+convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 {
 	struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
 	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
@@ -252,10 +250,11 @@ static void convert_from_fxsr(struct user_i387_ia32_struct *env,
 		 * should be actually ds/cs at fpu exception time, but
 		 * that information is not available in 64bit mode.
 		 */
-		asm("mov %%ds,%0" : "=r" (env->fos));
-		asm("mov %%cs,%0" : "=r" (env->fcs));
+		asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
+		asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
 	} else {
 		struct pt_regs *regs = task_pt_regs(tsk);
+
 		env->fos = 0xffff0000 | tsk->thread.ds;
 		env->fcs = regs->cs;
 	}
@@ -309,9 +308,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	init_fpu(target);
 
-	if (!cpu_has_fxsr)
+	if (!cpu_has_fxsr) {
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 					   &target->thread.i387.fsave, 0, -1);
+	}
 
 	if (kbuf && pos == 0 && count == sizeof(env)) {
 		convert_from_fxsr(kbuf, target);
@@ -319,6 +319,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	}
 
 	convert_from_fxsr(&env, target);
+
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
 }
 
@@ -335,9 +336,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	init_fpu(target);
 	set_stopped_child_used_math(target);
 
-	if (!cpu_has_fxsr)
+	if (!cpu_has_fxsr) {
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 					  &target->thread.i387.fsave, 0, -1);
+	}
 
 	if (pos > 0 || count < sizeof(env))
 		convert_from_fxsr(&env, target);
@@ -392,28 +394,28 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
 {
 	if (!used_math())
 		return 0;
-
-	/* This will cause a "finit" to be triggered by the next
+	/*
+	 * This will cause a "finit" to be triggered by the next
 	 * attempted FPU operation by the 'current' process.
 	 */
 	clear_used_math();
 
-	if (HAVE_HWFP) {
-		if (cpu_has_fxsr) {
-			return save_i387_fxsave(buf);
-		} else {
-			return save_i387_fsave(buf);
-		}
-	} else {
+	if (!HAVE_HWFP) {
 		return fpregs_soft_get(current, NULL,
 				       0, sizeof(struct user_i387_ia32_struct),
 				       NULL, buf) ? -1 : 1;
 	}
+
+	if (cpu_has_fxsr)
+		return save_i387_fxsave(buf);
+	else
+		return save_i387_fsave(buf);
 }
 
 static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
 {
 	struct task_struct *tsk = current;
+
 	clear_fpu(tsk);
 	return __copy_from_user(&tsk->thread.i387.fsave, buf,
 				sizeof(struct i387_fsave_struct));
@@ -421,9 +423,10 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
 
 static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
 {
-	int err;
 	struct task_struct *tsk = current;
 	struct user_i387_ia32_struct env;
+	int err;
+
 	clear_fpu(tsk);
 	err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
 			       sizeof(struct i387_fxsave_struct));
@@ -432,6 +435,7 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	if (err || __copy_from_user(&env, buf, sizeof(env)))
 		return 1;
 	convert_to_fxsr(tsk, &env);
+
 	return 0;
 }
 
@@ -440,17 +444,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
 	int err;
 
 	if (HAVE_HWFP) {
-		if (cpu_has_fxsr) {
+		if (cpu_has_fxsr)
 			err = restore_i387_fxsave(buf);
-		} else {
+		else
 			err = restore_i387_fsave(buf);
-		}
 	} else {
 		err = fpregs_soft_set(current, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),
 				      NULL, buf) != 0;
 	}
 	set_used_math();
+
 	return err;
 }
 
@@ -463,8 +467,8 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
  */
 int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
 {
-	int fpvalid;
 	struct task_struct *tsk = current;
+	int fpvalid;
 
 	fpvalid = !!used_math();
 	if (fpvalid)

+ 64 - 100
arch/x86/kernel/io_apic_32.c

@@ -71,6 +71,16 @@ int sis_apic_bug = -1;
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
 static int disable_timer_pin_1 __initdata;
 
 /*
@@ -810,10 +820,7 @@ static int __init find_isa_irq_pin(int irq, int type)
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].mpc_srcbus;
 
-		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_MCA
-		    ) &&
+		if (test_bit(lbus, mp_bus_not_pci) &&
 		    (mp_irqs[i].mpc_irqtype == type) &&
 		    (mp_irqs[i].mpc_srcbusirq == irq))
 
@@ -829,10 +836,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].mpc_srcbus;
 
-		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_MCA
-		    ) &&
+		if (test_bit(lbus, mp_bus_not_pci) &&
 		    (mp_irqs[i].mpc_irqtype == type) &&
 		    (mp_irqs[i].mpc_srcbusirq == irq))
 			break;
@@ -872,7 +876,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
 				break;
 
-		if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+		if (!test_bit(lbus, mp_bus_not_pci) &&
 		    !mp_irqs[i].mpc_irqtype &&
 		    (bus == lbus) &&
 		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
@@ -921,6 +925,7 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
  */
@@ -934,6 +939,13 @@ static int EISA_ELCR(unsigned int irq)
 			"Broken MPtable reports ISA irq %d\n", irq);
 	return 0;
 }
+#endif
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)	(0)
+#define default_ISA_polarity(idx)	(0)
 
 /* EISA interrupts are always polarity zero and can be edge or level
  * trigger depending on the ELCR value.  If an interrupt is listed as
@@ -941,13 +953,7 @@ static int EISA_ELCR(unsigned int irq)
  * be read in from the ELCR */
 
 #define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
-#define default_EISA_polarity(idx)	(0)
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx)	(0)
-#define default_ISA_polarity(idx)	(0)
+#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
 
 /* PCI interrupts are always polarity one level triggered,
  * when listed as conforming in the MP table. */
@@ -959,7 +965,7 @@ static int EISA_ELCR(unsigned int irq)
  * when listed as conforming in the MP table. */
 
 #define default_MCA_trigger(idx)	(1)
-#define default_MCA_polarity(idx)	(0)
+#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
 
 static int MPBIOS_polarity(int idx)
 {
@@ -973,35 +979,9 @@ static int MPBIOS_polarity(int idx)
 	{
 		case 0: /* conforms, ie. bus-type dependent polarity */
 		{
-			switch (mp_bus_id_to_type[bus])
-			{
-				case MP_BUS_ISA: /* ISA pin */
-				{
-					polarity = default_ISA_polarity(idx);
-					break;
-				}
-				case MP_BUS_EISA: /* EISA pin */
-				{
-					polarity = default_EISA_polarity(idx);
-					break;
-				}
-				case MP_BUS_PCI: /* PCI pin */
-				{
-					polarity = default_PCI_polarity(idx);
-					break;
-				}
-				case MP_BUS_MCA: /* MCA pin */
-				{
-					polarity = default_MCA_polarity(idx);
-					break;
-				}
-				default:
-				{
-					printk(KERN_WARNING "broken BIOS!!\n");
-					polarity = 1;
-					break;
-				}
-			}
+			polarity = test_bit(bus, mp_bus_not_pci)?
+				default_ISA_polarity(idx):
+				default_PCI_polarity(idx);
 			break;
 		}
 		case 1: /* high active */
@@ -1042,11 +1022,15 @@ static int MPBIOS_trigger(int idx)
 	{
 		case 0: /* conforms, ie. bus-type dependent */
 		{
+			trigger = test_bit(bus, mp_bus_not_pci)?
+					default_ISA_trigger(idx):
+					default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 			switch (mp_bus_id_to_type[bus])
 			{
 				case MP_BUS_ISA: /* ISA pin */
 				{
-					trigger = default_ISA_trigger(idx);
+					/* set before the switch */
 					break;
 				}
 				case MP_BUS_EISA: /* EISA pin */
@@ -1056,7 +1040,7 @@ static int MPBIOS_trigger(int idx)
 				}
 				case MP_BUS_PCI: /* PCI pin */
 				{
-					trigger = default_PCI_trigger(idx);
+					/* set before the switch */
 					break;
 				}
 				case MP_BUS_MCA: /* MCA pin */
@@ -1071,6 +1055,7 @@ static int MPBIOS_trigger(int idx)
 					break;
 				}
 			}
+#endif
 			break;
 		}
 		case 1: /* edge */
@@ -1120,39 +1105,22 @@ static int pin_2_irq(int idx, int apic, int pin)
 	if (mp_irqs[idx].mpc_dstirq != pin)
 		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
-	switch (mp_bus_id_to_type[bus])
-	{
-		case MP_BUS_ISA: /* ISA pin */
-		case MP_BUS_EISA:
-		case MP_BUS_MCA:
-		{
-			irq = mp_irqs[idx].mpc_srcbusirq;
-			break;
-		}
-		case MP_BUS_PCI: /* PCI pin */
-		{
-			/*
-			 * PCI IRQs are mapped in order
-			 */
-			i = irq = 0;
-			while (i < apic)
-				irq += nr_ioapic_registers[i++];
-			irq += pin;
-
-			/*
-			 * For MPS mode, so far only needed by ES7000 platform
-			 */
-			if (ioapic_renumber_irq)
-				irq = ioapic_renumber_irq(apic, irq);
+	if (test_bit(bus, mp_bus_not_pci))
+		irq = mp_irqs[idx].mpc_srcbusirq;
+	else {
+		/*
+		 * PCI IRQs are mapped in order
+		 */
+		i = irq = 0;
+		while (i < apic)
+			irq += nr_ioapic_registers[i++];
+		irq += pin;
 
-			break;
-		}
-		default:
-		{
-			printk(KERN_ERR "unknown bus type %d.\n",bus); 
-			irq = 0;
-			break;
-		}
+		/*
+		 * For MPS mode, so far only needed by ES7000 platform
+		 */
+		if (ioapic_renumber_irq)
+			irq = ioapic_renumber_irq(apic, irq);
 	}
 
 	/*
@@ -1260,7 +1228,6 @@ static void __init setup_IO_APIC_irqs(void)
 {
 	struct IO_APIC_route_entry entry;
 	int apic, pin, idx, irq, first_notcon = 1, vector;
-	unsigned long flags;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1326,9 +1293,7 @@ static void __init setup_IO_APIC_irqs(void)
 			if (!apic && (irq < 16))
 				disable_8259A_irq(irq);
 		}
-		spin_lock_irqsave(&ioapic_lock, flags);
-		__ioapic_write_entry(apic, pin, entry);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
+		ioapic_write_entry(apic, pin, entry);
 	}
 	}
 
@@ -1524,8 +1489,8 @@ void /*__init*/ print_local_APIC(void * dummy)
 
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
+			GET_APIC_ID(read_apic_id()));
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1734,7 +1699,7 @@ void disable_IO_APIC(void)
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
 		entry.dest.physical.physical_dest =
-					GET_APIC_ID(apic_read(APIC_ID));
+					GET_APIC_ID(read_apic_id());
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -2031,8 +1996,7 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for (irq = 0; irq < NR_IRQS ; irq++) {
-		int tmp = irq;
-		if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
+		if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2156,8 +2120,6 @@ static inline void unlock_ExtINT_logic(void)
 	ioapic_write_entry(apic, pin, entry0);
 }
 
-int timer_uses_ioapic_pin_0;
-
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -2168,10 +2130,14 @@ static inline void __init check_timer(void)
 {
 	int apic1, pin1, apic2, pin2;
 	int vector;
+	unsigned int ver;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
+	ver = apic_read(APIC_LVR);
+	ver = GET_APIC_VERSION(ver);
+
 	/*
 	 * get/set the timer IRQ vector:
 	 */
@@ -2184,11 +2150,15 @@ static inline void __init check_timer(void)
 	 * mode for the 8259A whenever interrupts are routed
 	 * through I/O APICs.  Also IRQ0 has to be enabled in
 	 * the 8259A which implies the virtual wire has to be
-	 * disabled in the local APIC.
+	 * disabled in the local APIC.  Finally timer interrupts
+	 * need to be acknowledged manually in the 8259A for
+	 * timer_interrupt() and for the i82489DX when using
+	 * the NMI watchdog.
 	 */
 	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
-	timer_ack = 1;
+	timer_ack = !cpu_has_tsc;
+	timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
 	if (timer_over_8254 > 0)
 		enable_8259A_irq(0);
 
@@ -2197,9 +2167,6 @@ static inline void __init check_timer(void)
 	pin2  = ioapic_i8259.pin;
 	apic2 = ioapic_i8259.apic;
 
-	if (pin1 == 0)
-		timer_uses_ioapic_pin_0 = 1;
-
 	printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
 		vector, apic1, pin1, apic2, pin2);
 
@@ -2789,7 +2756,6 @@ int __init io_apic_get_redir_entries (int ioapic)
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
 {
 	struct IO_APIC_route_entry entry;
-	unsigned long flags;
 
 	if (!IO_APIC_IRQ(irq)) {
 		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -2830,9 +2796,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 	if (!ioapic && (irq < 16))
 		disable_8259A_irq(irq);
 
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__ioapic_write_entry(ioapic, pin, entry);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(ioapic, pin, entry);
 
 	return 0;
 }

+ 30 - 33
arch/x86/kernel/io_apic_64.c

@@ -43,13 +43,15 @@
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/proto.h>
-#include <asm/mach_apic.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 
+#include <mach_ipi.h>
+#include <mach_apic.h>
+
 struct irq_cfg {
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -101,6 +103,16 @@ DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -155,11 +167,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int value)
 	writel(value, &io_apic->data);
 }
 
-static int io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(unsigned int irq)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
-	int pending = 0;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	entry = irq_2_pin + irq;
@@ -172,13 +183,17 @@ static int io_apic_level_ack_pending(unsigned int irq)
 			break;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		/* Is the remote IRR bit set? */
-		pending |= (reg >> 14) & 1;
+		if ((reg >> 14) & 1) {
+			spin_unlock_irqrestore(&ioapic_lock, flags);
+			return true;
+		}
 		if (!entry->next)
 			break;
 		entry = irq_2_pin + entry->next;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
-	return pending;
+
+	return false;
 }
 
 /*
@@ -902,9 +917,8 @@ static void __init setup_IO_APIC_irqs(void)
 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 {
 	struct IO_APIC_route_entry entry;
-	unsigned long flags;
 
-	memset(&entry,0,sizeof(entry));
+	memset(&entry, 0, sizeof(entry));
 
 	disable_8259A_irq(0);
 
@@ -932,10 +946,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
 	/*
 	 * Add it to the IO-APIC irq-routing table:
 	 */
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
-	io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(apic, pin, entry);
 
 	enable_8259A_irq(0);
 }
@@ -1066,8 +1077,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1261,7 +1271,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest          = GET_APIC_ID(apic_read(APIC_ID));
+		entry.dest          = GET_APIC_ID(read_apic_id());
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -1352,9 +1362,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	cpus_clear(mask);
-	cpu_set(first_cpu(cfg->domain), mask);
-
+	mask = cpumask_of_cpu(first_cpu(cfg->domain));
 	send_IPI_mask(mask, cfg->vector);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
@@ -1517,8 +1525,7 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for (irq = 0; irq < NR_IRQS ; irq++) {
-		int tmp = irq;
-		if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
+		if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -1597,17 +1604,14 @@ static inline void unlock_ExtINT_logic(void)
 	int apic, pin, i;
 	struct IO_APIC_route_entry entry0, entry1;
 	unsigned char save_control, save_freq_select;
-	unsigned long flags;
 
 	pin  = find_isa_irq_pin(8, mp_INT);
 	apic = find_isa_irq_apic(8, mp_INT);
 	if (pin == -1)
 		return;
 
-	spin_lock_irqsave(&ioapic_lock, flags);
-	*(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
-	*(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	entry0 = ioapic_read_entry(apic, pin);
+
 	clear_IO_APIC_pin(apic, pin);
 
 	memset(&entry1, 0, sizeof(entry1));
@@ -1620,10 +1624,7 @@ static inline void unlock_ExtINT_logic(void)
 	entry1.trigger = 0;
 	entry1.vector = 0;
 
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
-	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(apic, pin, entry1);
 
 	save_control = CMOS_READ(RTC_CONTROL);
 	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -1642,10 +1643,7 @@ static inline void unlock_ExtINT_logic(void)
 	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 	clear_IO_APIC_pin(apic, pin);
 
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
-	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(apic, pin, entry0);
 }
 
 /*
@@ -2314,7 +2312,6 @@ static struct resource * __init ioapic_setup_resources(void)
 	res = (void *)mem;
 
 	if (mem != NULL) {
-		memset(mem, 0, n);
 		mem += sizeof(struct resource) * nr_ioapics;
 
 		for (i = 0; i < nr_ioapics; i++) {

+ 178 - 0
arch/x86/kernel/ipi.c

@@ -0,0 +1,178 @@
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/apic.h>
+#include <asm/proto.h>
+
+#ifdef CONFIG_X86_32
+#include <mach_apic.h>
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+/*
+ * Build the value written to APIC_ICR: the caller's destination
+ * shorthand ORed with APIC_DEST_LOGICAL, plus APIC_DM_NMI when the
+ * vector is NMI_VECTOR, or APIC_DM_FIXED and the vector otherwise.
+ */
+static inline int __prepare_ICR(unsigned int shortcut, int vector)
+{
+	unsigned int cfg = shortcut | APIC_DEST_LOGICAL;
+
+	if (vector == NMI_VECTOR)
+		cfg |= APIC_DM_NMI;
+	else
+		cfg |= APIC_DM_FIXED | vector;
+
+	return cfg;
+}
+
+/*
+ * Build the value written to APIC_ICR2 by placing the destination
+ * mask into the destination field via SET_APIC_DEST_FIELD().
+ */
+static inline int __prepare_ICR2(unsigned int mask)
+{
+	int icr2 = SET_APIC_DEST_FIELD(mask);
+
+	return icr2;
+}
+
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+	/*
+	 * Send an IPI for 'vector' using the destination shorthand given
+	 * in 'shortcut': wait for the ICR to go idle, build the ICR value,
+	 * then write it to APIC_ICR.
+	 *
+	 * Subtle. In the case of the 'never do double writes' workaround
+	 * we have to lock out interrupts to be safe.  As we don't care
+	 * about the value read we use an atomic rmw access to avoid costly
+	 * cli/sti.  Otherwise we use an even cheaper single atomic write
+	 * to the APIC.
+	 * NOTE(review): this presumably describes what apic_write_around()
+	 * does for the write below - confirm against its definition.
+	 */
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle before touching the ICR.
+	 */
+	apic_wait_icr_idle();
+
+	/*
+	 * No need to touch the target chip field (APIC_ICR2 is not
+	 * written here); only the ICR value itself is prepared.
+	 */
+	cfg = __prepare_ICR(shortcut, vector);
+
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write_around(APIC_ICR, cfg);
+}
+
+/* Send 'vector' using the APIC_DEST_SELF destination shorthand. */
+void send_IPI_self(int vector)
+{
+	__send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ *
+ * 'mask' is the destination value placed in the target chip field via
+ * __prepare_ICR2(); 'vector' selects what is delivered.  APIC_ICR2 is
+ * written before APIC_ICR because the APIC_ICR write is what sends the IPI.
+ */
+static inline void __send_IPI_dest_field(unsigned long mask, int vector)
+{
+	unsigned long cfg;
+
+	/*
+	 * Wait for idle.  The NMI vector goes through
+	 * safe_apic_wait_icr_idle(); every other vector uses the
+	 * regular apic_wait_icr_idle().
+	 */
+	if (unlikely(vector == NMI_VECTOR))
+		safe_apic_wait_icr_idle();
+	else
+		apic_wait_icr_idle();
+
+	/*
+	 * Prepare the target chip field and write it to APIC_ICR2.
+	 */
+	cfg = __prepare_ICR2(mask);
+	apic_write_around(APIC_ICR2, cfg);
+
+	/*
+	 * Program the ICR value; shortcut 0 means no destination shorthand.
+	 */
+	cfg = __prepare_ICR(0, vector);
+
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write_around(APIC_ICR, cfg);
+}
+
+/*
+ * This is only used on smaller machines.
+ *
+ * Sends 'vector' with a single IPI whose destination field is the first
+ * word of 'cpumask'.  Warns if that word has bits set that are not set
+ * in the first word of cpu_online_map.  Local interrupts are disabled
+ * around the check and the send.
+ */
+void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+{
+	unsigned long mask = cpus_addr(cpumask)[0];
+	unsigned long flags;
+
+	local_irq_save(flags);
+	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+	__send_IPI_dest_field(mask, vector);
+	local_irq_restore(flags);
+}
+
+/*
+ * Send 'vector' to every possible CPU that is set in 'mask', one IPI
+ * per CPU, with local interrupts disabled for the whole loop.
+ *
+ * Hack. The clustered APIC addressing mode doesn't allow us to send
+ * to an arbitrary mask, so I do a unicast to each CPU instead. This
+ * should be modified to do 1 message per cluster ID - mbligh
+ */
+void send_IPI_mask_sequence(cpumask_t mask, int vector)
+{
+	unsigned int cpu;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for_each_possible_cpu(cpu) {
+		/* Skip CPUs the caller did not ask for. */
+		if (!cpu_isset(cpu, mask))
+			continue;
+
+		__send_IPI_dest_field(cpu_to_logical_apicid(cpu), vector);
+	}
+	local_irq_restore(flags);
+}
+
+/* must come after the send_IPI functions above for inlining */
+#include <mach_ipi.h>
+/*
+ * Map an APIC ID to a CPU number by scanning the per-cpu
+ * x86_cpu_to_apicid values of all possible CPUs.  Returns the first
+ * matching CPU, or -1 when no possible CPU has that APIC ID.
+ */
+static int convert_apicid_to_cpu(int apic_id)
+{
+	int cpu;
+	int found = -1;
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(x86_cpu_to_apicid, cpu) != apic_id)
+			continue;
+
+		found = cpu;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * Return the number of the current CPU, derived from
+ * hard_smp_processor_id().  Falls back to 0 when the boot CPU lacks
+ * X86_FEATURE_APIC, when the APIC ID is BAD_APICID, or when no
+ * possible CPU matches the APIC ID.
+ */
+int safe_smp_processor_id(void)
+{
+	int hw_id, cpu;
+
+	if (!boot_cpu_has(X86_FEATURE_APIC))
+		return 0;
+
+	hw_id = hard_smp_processor_id();
+	if (hw_id == BAD_APICID)
+		return 0;
+
+	cpu = convert_apicid_to_cpu(hw_id);
+	if (cpu < 0)
+		return 0;
+
+	return cpu;
+}
+#endif

+ 1 - 1
arch/x86/kernel/irq_32.c

@@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
 
 	if (unlikely((unsigned)irq >= NR_IRQS)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-					__FUNCTION__, irq);
+					__func__, irq);
 		BUG();
 	}
 

+ 7 - 7
arch/x86/kernel/kprobes.c

@@ -410,13 +410,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 static void __kprobes clear_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+		update_debugctlmsr(0);
 }
 
 static void __kprobes restore_btf(void)
 {
 	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr);
+		update_debugctlmsr(current->thread.debugctlmsr);
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -489,7 +489,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 		break;
 	case KPROBE_HIT_SS:
 		if (p == kprobe_running()) {
-			regs->flags &= ~TF_MASK;
+			regs->flags &= ~X86_EFLAGS_TF;
 			regs->flags |= kcb->kprobe_saved_flags;
 			return 0;
 		} else {
@@ -858,15 +858,15 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	if (!cur)
 		return 0;
 
+	resume_execution(cur, regs, kcb);
+	regs->flags |= kcb->kprobe_saved_flags;
+	trace_hardirqs_fixup_flags(regs->flags);
+
 	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
 		kcb->kprobe_status = KPROBE_HIT_SSDONE;
 		cur->post_handler(cur, regs, 0);
 	}
 
-	resume_execution(cur, regs, kcb);
-	regs->flags |= kcb->kprobe_saved_flags;
-	trace_hardirqs_fixup_flags(regs->flags);
-
 	/* Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
 		restore_previous_kprobe(kcb);

+ 53 - 43
arch/x86/kernel/mca_32.c

@@ -53,9 +53,9 @@
 #include <linux/init.h>
 #include <asm/arch_hooks.h>
 
-static unsigned char which_scsi = 0;
+static unsigned char which_scsi;
 
-int MCA_bus = 0;
+int MCA_bus;
 EXPORT_SYMBOL(MCA_bus);
 
 /*
@@ -68,15 +68,17 @@ static DEFINE_SPINLOCK(mca_lock);
 
 /* Build the status info for the adapter */
 
-static void mca_configure_adapter_status(struct mca_device *mca_dev) {
+static void mca_configure_adapter_status(struct mca_device *mca_dev)
+{
 	mca_dev->status = MCA_ADAPTER_NONE;
 
 	mca_dev->pos_id = mca_dev->pos[0]
 		+ (mca_dev->pos[1] << 8);
 
-	if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
+	if (!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
 
-		/* id = 0x0000 usually indicates hardware failure,
+		/*
+		 * id = 0x0000 usually indicates hardware failure,
 		 * however, ZP Gu (zpg@castle.net> reports that his 9556
 		 * has 0x0000 as id and everything still works. There
 		 * also seem to be an adapter with id = 0x0000; the
@@ -87,9 +89,10 @@ static void mca_configure_adapter_status(struct mca_device *mca_dev) {
 		mca_dev->status = MCA_ADAPTER_ERROR;
 
 		return;
-	} else if(mca_dev->pos_id != 0xffff) {
+	} else if (mca_dev->pos_id != 0xffff) {
 
-		/* 0xffff usually indicates that there's no adapter,
+		/*
+		 * 0xffff usually indicates that there's no adapter,
 		 * however, some integrated adapters may have 0xffff as
 		 * their id and still be valid. Examples are on-board
 		 * VGA of the 55sx, the integrated SCSI of the 56 & 57,
@@ -99,19 +102,19 @@ static void mca_configure_adapter_status(struct mca_device *mca_dev) {
 		mca_dev->status = MCA_ADAPTER_NORMAL;
 	}
 
-	if((mca_dev->pos_id == 0xffff ||
+	if ((mca_dev->pos_id == 0xffff ||
 	    mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) {
 		int j;
 
-		for(j = 2; j < 8; j++) {
-			if(mca_dev->pos[j] != 0xff) {
+		for (j = 2; j < 8; j++) {
+			if (mca_dev->pos[j] != 0xff) {
 				mca_dev->status = MCA_ADAPTER_NORMAL;
 				break;
 			}
 		}
 	}
 
-	if(!(mca_dev->pos[2] & MCA_ENABLED)) {
+	if (!(mca_dev->pos[2] & MCA_ENABLED)) {
 
 		/* enabled bit is in POS 2 */
 
@@ -133,7 +136,7 @@ static struct resource mca_standard_resources[] = {
 
 #define MCA_STANDARD_RESOURCES	ARRAY_SIZE(mca_standard_resources)
 
-/**
+/*
  *	mca_read_and_store_pos - read the POS registers into a memory buffer
  *      @pos: a char pointer to 8 bytes, contains the POS register value on
  *            successful return
@@ -141,12 +144,14 @@ static struct resource mca_standard_resources[] = {
  *	Returns 1 if a card actually exists (i.e. the pos isn't
  *	all 0xff) or 0 otherwise
  */
-static int mca_read_and_store_pos(unsigned char *pos) {
+static int mca_read_and_store_pos(unsigned char *pos)
+{
 	int j;
 	int found = 0;
 
-	for(j=0; j<8; j++) {
-		if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) {
+	for (j = 0; j < 8; j++) {
+		pos[j] = inb_p(MCA_POS_REG(j));
+		if (pos[j] != 0xff) {
 			/* 0xff all across means no device. 0x00 means
 			 * something's broken, but a device is
 			 * probably there.  However, if you get 0x00
@@ -167,11 +172,11 @@ static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg)
 	unsigned char byte;
 	unsigned long flags;
 
-	if(reg < 0 || reg >= 8)
+	if (reg < 0 || reg >= 8)
 		return 0;
 
 	spin_lock_irqsave(&mca_lock, flags);
-	if(mca_dev->pos_register) {
+	if (mca_dev->pos_register) {
 		/* Disable adapter setup, enable motherboard setup */
 
 		outb_p(0, MCA_ADAPTER_SETUP_REG);
@@ -203,7 +208,7 @@ static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
 {
 	unsigned long flags;
 
-	if(reg < 0 || reg >= 8)
+	if (reg < 0 || reg >= 8)
 		return;
 
 	spin_lock_irqsave(&mca_lock, flags);
@@ -227,17 +232,17 @@ static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
 }
 
 /* for the primary MCA bus, we have identity transforms */
-static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq)
+static int mca_dummy_transform_irq(struct mca_device *mca_dev, int irq)
 {
 	return irq;
 }
 
-static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port)
+static int mca_dummy_transform_ioport(struct mca_device *mca_dev, int port)
 {
 	return port;
 }
 
-static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem)
+static void *mca_dummy_transform_memory(struct mca_device *mca_dev, void *mem)
 {
 	return mem;
 }
@@ -251,7 +256,8 @@ static int __init mca_init(void)
 	short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00};
 	struct mca_bus *bus;
 
-	/* WARNING: Be careful when making changes here. Putting an adapter
+	/*
+	 * WARNING: Be careful when making changes here. Putting an adapter
 	 * and the motherboard simultaneously into setup mode may result in
 	 * damage to chips (according to The Indispensible PC Hardware Book
 	 * by Hans-Peter Messmer). Also, we disable system interrupts (so
@@ -283,7 +289,7 @@ static int __init mca_init(void)
 
 	/* get the motherboard device */
 	mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL);
-	if(unlikely(!mca_dev))
+	if (unlikely(!mca_dev))
 		goto out_nomem;
 
 	/*
@@ -309,7 +315,7 @@ static int __init mca_init(void)
 	mca_register_device(MCA_PRIMARY_BUS, mca_dev);
 
 	mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
-	if(unlikely(!mca_dev))
+	if (unlikely(!mca_dev))
 		goto out_unlock_nomem;
 
 	/* Put motherboard into video setup mode, read integrated video
@@ -326,7 +332,8 @@ static int __init mca_init(void)
 	mca_dev->slot = MCA_INTEGVIDEO;
 	mca_register_device(MCA_PRIMARY_BUS, mca_dev);
 
-	/* Put motherboard into scsi setup mode, read integrated scsi
+	/*
+	 * Put motherboard into scsi setup mode, read integrated scsi
 	 * POS registers, and turn motherboard setup off.
 	 *
 	 * It seems there are two possible SCSI registers. Martin says that
@@ -338,18 +345,18 @@ static int __init mca_init(void)
 	 * machine.
 	 */
 
-	for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
+	for (i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
 		outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG);
-		if(mca_read_and_store_pos(pos))
+		if (mca_read_and_store_pos(pos))
 			break;
 	}
-	if(which_scsi) {
+	if (which_scsi) {
 		/* found a scsi card */
 		mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
-		if(unlikely(!mca_dev))
+		if (unlikely(!mca_dev))
 			goto out_unlock_nomem;
 
-		for(j = 0; j < 8; j++)
+		for (j = 0; j < 8; j++)
 			mca_dev->pos[j] = pos[j];
 
 		mca_configure_adapter_status(mca_dev);
@@ -364,21 +371,22 @@ static int __init mca_init(void)
 
 	outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
 
-	/* Now loop over MCA slots: put each adapter into setup mode, and
+	/*
+	 * Now loop over MCA slots: put each adapter into setup mode, and
 	 * read its POS registers. Then put adapter setup off.
 	 */
 
-	for(i=0; i<MCA_MAX_SLOT_NR; i++) {
+	for (i = 0; i < MCA_MAX_SLOT_NR; i++) {
 		outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG);
-		if(!mca_read_and_store_pos(pos))
+		if (!mca_read_and_store_pos(pos))
 			continue;
 
 		mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
-		if(unlikely(!mca_dev))
+		if (unlikely(!mca_dev))
 			goto out_unlock_nomem;
 
-		for(j=0; j<8; j++)
-			mca_dev->pos[j]=pos[j];
+		for (j = 0; j < 8; j++)
+			mca_dev->pos[j] = pos[j];
 
 		mca_dev->driver_loaded = 0;
 		mca_dev->slot = i;
@@ -414,20 +422,20 @@ mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
 {
 	int slot = mca_dev->slot;
 
-	if(slot == MCA_INTEGSCSI) {
+	if (slot == MCA_INTEGSCSI) {
 		printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n",
 			mca_dev->name);
-	} else if(slot == MCA_INTEGVIDEO) {
+	} else if (slot == MCA_INTEGVIDEO) {
 		printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n",
 			mca_dev->name);
-	} else if(slot == MCA_MOTHERBOARD) {
+	} else if (slot == MCA_MOTHERBOARD) {
 		printk(KERN_CRIT "NMI: caused by motherboard (%s)\n",
 			mca_dev->name);
 	}
 
 	/* More info available in POS 6 and 7? */
 
-	if(check_flag) {
+	if (check_flag) {
 		unsigned char pos6, pos7;
 
 		pos6 = mca_device_read_pos(mca_dev, 6);
@@ -447,8 +455,9 @@ static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
 
 	pos5 = mca_device_read_pos(mca_dev, 5);
 
-	if(!(pos5 & 0x80)) {
-		/* Bit 7 of POS 5 is reset when this adapter has a hardware
+	if (!(pos5 & 0x80)) {
+		/*
+		 *  Bit 7 of POS 5 is reset when this adapter has a hardware
 		 * error. Bit 7 it reset if there's error information
 		 * available in POS 6 and 7.
 		 */
@@ -460,7 +469,8 @@ static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
 
 void __kprobes mca_handle_nmi(void)
 {
-	/* First try - scan the various adapters and see if a specific
+	/*
+	 *  First try - scan the various adapters and see if a specific
 	 * adapter was responsible for the error.
 	 */
 	bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);

+ 8 - 8
arch/x86/kernel/microcode.c

@@ -290,7 +290,7 @@ static int get_maching_microcode(void *mc, int cpu)
 	}
 	return 0;
 find:
-	pr_debug("microcode: CPU %d found a matching microcode update with"
+	pr_debug("microcode: CPU%d found a matching microcode update with"
 		" version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
 	new_mc = vmalloc(total_size);
 	if (!new_mc) {
@@ -336,11 +336,11 @@ static void apply_microcode(int cpu)
 
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
 	if (val[1] != uci->mc->hdr.rev) {
-		printk(KERN_ERR "microcode: CPU%d updated from revision "
+		printk(KERN_ERR "microcode: CPU%d update from revision "
 			"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
 		return;
 	}
-	pr_debug("microcode: CPU%d updated from revision "
+	printk(KERN_INFO "microcode: CPU%d updated from revision "
 	       "0x%x to 0x%x, date = %08x \n", 
 	       cpu_num, uci->rev, val[1], uci->mc->hdr.date);
 	uci->rev = val[1];
@@ -534,7 +534,7 @@ static int cpu_request_microcode(int cpu)
 		c->x86, c->x86_model, c->x86_mask);
 	error = request_firmware(&firmware, name, &microcode_pdev->dev);
 	if (error) {
-		pr_debug("ucode data file %s load failed\n", name);
+		pr_debug("microcode: ucode data file %s load failed\n", name);
 		return error;
 	}
 	buf = firmware->data;
@@ -709,7 +709,7 @@ static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
 	if (!cpu_online(cpu))
 		return 0;
 
-	pr_debug("Microcode:CPU %d added\n", cpu);
+	pr_debug("microcode: CPU%d added\n", cpu);
 	memset(uci, 0, sizeof(*uci));
 
 	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
@@ -733,7 +733,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 	if (!cpu_online(cpu))
 		return 0;
 
-	pr_debug("Microcode:CPU %d removed\n", cpu);
+	pr_debug("microcode: CPU%d removed\n", cpu);
 	microcode_fini_cpu(cpu);
 	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
 	return 0;
@@ -745,7 +745,7 @@ static int mc_sysdev_resume(struct sys_device *dev)
 
 	if (!cpu_online(cpu))
 		return 0;
-	pr_debug("Microcode:CPU %d resumed\n", cpu);
+	pr_debug("microcode: CPU%d resumed\n", cpu);
 	/* only CPU 0 will apply ucode here */
 	apply_microcode(0);
 	return 0;
@@ -783,7 +783,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 		}
 	case CPU_DOWN_FAILED_FROZEN:
 		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
-			printk(KERN_ERR "Microcode: Failed to create the sysfs "
+			printk(KERN_ERR "microcode: Failed to create the sysfs "
 				"group for CPU%d\n", cpu);
 		break;
 	case CPU_DOWN_PREPARE:

Разлика између датотеке није приказан због своје велике величине
+ 353 - 405
arch/x86/kernel/mpparse.c


+ 0 - 867
arch/x86/kernel/mpparse_64.c

@@ -1,867 +0,0 @@
-/*
- *	Intel Multiprocessor Specification 1.1 and 1.4
- *	compliant MP-table parsing routines.
- *
- *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *	Fixes
- *		Erich Boleyn	:	MP v1.4 and additional changes.
- *		Alan Cox	:	Added EBDA scanning
- *		Ingo Molnar	:	various cleanups and rewrites
- *		Maciej W. Rozycki:	Bits for default MP configurations
- *		Paul Diefenbaugh:	Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-
-/* Have we found an MP table */
-int smp_found_config;
-
-/*
- * Various Linux-internal data structures created from the
- * MP-table.
- */
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-
-static int mp_current_pci_id = 0;
-/* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* MP IRQ source entries */
-int mp_irq_entries;
-
-int nr_ioapics;
-unsigned long mp_lapic_addr = 0;
-
-
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_id = -1U;
-EXPORT_SYMBOL(boot_cpu_id);
-
-/* Internal processor count */
-unsigned int num_processors;
-
-unsigned disabled_cpus __cpuinitdata;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
-
-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
-				= { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_bios_cpu_apicid_early_ptr;
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
-
-/*
- * Intel MP BIOS table parsing routines:
- */
-
-/*
- * Checksum an MP configuration block.
- */
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
-	int sum = 0;
-
-	while (len--)
-		sum += *mp++;
-
-	return sum & 0xFF;
-}
-
-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
-{
-	int cpu;
-	cpumask_t tmp_map;
-	char *bootup_cpu = "";
-
-	if (!(m->mpc_cpuflag & CPU_ENABLED)) {
-		disabled_cpus++;
-		return;
-	}
-	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
-		bootup_cpu = " (Bootup-CPU)";
-		boot_cpu_id = m->mpc_apicid;
-	}
-
-	printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
-
-	if (num_processors >= NR_CPUS) {
-		printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-			" Processor ignored.\n", NR_CPUS);
-		return;
-	}
-
-	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-
-	physid_set(m->mpc_apicid, phys_cpu_present_map);
- 	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- 		/*
-		 * x86_bios_cpu_apicid is required to have processors listed
- 		 * in same order as logical cpu numbers. Hence the first
- 		 * entry is BSP, and so on.
- 		 */
-		cpu = 0;
- 	}
-	/* are we being called early in kernel startup? */
-	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
-
-		cpu_to_apicid[cpu] = m->mpc_apicid;
-		bios_cpu_apicid[cpu] = m->mpc_apicid;
-	} else {
-		per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
-		per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
-	}
-
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
-}
-
-static void __init MP_bus_info (struct mpc_config_bus *m)
-{
-	char str[7];
-
-	memcpy(str, m->mpc_bustype, 6);
-	str[6] = 0;
-	Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
-
-	if (strncmp(str, "ISA", 3) == 0) {
-		set_bit(m->mpc_busid, mp_bus_not_pci);
-	} else if (strncmp(str, "PCI", 3) == 0) {
-		clear_bit(m->mpc_busid, mp_bus_not_pci);
-		mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
-		mp_current_pci_id++;
-	} else {
-		printk(KERN_ERR "Unknown bustype %s\n", str);
-	}
-}
-
-static int bad_ioapic(unsigned long address)
-{
-	if (nr_ioapics >= MAX_IO_APICS) {
-		printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
-			"(found %d)\n", MAX_IO_APICS, nr_ioapics);
-		panic("Recompile kernel with bigger MAX_IO_APICS!\n");
-	}
-	if (!address) {
-		printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
-			" found in table, skipping!\n");
-		return 1;
-	}
-	return 0;
-}
-
-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
-{
-	if (!(m->mpc_flags & MPC_APIC_USABLE))
-		return;
-
-	printk("I/O APIC #%d at 0x%X.\n",
-		m->mpc_apicid, m->mpc_apicaddr);
-
-	if (bad_ioapic(m->mpc_apicaddr))
-		return;
-
-	mp_ioapics[nr_ioapics] = *m;
-	nr_ioapics++;
-}
-
-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
-{
-	mp_irqs [mp_irq_entries] = *m;
-	Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
-		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
-			m->mpc_irqtype, m->mpc_irqflag & 3,
-			(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
-			m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-	if (++mp_irq_entries >= MAX_IRQ_SOURCES)
-		panic("Max # of irq sources exceeded!!\n");
-}
-
-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
-{
-	Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
-		" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
-			m->mpc_irqtype, m->mpc_irqflag & 3,
-			(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
-			m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
-}
-
-/*
- * Read/parse the MPC
- */
-
-static int __init smp_read_mpc(struct mp_config_table *mpc)
-{
-	char str[16];
-	int count=sizeof(*mpc);
-	unsigned char *mpt=((unsigned char *)mpc)+count;
-
-	if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
-		printk("MPTABLE: bad signature [%c%c%c%c]!\n",
-			mpc->mpc_signature[0],
-			mpc->mpc_signature[1],
-			mpc->mpc_signature[2],
-			mpc->mpc_signature[3]);
-		return 0;
-	}
-	if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
-		printk("MPTABLE: checksum error!\n");
-		return 0;
-	}
-	if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
-		printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
-			mpc->mpc_spec);
-		return 0;
-	}
-	if (!mpc->mpc_lapic) {
-		printk(KERN_ERR "MPTABLE: null local APIC address!\n");
-		return 0;
-	}
-	memcpy(str,mpc->mpc_oem,8);
-	str[8] = 0;
-	printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
-
-	memcpy(str,mpc->mpc_productid,12);
-	str[12] = 0;
-	printk("MPTABLE: Product ID: %s ",str);
-
-	printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
-
-	/* save the local APIC address, it might be non-default */
-	if (!acpi_lapic)
-		mp_lapic_addr = mpc->mpc_lapic;
-
-	/*
-	 *	Now process the configuration blocks.
-	 */
-	while (count < mpc->mpc_length) {
-		switch(*mpt) {
-			case MP_PROCESSOR:
-			{
-				struct mpc_config_processor *m=
-					(struct mpc_config_processor *)mpt;
-				if (!acpi_lapic)
-					MP_processor_info(m);
-				mpt += sizeof(*m);
-				count += sizeof(*m);
-				break;
-			}
-			case MP_BUS:
-			{
-				struct mpc_config_bus *m=
-					(struct mpc_config_bus *)mpt;
-				MP_bus_info(m);
-				mpt += sizeof(*m);
-				count += sizeof(*m);
-				break;
-			}
-			case MP_IOAPIC:
-			{
-				struct mpc_config_ioapic *m=
-					(struct mpc_config_ioapic *)mpt;
-				MP_ioapic_info(m);
-				mpt += sizeof(*m);
-				count += sizeof(*m);
-				break;
-			}
-			case MP_INTSRC:
-			{
-				struct mpc_config_intsrc *m=
-					(struct mpc_config_intsrc *)mpt;
-
-				MP_intsrc_info(m);
-				mpt += sizeof(*m);
-				count += sizeof(*m);
-				break;
-			}
-			case MP_LINTSRC:
-			{
-				struct mpc_config_lintsrc *m=
-					(struct mpc_config_lintsrc *)mpt;
-				MP_lintsrc_info(m);
-				mpt += sizeof(*m);
-				count += sizeof(*m);
-				break;
-			}
-		}
-	}
-	setup_apic_routing();
-	if (!num_processors)
-		printk(KERN_ERR "MPTABLE: no processors registered!\n");
-	return num_processors;
-}
-
-static int __init ELCR_trigger(unsigned int irq)
-{
-	unsigned int port;
-
-	port = 0x4d0 + (irq >> 3);
-	return (inb(port) >> (irq & 7)) & 1;
-}
-
-static void __init construct_default_ioirq_mptable(int mpc_default_type)
-{
-	struct mpc_config_intsrc intsrc;
-	int i;
-	int ELCR_fallback = 0;
-
-	intsrc.mpc_type = MP_INTSRC;
-	intsrc.mpc_irqflag = 0;			/* conforming */
-	intsrc.mpc_srcbus = 0;
-	intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-
-	intsrc.mpc_irqtype = mp_INT;
-
-	/*
-	 *  If true, we have an ISA/PCI system with no IRQ entries
-	 *  in the MP table. To prevent the PCI interrupts from being set up
-	 *  incorrectly, we try to use the ELCR. The sanity check to see if
-	 *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
-	 *  never be level sensitive, so we simply see if the ELCR agrees.
-	 *  If it does, we assume it's valid.
-	 */
-	if (mpc_default_type == 5) {
-		printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
-
-		if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
-			printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
-		else {
-			printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
-			ELCR_fallback = 1;
-		}
-	}
-
-	for (i = 0; i < 16; i++) {
-		switch (mpc_default_type) {
-		case 2:
-			if (i == 0 || i == 13)
-				continue;	/* IRQ0 & IRQ13 not connected */
-			/* fall through */
-		default:
-			if (i == 2)
-				continue;	/* IRQ2 is never connected */
-		}
-
-		if (ELCR_fallback) {
-			/*
-			 *  If the ELCR indicates a level-sensitive interrupt, we
-			 *  copy that information over to the MP table in the
-			 *  irqflag field (level sensitive, active high polarity).
-			 */
-			if (ELCR_trigger(i))
-				intsrc.mpc_irqflag = 13;
-			else
-				intsrc.mpc_irqflag = 0;
-		}
-
-		intsrc.mpc_srcbusirq = i;
-		intsrc.mpc_dstirq = i ? i : 2;		/* IRQ0 to INTIN2 */
-		MP_intsrc_info(&intsrc);
-	}
-
-	intsrc.mpc_irqtype = mp_ExtINT;
-	intsrc.mpc_srcbusirq = 0;
-	intsrc.mpc_dstirq = 0;				/* 8259A to INTIN0 */
-	MP_intsrc_info(&intsrc);
-}
-
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
-{
-	struct mpc_config_processor processor;
-	struct mpc_config_bus bus;
-	struct mpc_config_ioapic ioapic;
-	struct mpc_config_lintsrc lintsrc;
-	int linttypes[2] = { mp_ExtINT, mp_NMI };
-	int i;
-
-	/*
-	 * local APIC has default address
-	 */
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-	/*
-	 * 2 CPUs, numbered 0 & 1.
-	 */
-	processor.mpc_type = MP_PROCESSOR;
-	processor.mpc_apicver = 0;
-	processor.mpc_cpuflag = CPU_ENABLED;
-	processor.mpc_cpufeature = 0;
-	processor.mpc_featureflag = 0;
-	processor.mpc_reserved[0] = 0;
-	processor.mpc_reserved[1] = 0;
-	for (i = 0; i < 2; i++) {
-		processor.mpc_apicid = i;
-		MP_processor_info(&processor);
-	}
-
-	bus.mpc_type = MP_BUS;
-	bus.mpc_busid = 0;
-	switch (mpc_default_type) {
-		default:
-			printk(KERN_ERR "???\nUnknown standard configuration %d\n",
-				mpc_default_type);
-			/* fall through */
-		case 1:
-		case 5:
-			memcpy(bus.mpc_bustype, "ISA   ", 6);
-			break;
-	}
-	MP_bus_info(&bus);
-	if (mpc_default_type > 4) {
-		bus.mpc_busid = 1;
-		memcpy(bus.mpc_bustype, "PCI   ", 6);
-		MP_bus_info(&bus);
-	}
-
-	ioapic.mpc_type = MP_IOAPIC;
-	ioapic.mpc_apicid = 2;
-	ioapic.mpc_apicver = 0;
-	ioapic.mpc_flags = MPC_APIC_USABLE;
-	ioapic.mpc_apicaddr = 0xFEC00000;
-	MP_ioapic_info(&ioapic);
-
-	/*
-	 * We set up most of the low 16 IO-APIC pins according to MPS rules.
-	 */
-	construct_default_ioirq_mptable(mpc_default_type);
-
-	lintsrc.mpc_type = MP_LINTSRC;
-	lintsrc.mpc_irqflag = 0;		/* conforming */
-	lintsrc.mpc_srcbusid = 0;
-	lintsrc.mpc_srcbusirq = 0;
-	lintsrc.mpc_destapic = MP_APIC_ALL;
-	for (i = 0; i < 2; i++) {
-		lintsrc.mpc_irqtype = linttypes[i];
-		lintsrc.mpc_destapiclint = i;
-		MP_lintsrc_info(&lintsrc);
-	}
-}
-
-static struct intel_mp_floating *mpf_found;
-
-/*
- * Scan the memory blocks for an SMP configuration block.
- */
-void __init get_smp_config (void)
-{
-	struct intel_mp_floating *mpf = mpf_found;
-
-	/*
- 	 * ACPI supports both logical (e.g. Hyper-Threading) and physical 
- 	 * processors, where MPS only supports physical.
- 	 */
- 	if (acpi_lapic && acpi_ioapic) {
- 		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
- 		return;
-	}
- 	else if (acpi_lapic)
- 		printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
-
-	printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
-
-	/*
-	 * Now see if we need to read further.
-	 */
-	if (mpf->mpf_feature1 != 0) {
-
-		printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
-		construct_default_ISA_mptable(mpf->mpf_feature1);
-
-	} else if (mpf->mpf_physptr) {
-
-		/*
-		 * Read the physical hardware table.  Anything here will
-		 * override the defaults.
-		 */
-		if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
-			smp_found_config = 0;
-			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
-			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
-			return;
-		}
-		/*
-		 * If there are no explicit MP IRQ entries, then we are
-		 * broken.  We set up most of the low 16 IO-APIC pins to
-		 * ISA defaults and hope it will work.
-		 */
-		if (!mp_irq_entries) {
-			struct mpc_config_bus bus;
-
-			printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
-
-			bus.mpc_type = MP_BUS;
-			bus.mpc_busid = 0;
-			memcpy(bus.mpc_bustype, "ISA   ", 6);
-			MP_bus_info(&bus);
-
-			construct_default_ioirq_mptable(0);
-		}
-
-	} else
-		BUG();
-
-	printk(KERN_INFO "Processors: %d\n", num_processors);
-	/*
-	 * Only use the first configuration found.
-	 */
-}
-
-static int __init smp_scan_config (unsigned long base, unsigned long length)
-{
-	extern void __bad_mpf_size(void); 
-	unsigned int *bp = phys_to_virt(base);
-	struct intel_mp_floating *mpf;
-
-	Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
-	if (sizeof(*mpf) != 16)
-		__bad_mpf_size();
-
-	while (length > 0) {
-		mpf = (struct intel_mp_floating *)bp;
-		if ((*bp == SMP_MAGIC_IDENT) &&
-			(mpf->mpf_length == 1) &&
-			!mpf_checksum((unsigned char *)bp, 16) &&
-			((mpf->mpf_specification == 1)
-				|| (mpf->mpf_specification == 4)) ) {
-
-			smp_found_config = 1;
-			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
-			if (mpf->mpf_physptr)
-				reserve_bootmem_generic(mpf->mpf_physptr, PAGE_SIZE);
-			mpf_found = mpf;
-			return 1;
-		}
-		bp += 4;
-		length -= 16;
-	}
-	return 0;
-}
-
-void __init find_smp_config(void)
-{
-	unsigned int address;
-
-	/*
-	 * FIXME: Linux assumes you have 640K of base ram..
-	 * this continues the error...
-	 *
-	 * 1) Scan the bottom 1K for a signature
-	 * 2) Scan the top 1K of base RAM
-	 * 3) Scan the 64K of bios
-	 */
-	if (smp_scan_config(0x0,0x400) ||
-		smp_scan_config(639*0x400,0x400) ||
-			smp_scan_config(0xF0000,0x10000))
-		return;
-	/*
-	 * If it is an SMP machine we should know now.
-	 *
-	 * there is a real-mode segmented pointer pointing to the
-	 * 4K EBDA area at 0x40E, calculate and scan it here.
-	 *
-	 * NOTE! There are Linux loaders that will corrupt the EBDA
-	 * area, and as such this kind of SMP config may be less
-	 * trustworthy, simply because the SMP table may have been
-	 * stomped on during early boot. These loaders are buggy and
-	 * should be fixed.
-	 */
-
-	address = *(unsigned short *)phys_to_virt(0x40E);
-	address <<= 4;
-	if (smp_scan_config(address, 0x1000))
-		return;
-
-	/* If we have come this far, we did not find an MP table  */
-	 printk(KERN_INFO "No mptable found.\n");
-}
-
-/* --------------------------------------------------------------------------
-                            ACPI-based MP Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-void __init mp_register_lapic_address(u64 address)
-{
-	mp_lapic_addr = (unsigned long) address;
-	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-	if (boot_cpu_id == -1U)
-		boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-}
-
-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
-{
-	struct mpc_config_processor processor;
-	int			boot_cpu = 0;
-	
-	if (id == boot_cpu_id)
-		boot_cpu = 1;
-
-	processor.mpc_type = MP_PROCESSOR;
-	processor.mpc_apicid = id;
-	processor.mpc_apicver = 0;
-	processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
-	processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
-	processor.mpc_cpufeature = 0;
-	processor.mpc_featureflag = 0;
-	processor.mpc_reserved[0] = 0;
-	processor.mpc_reserved[1] = 0;
-
-	MP_processor_info(&processor);
-}
-
-#define MP_ISA_BUS		0
-#define MP_MAX_IOAPIC_PIN	127
-
-static struct mp_ioapic_routing {
-	int			apic_id;
-	int			gsi_start;
-	int			gsi_end;
-	u32			pin_programmed[4];
-} mp_ioapic_routing[MAX_IO_APICS];
-
-static int mp_find_ioapic(int gsi)
-{
-	int i = 0;
-
-	/* Find the IOAPIC that manages this GSI. */
-	for (i = 0; i < nr_ioapics; i++) {
-		if ((gsi >= mp_ioapic_routing[i].gsi_start)
-			&& (gsi <= mp_ioapic_routing[i].gsi_end))
-			return i;
-	}
-
-	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
-	return -1;
-}
-
-static u8 uniq_ioapic_id(u8 id)
-{
-	int i;
-	DECLARE_BITMAP(used, 256);
-	bitmap_zero(used, 256);
-	for (i = 0; i < nr_ioapics; i++) {
-		struct mpc_config_ioapic *ia = &mp_ioapics[i];
-		__set_bit(ia->mpc_apicid, used);
-	}
-	if (!test_bit(id, used))
-		return id;
-	return find_first_zero_bit(used, 256);
-}
-
-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
-{
-	int idx = 0;
-
-	if (bad_ioapic(address))
-		return;
-
-	idx = nr_ioapics;
-
-	mp_ioapics[idx].mpc_type = MP_IOAPIC;
-	mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
-	mp_ioapics[idx].mpc_apicaddr = address;
-
-	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
-	mp_ioapics[idx].mpc_apicver = 0;
-	
-	/* 
-	 * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
-	 * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
-	 */
-	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
-	mp_ioapic_routing[idx].gsi_start = gsi_base;
-	mp_ioapic_routing[idx].gsi_end = gsi_base + 
-		io_apic_get_redir_entries(idx);
-
-	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
-		"GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
-		mp_ioapics[idx].mpc_apicaddr,
-		mp_ioapic_routing[idx].gsi_start,
-		mp_ioapic_routing[idx].gsi_end);
-
-	nr_ioapics++;
-}
-
-void __init
-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32	gsi)
-{
-	struct mpc_config_intsrc intsrc;
-	int			ioapic = -1;
-	int			pin = -1;
-
-	/* 
-	 * Convert 'gsi' to 'ioapic.pin'.
-	 */
-	ioapic = mp_find_ioapic(gsi);
-	if (ioapic < 0)
-		return;
-	pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
-	/*
-	 * TBD: This check is for faulty timer entries, where the override
-	 *      erroneously sets the trigger to level, resulting in a HUGE 
-	 *      increase of timer interrupts!
-	 */
-	if ((bus_irq == 0) && (trigger == 3))
-		trigger = 1;
-
-	intsrc.mpc_type = MP_INTSRC;
-	intsrc.mpc_irqtype = mp_INT;
-	intsrc.mpc_irqflag = (trigger << 2) | polarity;
-	intsrc.mpc_srcbus = MP_ISA_BUS;
-	intsrc.mpc_srcbusirq = bus_irq;				       /* IRQ */
-	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;	   /* APIC ID */
-	intsrc.mpc_dstirq = pin;				    /* INTIN# */
-
-	Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", 
-		intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
-		(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
-		intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
-	mp_irqs[mp_irq_entries] = intsrc;
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
-		panic("Max # of irq sources exceeded!\n");
-}
-
-void __init mp_config_acpi_legacy_irqs(void)
-{
-	struct mpc_config_intsrc intsrc;
-	int i = 0;
-	int ioapic = -1;
-
-	/* 
-	 * Fabricate the legacy ISA bus (bus #31).
-	 */
-	set_bit(MP_ISA_BUS, mp_bus_not_pci);
-
-	/* 
-	 * Locate the IOAPIC that manages the ISA IRQs (0-15). 
-	 */
-	ioapic = mp_find_ioapic(0);
-	if (ioapic < 0)
-		return;
-
-	intsrc.mpc_type = MP_INTSRC;
-	intsrc.mpc_irqflag = 0;					/* Conforming */
-	intsrc.mpc_srcbus = MP_ISA_BUS;
-	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
-
-	/* 
-	 * Use the default configuration for the IRQs 0-15.  Unless
-	 * overridden by (MADT) interrupt source override entries.
-	 */
-	for (i = 0; i < 16; i++) {
-		int idx;
-
-		for (idx = 0; idx < mp_irq_entries; idx++) {
-			struct mpc_config_intsrc *irq = mp_irqs + idx;
-
-			/* Do we already have a mapping for this ISA IRQ? */
-			if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
-				break;
-
-			/* Do we already have a mapping for this IOAPIC pin */
-			if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
-				(irq->mpc_dstirq == i))
-				break;
-		}
-
-		if (idx != mp_irq_entries) {
-			printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
-			continue;			/* IRQ already used */
-		}
-
-		intsrc.mpc_irqtype = mp_INT;
-		intsrc.mpc_srcbusirq = i;		   /* Identity mapped */
-		intsrc.mpc_dstirq = i;
-
-		Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
-			"%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
-			(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
-			intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
-			intsrc.mpc_dstirq);
-
-		mp_irqs[mp_irq_entries] = intsrc;
-		if (++mp_irq_entries == MAX_IRQ_SOURCES)
-			panic("Max # of irq sources exceeded!\n");
-	}
-}
-
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
-{
-	int ioapic = -1;
-	int ioapic_pin = 0;
-	int idx, bit = 0;
-
-	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
-		return gsi;
-
-	/* Don't set up the ACPI SCI because it's already set up */
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return gsi;
-
-	ioapic = mp_find_ioapic(gsi);
-	if (ioapic < 0) {
-		printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
-		return gsi;
-	}
-
-	ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
-	/* 
-	 * Avoid pin reprogramming.  PRTs typically include entries  
-	 * with redundant pin->gsi mappings (but unique PCI devices);
-	 * we only program the IOAPIC on the first.
-	 */
-	bit = ioapic_pin % 32;
-	idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
-	if (idx > 3) {
-		printk(KERN_ERR "Invalid reference to IOAPIC pin "
-			"%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
-			ioapic_pin);
-		return gsi;
-	}
-	if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
-		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
-			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-		return gsi;
-	}
-
-	mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-
-	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
-		triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
-		polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-	return gsi;
-}
-#endif /*CONFIG_ACPI*/

+ 2 - 2
arch/x86/kernel/msr.c

@@ -65,8 +65,8 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
 	return ret;
 }
 
-static ssize_t msr_read(struct file *file, char __user * buf,
-			size_t count, loff_t * ppos)
+static ssize_t msr_read(struct file *file, char __user *buf,
+			size_t count, loff_t *ppos)
 {
 	u32 __user *tmp = (u32 __user *) buf;
 	u32 data[2];

+ 9 - 5
arch/x86/kernel/nmi_32.c

@@ -22,9 +22,11 @@
 #include <linux/cpumask.h>
 #include <linux/kernel_stat.h>
 #include <linux/kdebug.h>
+#include <linux/slab.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
+#include <asm/timer.h>
 
 #include "mach_traps.h"
 
@@ -67,7 +69,7 @@ static __init void nmi_cpu_busy(void *data)
 }
 #endif
 
-static int __init check_nmi_watchdog(void)
+int __init check_nmi_watchdog(void)
 {
 	unsigned int *prev_nmi_count;
 	int cpu;
@@ -80,7 +82,7 @@ static int __init check_nmi_watchdog(void)
 
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
 	if (!prev_nmi_count)
-		return -1;
+		goto error;
 
 	printk(KERN_INFO "Testing NMI watchdog ... ");
 
@@ -117,7 +119,7 @@ static int __init check_nmi_watchdog(void)
 	if (!atomic_read(&nmi_active)) {
 		kfree(prev_nmi_count);
 		atomic_set(&nmi_active, -1);
-		return -1;
+		goto error;
 	}
 	printk("OK.\n");
 
@@ -128,9 +130,11 @@ static int __init check_nmi_watchdog(void)
 
 	kfree(prev_nmi_count);
 	return 0;
+error:
+	timer_ack = !cpu_has_tsc;
+
+	return -1;
 }
-/* This needs to happen later in boot so counters are working */
-late_initcall(check_nmi_watchdog);
 
 static int __init setup_nmi_watchdog(char *str)
 {

+ 2 - 0
arch/x86/kernel/nmi_64.c

@@ -26,6 +26,8 @@
 #include <asm/proto.h>
 #include <asm/mce.h>
 
+#include <mach_traps.h>
+
 int unknown_nmi_panic;
 int nmi_watchdog_enabled;
 int panic_on_unrecovered_nmi;

+ 1 - 17
arch/x86/kernel/paravirt.c

@@ -206,13 +206,6 @@ static struct resource reserve_ioports = {
 	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
 };
 
-static struct resource reserve_iomem = {
-	.start = 0,
-	.end = -1,
-	.name = "paravirt-iomem",
-	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
 /*
  * Reserve the whole legacy IO space to prevent any legacy drivers
  * from wasting time probing for their hardware.  This is a fairly
@@ -222,16 +215,7 @@ static struct resource reserve_iomem = {
  */
 int paravirt_disable_iospace(void)
 {
-	int ret;
-
-	ret = request_resource(&ioport_resource, &reserve_ioports);
-	if (ret == 0) {
-		ret = request_resource(&iomem_resource, &reserve_iomem);
-		if (ret)
-			release_resource(&reserve_ioports);
-	}
-
-	return ret;
+	return request_resource(&ioport_resource, &reserve_ioports);
 }
 
 static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;

Неке датотеке нису приказане због велике количине промена