
Merge branch 'tip/perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/urgent

Ingo Molnar 14 years ago
parent
commit
4385428a47
100 changed files with 3095 additions and 827 deletions
  1. Documentation/RCU/trace.txt (+128 -16)
  2. Documentation/dontdiff (+26 -0)
  3. Documentation/kernel-docs.txt (+2 -25)
  4. Documentation/kernel-parameters.txt (+7 -4)
  5. Documentation/x86/boot.txt (+1 -0)
  6. MAINTAINERS (+16 -0)
  7. arch/Kconfig (+3 -0)
  8. arch/s390/Kconfig (+1 -0)
  9. arch/s390/include/asm/mutex.h (+2 -0)
  10. arch/x86/Kconfig (+35 -6)
  11. arch/x86/Kconfig.debug (+11 -0)
  12. arch/x86/boot/compressed/head_64.S (+1 -1)
  13. arch/x86/include/asm/alternative.h (+1 -0)
  14. arch/x86/include/asm/amd_nb.h (+33 -16)
  15. arch/x86/include/asm/apic.h (+1 -0)
  16. arch/x86/include/asm/apicdef.h (+1 -0)
  17. arch/x86/include/asm/bootparam.h (+1 -0)
  18. arch/x86/include/asm/fixmap.h (+4 -0)
  19. arch/x86/include/asm/i387.h (+24 -0)
  20. arch/x86/include/asm/io_apic.h (+3 -5)
  21. arch/x86/include/asm/mce.h (+3 -0)
  22. arch/x86/include/asm/microcode.h (+6 -0)
  23. arch/x86/include/asm/mpspec.h (+12 -19)
  24. arch/x86/include/asm/mpspec_def.h (+0 -7)
  25. arch/x86/include/asm/mrst-vrtc.h (+9 -0)
  26. arch/x86/include/asm/mrst.h (+13 -1)
  27. arch/x86/include/asm/msr-index.h (+12 -0)
  28. arch/x86/include/asm/paravirt.h (+1 -1)
  29. arch/x86/include/asm/pci.h (+1 -0)
  30. arch/x86/include/asm/setup.h (+6 -0)
  31. arch/x86/include/asm/uv/uv_bau.h (+5 -4)
  32. arch/x86/kernel/Makefile (+0 -1)
  33. arch/x86/kernel/acpi/boot.c (+8 -3)
  34. arch/x86/kernel/alternative.c (+2 -1)
  35. arch/x86/kernel/amd_nb.c (+80 -55)
  36. arch/x86/kernel/apb_timer.c (+1 -0)
  37. arch/x86/kernel/aperture_64.c (+5 -5)
  38. arch/x86/kernel/apic/apic.c (+63 -41)
  39. arch/x86/kernel/apic/io_apic.c (+22 -15)
  40. arch/x86/kernel/apic/x2apic_uv_x.c (+34 -27)
  41. arch/x86/kernel/cpu/intel_cacheinfo.c (+63 -84)
  42. arch/x86/kernel/cpu/mcheck/mce_amd.c (+77 -58)
  43. arch/x86/kernel/cpu/mcheck/therm_throt.c (+40 -0)
  44. arch/x86/kernel/early_printk.c (+1 -2)
  45. arch/x86/kernel/ftrace.c (+3 -0)
  46. arch/x86/kernel/head32.c (+3 -0)
  47. arch/x86/kernel/head_32.S (+45 -38)
  48. arch/x86/kernel/microcode_amd.c (+10 -24)
  49. arch/x86/kernel/pci-gart_64.c (+17 -17)
  50. arch/x86/kernel/reboot_fixups_32.c (+16 -0)
  51. arch/x86/kernel/setup.c (+5 -8)
  52. arch/x86/kernel/smpboot.c (+14 -0)
  53. arch/x86/kernel/trampoline_64.S (+1 -1)
  54. arch/x86/kernel/tsc.c (+91 -5)
  55. arch/x86/kernel/verify_cpu.S (+41 -8)
  56. arch/x86/kernel/vmlinux.lds.S (+6 -2)
  57. arch/x86/lguest/i386_head.S (+0 -105)
  58. arch/x86/mm/Makefile (+1 -1)
  59. arch/x86/mm/amdtopology_64.c (+6 -6)
  60. arch/x86/mm/init.c (+2 -1)
  61. arch/x86/mm/init_32.c (+19 -1)
  62. arch/x86/mm/numa_64.c (+11 -11)
  63. arch/x86/mm/pageattr.c (+22 -11)
  64. arch/x86/mm/setup_nx.c (+1 -1)
  65. arch/x86/mm/srat_32.c (+1 -0)
  66. arch/x86/mm/srat_64.c (+10 -0)
  67. arch/x86/oprofile/op_model_amd.c (+1 -0)
  68. arch/x86/pci/Makefile (+1 -0)
  69. arch/x86/pci/ce4100.c (+315 -0)
  70. arch/x86/pci/pcbios.c (+23 -0)
  71. arch/x86/platform/Makefile (+2 -0)
  72. arch/x86/platform/ce4100/Makefile (+1 -0)
  73. arch/x86/platform/ce4100/ce4100.c (+132 -0)
  74. arch/x86/platform/iris/Makefile (+1 -0)
  75. arch/x86/platform/iris/iris.c (+91 -0)
  76. arch/x86/platform/mrst/Makefile (+2 -0)
  77. arch/x86/platform/mrst/early_printk_mrst.c (+0 -0)
  78. arch/x86/platform/mrst/mrst.c (+535 -11)
  79. arch/x86/platform/mrst/vrtc.c (+165 -0)
  80. arch/x86/platform/sfi/sfi.c (+2 -2)
  81. arch/x86/platform/uv/tlb_uv.c (+18 -4)
  82. arch/x86/platform/visws/visws_quirks.c (+1 -1)
  83. drivers/acpi/numa.c (+12 -2)
  84. drivers/char/agp/amd64-agp.c (+16 -17)
  85. drivers/edac/amd64_edac.c (+2 -2)
  86. drivers/platform/x86/intel_scu_ipc.c (+5 -0)
  87. drivers/rtc/Kconfig (+12 -0)
  88. drivers/rtc/Makefile (+1 -0)
  89. drivers/rtc/rtc-mrst.c (+582 -0)
  90. fs/gfs2/bmap.c (+8 -3)
  91. fs/gfs2/glock.c (+34 -37)
  92. fs/gfs2/glock.h (+6 -22)
  93. fs/gfs2/glops.c (+0 -1)
  94. fs/gfs2/incore.h (+7 -5)
  95. fs/gfs2/inode.c (+0 -9)
  96. fs/gfs2/lock_dlm.c (+4 -11)
  97. fs/gfs2/ops_inode.c (+1 -17)
  98. fs/gfs2/quota.c (+11 -2)
  99. fs/gfs2/rgrp.c (+13 -44)
  100. fs/gfs2/rgrp.h (+1 -0)

+ 128 - 16
Documentation/RCU/trace.txt

@@ -1,18 +1,22 @@
CONFIG_RCU_TRACE debugfs Files and Formats


-The rcutree implementation of RCU provides debugfs trace output that
-summarizes counters and state.  This information is useful for debugging
-RCU itself, and can sometimes also help to debug abuses of RCU.
-The following sections describe the debugfs files and formats.
+The rcutree and rcutiny implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+The following sections describe the debugfs files and formats, first
+for rcutree and next for rcutiny.


-Hierarchical RCU debugfs Files and Formats
+CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
 
 
-This implementation of RCU provides three debugfs files under the
+These implementations of RCU provide five debugfs files under the
top-level directory RCU: rcu/rcudata (which displays fields in struct
-rcu_data), rcu/rcugp (which displays grace-period counters), and
-rcu/rcuhier (which displays the struct rcu_node hierarchy).
+rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
+rcu/rcudata), rcu/rcugp (which displays grace-period counters),
+rcu/rcuhier (which displays the struct rcu_node hierarchy), and
+rcu/rcu_pending (which displays counts of the reasons that the
+rcu_pending() function decided that there was core RCU work to do).

The output of "cat rcu/rcudata" looks as follows:

@@ -130,7 +134,8 @@ o	"ci" is the number of RCU callbacks that have been invoked for
	been registered in absence of CPU-hotplug activity.

o	"co" is the number of RCU callbacks that have been orphaned due to
-	this CPU going offline.
+	this CPU going offline.  These orphaned callbacks have been moved
+	to an arbitrarily chosen online CPU.

o	"ca" is the number of RCU callbacks that have been adopted due to
	other CPUs going offline.  Note that ci+co-ca+ql is the number of
@@ -168,12 +173,12 @@ o	"gpnum" is the number of grace periods that have started.  It is

The output of "cat rcu/rcuhier" looks as follows, with very long lines:

-c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
1/1 .>. 0:127 ^0    
3/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
3/3f .>. 0:5 ^0    2/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3    
rcu_bh:
-c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
0/1 .>. 0:127 ^0    
0/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
0/3f .>. 0:5 ^0    0/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3
@@ -212,11 +217,6 @@ o	"fqlh" is the number of calls to force_quiescent_state() that
	exited immediately (without even being counted in nfqs above)
	due to contention on ->fqslock.

-o	"oqlen" is the number of callbacks on the "orphan" callback
-	list.  RCU callbacks are placed on this list by CPUs going
-	offline, and are "adopted" either by the CPU helping the outgoing
-	CPU or by the next rcu_barrier*() call, whichever comes first.
-
o	Each element of the form "1/1 0:127 ^0" represents one struct
	rcu_node.  Each line represents one level of the hierarchy, from
	root to leaves.  It is best to think of the rcu_data structures
@@ -326,3 +326,115 @@ o	"nn" is the number of times that this CPU needed nothing.  Alert
	readers will note that the rcu "nn" number for a given CPU very
	closely matches the rcu_bh "np" number for that same CPU.  This
	is due to short-circuit evaluation in rcu_pending().
+
+
+CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
+
+These implementations of RCU provide a single debugfs file under the
+top-level directory RCU, namely rcu/rcudata, which displays fields in
+rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU,
+rcu_preempt_ctrlblk.
+
+The output of "cat rcu/rcudata" is as follows:
+
+rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=...
+             ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274
+             normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0
+             exp balk: bt=0 nos=0
+rcu_sched: qlen: 0
+rcu_bh: qlen: 0
+
+This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the
+rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds.
+The last three lines of the rcu_preempt section appear only in
+CONFIG_RCU_BOOST kernel builds.  The fields are as follows:
+
+o	"qlen" is the number of RCU callbacks currently waiting either
+	for an RCU grace period or waiting to be invoked.  This is the
+	only field present for rcu_sched and rcu_bh, due to the
+	short-circuiting of grace period in those two cases.
+
+o	"gp" is the number of grace periods that have completed.
+
+o	"g197/p197/c197" displays the grace-period state, with the
+	"g" number being the number of grace periods that have started
+	(mod 256), the "p" number being the number of grace periods
+	that the CPU has responded to (also mod 256), and the "c"
+	number being the number of grace periods that have completed
+	(once again mod 256).
+
+	Why have both "gp" and "g"?  Because the data flowing into
+	"gp" is only present in a CONFIG_RCU_TRACE kernel.
+
+o	"tasks" is a set of bits.  The first bit is "T" if there are
+	currently tasks that have recently blocked within an RCU
+	read-side critical section, the second bit is "N" if any of the
+	aforementioned tasks are blocking the current RCU grace period,
+	and the third bit is "E" if any of the aforementioned tasks are
+	blocking the current expedited grace period.  Each bit is "."
+	if the corresponding condition does not hold.
+
+o	"ttb" is a single bit.  It is "B" if any of the blocked tasks
+	need to be priority boosted and "." otherwise.
+
+o	"btg" indicates whether boosting has been carried out during
+	the current grace period, with "exp" indicating that boosting
+	is in progress for an expedited grace period, "no" indicating
+	that boosting has not yet started for a normal grace period,
+	"begun" indicating that boosting has begun for a normal grace
+	period, and "done" indicating that boosting has completed for
+	a normal grace period.
+
+o	"ntb" is the total number of tasks subjected to RCU priority boosting
+	periods since boot.
+
+o	"neb" is the number of expedited grace periods that have had
+	to resort to RCU priority boosting since boot.
+
+o	"nnb" is the number of normal grace periods that have had
+	to resort to RCU priority boosting since boot.
+
+o	"j" is the low-order 12 bits of the jiffies counter in hexadecimal.
+
+o	"bt" is the low-order 12 bits of the value that the jiffies counter
+	will have at the next time that boosting is scheduled to begin.
+
+o	In the line beginning with "normal balk", the fields are as follows:
+
+	o	"nt" is the number of times that the system balked from
+		boosting because there were no blocked tasks to boost.
+		Note that the system will balk from boosting even if the
+		grace period is overdue when the currently running task
+		is looping within an RCU read-side critical section.
+		There is no point in boosting in this case, because
+		boosting a running task won't make it run any faster.
+
+	o	"gt" is the number of times that the system balked
+		from boosting because, although there were blocked tasks,
+		none of them were preventing the current grace period
+		from completing.
+
+	o	"bt" is the number of times that the system balked
+		from boosting because boosting was already in progress.
+
+	o	"b" is the number of times that the system balked from
+		boosting because boosting had already completed for
+		the grace period in question.
+
+	o	"ny" is the number of times that the system balked from
+		boosting because it was not yet time to start boosting
+		the grace period in question.
+
+	o	"nos" is the number of times that the system balked from
+		boosting for inexplicable ("not otherwise specified")
+		reasons.  This can actually happen due to races involving
+		increments of the jiffies counter.
+
+o	In the line beginning with "exp balk", the fields are as follows:
+
+	o	"bt" is the number of times that the system balked from
+		boosting because there were no blocked tasks to boost.
+
+	o	"nos" is the number of times that the system balked from
+		boosting for inexplicable ("not otherwise specified")
+		reasons.

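Both flavors are consumed through ordinary debugfs file I/O. As a minimal user-space sketch (assuming debugfs is mounted at /sys/kernel/debug, the usual default, and a kernel built with CONFIG_RCU_TRACE):

	#include <stdio.h>

	/* Dump the rcu/rcudata counters described above. */
	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/kernel/debug/rcu/rcudata", "r");

		if (!f) {
			perror("rcu/rcudata");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}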
+ 26 - 0
Documentation/dontdiff

@@ -62,6 +62,10 @@ aic7*reg_print.c*
aic7*seq.h*
aicasm
aicdb.h*
+altivec1.c
+altivec2.c
+altivec4.c
+altivec8.c
asm-offsets.h
asm_offsets.h
autoconf.h*
@@ -76,6 +80,7 @@ btfixupprep
build
bvmlinux
bzImage*
+capflags.c
classlist.h*
comp*.log
compile.h*
@@ -94,6 +99,7 @@ devlist.h*
docproc
elf2ecoff
elfconfig.h*
+evergreen_reg_safe.h
fixdep
flask.h
fore200e_mkfirm
@@ -108,9 +114,16 @@ genksyms
*_gray256.c
ihex2fw
ikconfig.h*
+inat-tables.c
initramfs_data.cpio
initramfs_data.cpio.gz
initramfs_list
+int16.c
+int1.c
+int2.c
+int32.c
+int4.c
+int8.c
kallsyms
kconfig
keywords.c
@@ -140,6 +153,7 @@ mkprep
mktables
mktree
modpost
+modules.builtin
modules.order
modversions.h*
ncscope.*
@@ -153,14 +167,23 @@ pca200e.bin
pca200e_ecd.bin2
piggy.gz
piggyback
+piggy.S
pnmtologo
ppc_defs.h*
pss_boot.h
qconf
+r100_reg_safe.h
+r200_reg_safe.h
+r300_reg_safe.h
+r420_reg_safe.h
+r600_reg_safe.h
raid6altivec*.c
raid6int*.c
raid6tables.c
relocs
+rn50_reg_safe.h
+rs600_reg_safe.h
+rv515_reg_safe.h
series
setup
setup.bin
@@ -169,6 +192,7 @@ sImage
sm_tbl*
split-include
syscalltab.h
+tables.c
tags
tftpboot.img
timeconst.h
@@ -190,6 +214,7 @@ vmlinux
vmlinux-*
vmlinux.aout
vmlinux.lds
+voffset.h
vsyscall.lds
vsyscall_32.lds
wanxlfw.inc
@@ -200,3 +225,4 @@ wakeup.elf
wakeup.lds
zImage*
zconf.hash.c
+zoffset.h

+ 2 - 25
Documentation/kernel-docs.txt

@@ -537,7 +537,7 @@
       Notes: Further information in
       http://www.oreilly.com/catalog/linuxdrive2/

-     * Title: "Linux Device Drivers, 3nd Edition"
+     * Title: "Linux Device Drivers, 3rd Edition"
       Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
       Publisher: O'Reilly & Associates.
       Date: 2005.
@@ -592,14 +592,6 @@
       Pages: 600.
       ISBN: 0-13-101908-2
 
 
-     * Title:  "The  Design  and Implementation of the 4.4 BSD UNIX
-       Operating System"
-       Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
-       John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1996.
-       ISBN: 0-201-54979-4
-
     * Title: "Programming for the real world - POSIX.4"
       Author: Bill O. Gallmeister.
       Publisher: O'Reilly & Associates, Inc..
@@ -610,28 +602,13 @@
       POSIX. Good reference.

     * Title:  "UNIX  Systems  for  Modern Architectures: Symmetric
-       Multiprocesssing and Caching for Kernel Programmers"
+       Multiprocessing and Caching for Kernel Programmers"
       Author: Curt Schimmel.
       Publisher: Addison Wesley.
       Date: June, 1994.
       Pages: 432.
       ISBN: 0-201-63338-8
 
 
-     * Title:  "The  Design  and Implementation of the 4.3 BSD UNIX
-       Operating System"
-       Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J.
-       Karels, John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1989 (reprinted with corrections on October, 1990).
-       ISBN: 0-201-06196-1
-
-     * Title: "The Design of the UNIX Operating System"
-       Author: Maurice J. Bach.
-       Publisher: Prentice Hall.
-       Date: 1986.
-       Pages: 471.
-       ISBN: 0-13-201757-1
-
     MISCELLANEOUS:

     * Name: linux/Documentation

+ 7 - 4
Documentation/kernel-parameters.txt

@@ -1614,6 +1614,8 @@ and is between 256 and 4096 characters. It is defined in the file
	noapic		[SMP,APIC] Tells the kernel to not make use of any
			IOAPICs that may be present in the system.

+	noautogroup	Disable scheduler automatic task group creation.
+
	nobats		[PPC] Do not use BATs for mapping kernel lowmem
			on "Classic" PPC cores.

@@ -2459,12 +2461,13 @@ and is between 256 and 4096 characters. It is defined in the file
			to facilitate early boot debugging.
			See also Documentation/trace/events.txt

-	tsc=		Disable clocksource-must-verify flag for TSC.
+	tsc=		Disable clocksource stability checks for TSC.
			Format: <string>
			[x86] reliable: mark tsc clocksource as reliable, this
-			disables clocksource verification at runtime.
-			Used to enable high-resolution timer mode on older
-			hardware, and in virtualized environment.
+			disables clocksource verification at runtime, as well
+			as the stability checks done at bootup.	Used to enable
+			high-resolution timer mode on older hardware, and in
+			virtualized environment.
			[x86] noirqtime: Do not use TSC to do irq accounting.
			Used to run time disable IRQ_TIME_ACCOUNTING on any
			platforms where RDTSC is slow and this accounting

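As a concrete usage sketch (bootloader syntax varies; only the parameter itself comes from this diff), appending

	tsc=reliable

to the kernel command line marks the TSC clocksource as reliable, skipping both the runtime watchdog verification and the boot-time stability checks described above.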
+ 1 - 0
Documentation/x86/boot.txt

@@ -600,6 +600,7 @@ Protocol:	2.07+
  0x00000001	lguest
  0x00000002	Xen
  0x00000003	Moorestown MID
+  0x00000004	CE4100 TV Platform

Field name:	hardware_subarch_data
Type:		write (subarch-dependent)

+ 16 - 0
MAINTAINERS

@@ -2812,6 +2812,10 @@ M:	Thomas Gleixner <tglx@linutronix.de>
S:	Maintained
F:	Documentation/timers/
F:	kernel/hrtimer.c
+F:	kernel/time/clockevents.c
+F:	kernel/time/tick*.*
+F:	kernel/time/timer_*.c
+F:	include/linux/clockevents.h
F:	include/linux/hrtimer.h

HIGH-SPEED SCC DRIVER FOR AX.25
@@ -5142,6 +5146,18 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
S:	Supported
F:	sound/soc/s3c24xx

+TIMEKEEPING, NTP
+M:	John Stultz <johnstul@us.ibm.com>
+M:	Thomas Gleixner <tglx@linutronix.de>
+S:	Supported
+F:	include/linux/clocksource.h
+F:	include/linux/time.h
+F:	include/linux/timex.h
+F:	include/linux/timekeeping.h
+F:	kernel/time/clocksource.c
+F:	kernel/time/time*.c
+F:	kernel/time/ntp.c
+
TLG2300 VIDEO4LINUX-2 DRIVER
M:	Huang Shijie <shijie8@gmail.com>
M:	Kang Yong <kangyong@telegent.com>

+ 3 - 0
arch/Kconfig

@@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI
config HAVE_ARCH_JUMP_LABEL
	bool

+config HAVE_ARCH_MUTEX_CPU_RELAX
+	bool
+
source "kernel/gcov/Kconfig"

+ 1 - 0
arch/s390/Kconfig

@@ -99,6 +99,7 @@ config S390
	select HAVE_KERNEL_LZMA
	select HAVE_KERNEL_LZO
	select HAVE_GET_USER_PAGES_FAST
+	select HAVE_ARCH_MUTEX_CPU_RELAX
	select ARCH_INLINE_SPIN_TRYLOCK
	select ARCH_INLINE_SPIN_TRYLOCK_BH
	select ARCH_INLINE_SPIN_LOCK

+ 2 - 0
arch/s390/include/asm/mutex.h

@@ -7,3 +7,5 @@
 */

#include <asm-generic/mutex-dec.h>
+
+#define arch_mutex_cpu_relax()	barrier()

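On s390, plain cpu_relax() is costly in a spin loop (it yields to the hypervisor), so the mutex spin paths get a cheap compiler barrier instead. The generic side of this hook is not part of this excerpt; a sketch of the assumed fallback, roughly as include/linux/mutex.h would provide it:

	/* Assumed generic fallback (sketch, not shown in this diff):
	 * architectures that select HAVE_ARCH_MUTEX_CPU_RELAX supply their
	 * own arch_mutex_cpu_relax(); everyone else spins with cpu_relax(). */
	#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
	#define arch_mutex_cpu_relax()	cpu_relax()
	#endif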
+ 35 - 6
arch/x86/Kconfig

@@ -377,6 +377,18 @@ config X86_ELAN

	  If unsure, choose "PC-compatible" instead.

+config X86_INTEL_CE
+	bool "CE4100 TV platform"
+	depends on PCI
+	depends on PCI_GODIRECT
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	select X86_REBOOTFIXUPS
+	---help---
+	  Select for the Intel CE media processor (CE4100) SOC.
+	  This option compiles in support for the CE4100 SOC for settop
+	  boxes and media devices.
+
config X86_MRST
       bool "Moorestown MID platform"
	depends on PCI
@@ -385,6 +397,10 @@ config X86_MRST
	depends on X86_EXTENDED_PLATFORM
	depends on X86_IO_APIC
	select APB_TIMER
+	select I2C
+	select SPI
+	select INTEL_SCU_IPC
+	select X86_PLATFORM_DEVICES
	---help---
	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
	  Internet Device(MID) platform. Moorestown consists of two chips:
@@ -466,6 +482,19 @@ config X86_ES7000
	  Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
	  supposed to run on an IA32-based Unisys ES7000 system.

+config X86_32_IRIS
+	tristate "Eurobraille/Iris poweroff module"
+	depends on X86_32
+	---help---
+	  The Iris machines from EuroBraille do not have APM or ACPI support
+	  to shut themselves down properly.  A special I/O sequence is
+	  needed to do so, which is what this module does at
+	  kernel shutdown.
+
+	  This is only for Iris machines from EuroBraille.
+
+	  If unused, say N.
+
config SCHED_OMIT_FRAME_POINTER
	def_bool y
	prompt "Single-depth WCHAN output"
@@ -1141,16 +1170,16 @@ config NUMA
comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
	depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)

-config K8_NUMA
+config AMD_NUMA
	def_bool y
	prompt "Old style AMD Opteron NUMA detection"
	depends on X86_64 && NUMA && PCI
	---help---
-	  Enable K8 NUMA node topology detection.  You should say Y here if
-	  you have a multi processor AMD K8 system. This uses an old
-	  method to read the NUMA configuration directly from the builtin
-	  Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
-	  instead, which also takes priority if both are compiled in.
+	  Enable AMD NUMA node topology detection.  You should say Y here if
+	  you have a multi processor AMD system. This uses an old method to
+	  read the NUMA configuration directly from the builtin Northbridge
+	  of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
+	  which also takes priority if both are compiled in.

config X86_64_ACPI_NUMA
	def_bool y

+ 11 - 0
arch/x86/Kconfig.debug

@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
	  feature as well as for the change_page_attr() infrastructure.
	  If in doubt, say "N"

+config DEBUG_SET_MODULE_RONX
+	bool "Set loadable kernel module data as NX and text as RO"
+	depends on MODULES
+	---help---
+	  This option helps catch unintended modifications to loadable
+	  kernel module's text and read-only data. It also prevents execution
+	  of module data. Such protection may interfere with run-time code
+	  patching and dynamic kernel tracing - and they might also protect
+	  against certain classes of kernel exploits.
+	  If in doubt, say "N".
+
config DEBUG_NX_TEST
	tristate "Testcase for the NX non-executable stack feature"
	depends on DEBUG_KERNEL && m

+ 1 - 1
arch/x86/boot/compressed/head_64.S

@@ -182,7 +182,7 @@ no_longmode:
	hlt
	jmp     1b

-#include "../../kernel/verify_cpu_64.S"
+#include "../../kernel/verify_cpu.S"

	/*
	 * Be careful here startup_64 needs to be at a predictable

+ 1 - 0
arch/x86/include/asm/alternative.h

@@ -66,6 +66,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
extern void alternatives_smp_module_del(struct module *mod);
extern void alternatives_smp_switch(int smp);
extern int alternatives_text_reserved(void *start, void *end);
+extern bool skip_smp_alternatives;
 #else
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *locks, void *locks_end,

+ 33 - 16
arch/x86/include/asm/amd_nb.h

@@ -3,36 +3,53 @@

#include <linux/pci.h>

-extern struct pci_device_id k8_nb_ids[];
+extern struct pci_device_id amd_nb_misc_ids[];
struct bootnode;

-extern int early_is_k8_nb(u32 value);
-extern int cache_k8_northbridges(void);
-extern void k8_flush_garts(void);
-extern int k8_get_nodes(struct bootnode *nodes);
-extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int k8_scan_nodes(void);
+extern int early_is_amd_nb(u32 value);
+extern int amd_cache_northbridges(void);
+extern void amd_flush_garts(void);
+extern int amd_get_nodes(struct bootnode *nodes);
+extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
+extern int amd_scan_nodes(void);

-struct k8_northbridge_info {
+struct amd_northbridge {
+	struct pci_dev *misc;
+};
+
+struct amd_northbridge_info {
	u16 num;
-	u8 gart_supported;
-	struct pci_dev **nb_misc;
+	u64 flags;
+	struct amd_northbridge *nb;
};
-extern struct k8_northbridge_info k8_northbridges;
+extern struct amd_northbridge_info amd_northbridges;
+
+#define AMD_NB_GART			0x1
+#define AMD_NB_L3_INDEX_DISABLE		0x2

#ifdef CONFIG_AMD_NB

-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline int amd_nb_num(void)
{
-	return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
+	return amd_northbridges.num;
}

-#else
+static inline int amd_nb_has_feature(int feature)
+{
+	return ((amd_northbridges.flags & feature) == feature);
+}

-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline struct amd_northbridge *node_to_amd_nb(int node)
{
-	return NULL;
+	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}
+
+#else
+
+#define amd_nb_num(x)		0
+#define amd_nb_has_feature(x)	false
+#define node_to_amd_nb(x)	NULL
+
#endif
 
 
 

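Callers migrate from the old nb_misc array to the accessor trio above. A hedged usage sketch (the same loop pattern appears in amd_flush_garts() in the amd_nb.c hunk further down; the function name here is illustrative):

	#include <linux/pci.h>
	#include <asm/amd_nb.h>

	/* Illustrative walk over all detected AMD northbridges, reading the
	 * GART flush-word register (0x9c) from each misc device, guarded by
	 * the new feature flag so it is a no-op on non-GART families. */
	static void example_read_flush_words(void)
	{
		int i;
		u32 w;

		if (!amd_nb_has_feature(AMD_NB_GART))
			return;

		for (i = 0; i < amd_nb_num(); i++)
			pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &w);
	}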
+ 1 - 0
arch/x86/include/asm/apic.h

@@ -238,6 +238,7 @@ extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern int APIC_init_uniprocessor(void);
extern void enable_NMI_through_LVT0(void);
+extern int apic_force_enable(void);

/*
 * On 32bit this is mach-xxx local

+ 1 - 0
arch/x86/include/asm/apicdef.h

@@ -145,6 +145,7 @@

#ifdef CONFIG_X86_32
# define MAX_IO_APICS 64
+# define MAX_LOCAL_APIC 256
#else
# define MAX_IO_APICS 128
# define MAX_LOCAL_APIC 32768

+ 1 - 0
arch/x86/include/asm/bootparam.h

@@ -124,6 +124,7 @@ enum {
	X86_SUBARCH_LGUEST,
	X86_SUBARCH_XEN,
	X86_SUBARCH_MRST,
+	X86_SUBARCH_CE4100,
	X86_NR_SUBARCHS,
};
 
 

+ 4 - 0
arch/x86/include/asm/fixmap.h

@@ -117,6 +117,10 @@ enum fixed_addresses {
	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
	__end_of_permanent_fixed_addresses,
+
+#ifdef	CONFIG_X86_MRST
+	FIX_LNW_VRTC,
+#endif
	/*
	 * 256 temporary boot-time mappings, used by early_ioremap(),
	 * before ioremap() is functional.

+ 24 - 0
arch/x86/include/asm/i387.h

@@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
	int err;

	/* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+	asm volatile("1:  fxrstorq %[fx]\n\t"
+		     "2:\n"
+		     ".section .fixup,\"ax\"\n"
+		     "3:  movl $-1,%[err]\n"
+		     "    jmp  2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [err] "=r" (err)
+		     : [fx] "m" (*fx), "0" (0));
+#else
	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
		     "2:\n"
		     ".section .fixup,\"ax\"\n"
@@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
		     _ASM_EXTABLE(1b, 3b)
		     : [err] "=r" (err)
		     : [fx] "R" (fx), "m" (*fx), "0" (0));
+#endif
	return err;
}
 
 
@@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
		return -EFAULT;

	/* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+	asm volatile("1:  fxsaveq %[fx]\n\t"
+		     "2:\n"
+		     ".section .fixup,\"ax\"\n"
+		     "3:  movl $-1,%[err]\n"
+		     "    jmp  2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [err] "=r" (err), [fx] "=m" (*fx)
+		     : "0" (0));
+#else
	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
		     "2:\n"
		     ".section .fixup,\"ax\"\n"
@@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
		     _ASM_EXTABLE(1b, 3b)
		     : [err] "=r" (err), "=m" (*fx)
		     : [fx] "R" (fx), "0" (0));
+#endif
	if (unlikely(err) &&
	    __clear_user(fx, sizeof(struct i387_fxsave_struct)))
		err = -EFAULT;

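A note on the guard above: CONFIG_AS_FXSAVEQ is assumed to be an assembler-capability define, set at build time when the assembler understands the fxsaveq/fxrstorq mnemonics, rather than a user-visible Kconfig option. Kernels built with older binutils therefore keep falling back to the rex64/fxsave byte-prefix form in the #else branches.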
+ 3 - 5
arch/x86/include/asm/io_apic.h

@@ -159,7 +159,7 @@ struct io_apic_irq_attr;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
		 struct io_apic_irq_attr *irq_attr);
void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_init_mappings(void);
+extern void ioapic_and_gsi_init(void);
extern void ioapic_insert_resources(void);

extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
@@ -168,10 +168,9 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 
 
-extern void probe_nr_irqs_gsi(void);
extern int get_nr_irqs_gsi(void);
-
extern void setup_ioapic_ids_from_mpc(void);
+extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
 
struct mp_ioapic_gsi{
	u32 gsi_base;
@@ -189,9 +188,8 @@ extern void __init pre_init_apic_IRQ0(void);
#define io_apic_assign_pci_irqs 0
#define setup_ioapic_ids_from_mpc x86_init_noop
static const int timer_through_8259 = 0;
-static inline void ioapic_init_mappings(void)	{ }
+static inline void ioapic_and_gsi_init(void) { }
static inline void ioapic_insert_resources(void) { }
-static inline void probe_nr_irqs_gsi(void)	{ }
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
 
 

+ 3 - 0
arch/x86/include/asm/mce.h

@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c);

void mce_log_therm_throt_event(__u64 status);

+/* Interrupt Handler for core thermal thresholds */
+extern int (*platform_thermal_notify)(__u64 msr_val);
+
#ifdef CONFIG_X86_THERMAL_VECTOR
extern void mcheck_intel_therm_init(void);
#else

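platform_thermal_notify is a bare function pointer, so a consumer hooks it by simple assignment. An illustrative sketch — the handler and init function names are hypothetical; only the pointer declaration comes from this diff:

	#include <linux/kernel.h>
	#include <asm/mce.h>

	/* Hypothetical consumer: called from the therm_throt interrupt path
	 * with the raw thermal status MSR value when a programmed threshold
	 * (THERM_STATUS_THRESHOLD0/1, see the msr-index.h hunk below) fires. */
	static int example_thermal_notify(__u64 msr_val)
	{
		pr_info("thermal threshold crossed, status=%llx\n",
			(unsigned long long)msr_val);
		return 0;
	}

	static int __init example_thermal_init(void)
	{
		platform_thermal_notify = example_thermal_notify;
		return 0;
	}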
+ 6 - 0
arch/x86/include/asm/microcode.h

@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void)

#ifdef CONFIG_MICROCODE_AMD
extern struct microcode_ops * __init init_amd_microcode(void);
+
+static inline void get_ucode_data(void *to, const u8 *from, size_t n)
+{
+	memcpy(to, from, n);
+}
+
 #else
 #else
static inline struct microcode_ops * __init init_amd_microcode(void)
{

+ 12 - 19
arch/x86/include/asm/mpspec.h

@@ -5,8 +5,9 @@

#include <asm/mpspec_def.h>
#include <asm/x86_init.h>
+#include <asm/apicdef.h>

-extern int apic_version[MAX_APICS];
+extern int apic_version[];
extern int pic_mode;

#ifdef CONFIG_X86_32
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
				 int active_high_low);
#endif /* CONFIG_ACPI */
 
 
-#define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
+#define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_LOCAL_APIC)

struct physid_mask {
	unsigned long mask[PHYSID_ARRAY_SIZE];
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t;
	test_and_set_bit(physid, (map).mask)

#define physids_and(dst, src1, src2)					\
-	bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+	bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)

#define physids_or(dst, src1, src2)					\
-	bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+	bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)

#define physids_clear(map)					\
-	bitmap_zero((map).mask, MAX_APICS)
+	bitmap_zero((map).mask, MAX_LOCAL_APIC)

#define physids_complement(dst, src)				\
-	bitmap_complement((dst).mask, (src).mask, MAX_APICS)
+	bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)

#define physids_empty(map)					\
-	bitmap_empty((map).mask, MAX_APICS)
+	bitmap_empty((map).mask, MAX_LOCAL_APIC)

#define physids_equal(map1, map2)				\
-	bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
+	bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)

#define physids_weight(map)					\
-	bitmap_weight((map).mask, MAX_APICS)
+	bitmap_weight((map).mask, MAX_LOCAL_APIC)

#define physids_shift_right(d, s, n)				\
-	bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
+	bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)

#define physids_shift_left(d, s, n)				\
-	bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
+	bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)

static inline unsigned long physids_coerce(physid_mask_t *map)
{
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map)
	map->mask[0] = physids;
}
 
 
-/* Note: will create very large stack frames if physid_mask_t is big */
-#define physid_mask_of_physid(physid)					\
-	({								\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
-		physid_set(physid, __physid_mask);			\
-		__physid_mask;						\
-	})
-
static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
{
	physids_clear(*map);

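With physid_mask_of_physid() removed (it built a physid_mask_t on the stack by value, which its own comment warned could create very large stack frames now that MAX_LOCAL_APIC reaches 32768), callers switch to the pointer-based helper. A hedged before/after sketch with an illustrative caller:

	physid_mask_t phys_map;

	/* old, removed: returned the whole mask by value */
	/* phys_map = physid_mask_of_physid(apicid); */

	/* new: clear and fill a caller-provided mask in place */
	physid_set_mask_of_physid(apicid, &phys_map);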
+ 0 - 7
arch/x86/include/asm/mpspec_def.h

@@ -15,13 +15,6 @@

#ifdef CONFIG_X86_32
# define MAX_MPC_ENTRY 1024
-# define MAX_APICS      256
-#else
-# if NR_CPUS <= 255
-#  define MAX_APICS     255
-# else
-#  define MAX_APICS   32768
-# endif
#endif

/* Intel MP Floating Pointer Structure */

+ 9 - 0
arch/x86/include/asm/mrst-vrtc.h

@@ -0,0 +1,9 @@
+#ifndef _MRST_VRTC_H
+#define _MRST_VRTC_H
+
+extern unsigned char vrtc_cmos_read(unsigned char reg);
+extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
+extern unsigned long vrtc_get_time(void);
+extern int vrtc_set_mmss(unsigned long nowtime);
+
+#endif

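These accessors mirror the classic CMOS RTC register interface, but reach the Moorestown virtual RTC through a fixmap mapping instead of port I/O. An illustrative read, as the rtc-mrst driver added later in this merge would use it (the register macro comes from <linux/mc146818rtc.h>; this snippet is a sketch, not from the diff):

	#include <linux/mc146818rtc.h>
	#include <asm/mrst-vrtc.h>

	/* Sketch: read the seconds register from the virtual RTC. */
	unsigned char sec = vrtc_cmos_read(RTC_SECONDS);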
+ 13 - 1
arch/x86/include/asm/mrst.h

@@ -14,7 +14,9 @@
#include <linux/sfi.h>

extern int pci_mrst_init(void);
-int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int sfi_mrtc_num;
+extern struct sfi_rtc_table_entry sfi_mrtc_array[];

/*
 * Medfield is the follow-up of Moorestown, it combines two chip solution into
@@ -50,4 +52,14 @@ extern void mrst_early_console_init(void);

extern struct console early_hsu_console;
extern void hsu_early_console_init(void);
+
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
+/* VRTC timer */
+#define MRST_VRTC_MAP_SZ	(1024)
+/*#define MRST_VRTC_PGOFFSET	(0xc00) */
+
+extern void mrst_rtc_init(void);
+
#endif /* _ASM_X86_MRST_H */

+ 12 - 0
arch/x86/include/asm/msr-index.h

@@ -257,6 +257,18 @@
#define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24)
 
 
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
+#define THERM_SHIFT_THRESHOLD0        8
+#define THERM_MASK_THRESHOLD0          (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE    (1 << 23)
+#define THERM_SHIFT_THRESHOLD1        16
+#define THERM_MASK_THRESHOLD1          (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0        (1 << 6)
+#define THERM_LOG_THRESHOLD0           (1 << 7)
+#define THERM_STATUS_THRESHOLD1        (1 << 8)
+#define THERM_LOG_THRESHOLD1           (1 << 9)
+
/* MISC_ENABLE bits: architectural */
#define MSR_IA32_MISC_ENABLE_FAST_STRING	(1ULL << 0)
#define MSR_IA32_MISC_ENABLE_TCC		(1ULL << 1)

+ 1 - 1
arch/x86/include/asm/paravirt.h

@@ -112,7 +112,7 @@ static inline void arch_safe_halt(void)

static inline void halt(void)
{
-	PVOP_VCALL0(pv_irq_ops.safe_halt);
+	PVOP_VCALL0(pv_irq_ops.halt);
}

static inline void wbinvd(void)

+ 1 - 0
arch/x86/include/asm/pci.h

@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start;

#define PCIBIOS_MIN_CARDBUS_IO	0x4000

+extern int pcibios_enabled;
void pcibios_config_init(void);
struct pci_bus *pcibios_scan_root(int bus);
 
 

+ 6 - 0
arch/x86/include/asm/setup.h

@@ -53,6 +53,12 @@ extern void x86_mrst_early_setup(void);
 static inline void x86_mrst_early_setup(void) { }
 static inline void x86_mrst_early_setup(void) { }
 #endif
 #endif
 
 
+#ifdef CONFIG_X86_INTEL_CE
+extern void x86_ce4100_early_setup(void);
+#else
+static inline void x86_ce4100_early_setup(void) { }
+#endif
+
#ifndef _SETUP

/*

+ 5 - 4
arch/x86/include/asm/uv/uv_bau.h

@@ -26,20 +26,22 @@
 * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
 *
- * We will use 31 sets, one for sending BAU messages from each of the 32
+ * We will use one set for sending BAU messages from each of the
 * cpu's on the uvhub.
 *
 * TLB shootdown will use the first of the 8 descriptors of each set.
 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
 */
 
 
+#define MAX_CPUS_PER_UVHUB		64
+#define MAX_CPUS_PER_SOCKET		32
+#define UV_ADP_SIZE			64 /* hardware-provided max. */
+#define UV_CPUS_PER_ACT_STATUS		32 /* hardware-provided max. */
#define UV_ITEMS_PER_DESCRIPTOR		8
/* the 'throttle' to prevent the hardware stay-busy bug */
#define MAX_BAU_CONCURRENT		3
-#define UV_CPUS_PER_ACT_STATUS		32
#define UV_ACT_STATUS_MASK		0x3
#define UV_ACT_STATUS_SIZE		2
-#define UV_ADP_SIZE			32
#define UV_DISTRIBUTION_SIZE		256
#define UV_SW_ACK_NPENDING		8
#define UV_NET_ENDPOINT_INTD		0x38
@@ -100,7 +102,6 @@
 * number of destination side software ack resources
 */
#define DEST_NUM_RESOURCES		8
-#define MAX_CPUS_PER_NODE		32
/*
 * completion statuses for sending a TLB flush message
 */

+ 0 - 1
arch/x86/kernel/Makefile

@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
obj-$(CONFIG_KGDB)		+= kgdb.o
obj-$(CONFIG_VM86)		+= vm86_32.o
obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
-obj-$(CONFIG_EARLY_PRINTK_MRST)	+= early_printk_mrst.o

obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
obj-$(CONFIG_APB_TIMER)		+= apb_timer.o

+ 8 - 3
arch/x86/kernel/acpi/boot.c

@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
{
	unsigned int ver = 0;
 
 
+	if (id >= (MAX_LOCAL_APIC-1)) {
+		printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+		return;
+	}
+
	if (!enabled) {
		++disabled_cpus;
		return;
@@ -910,13 +915,13 @@ static int __init acpi_parse_madt_lapic_entries(void)
	acpi_register_lapic_address(acpi_lapic_addr);

	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
-				      acpi_parse_sapic, MAX_APICS);
+				      acpi_parse_sapic, MAX_LOCAL_APIC);

	if (!count) {
		x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
-						acpi_parse_x2apic, MAX_APICS);
+					acpi_parse_x2apic, MAX_LOCAL_APIC);
		count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
-					      acpi_parse_lapic, MAX_APICS);
+					acpi_parse_lapic, MAX_LOCAL_APIC);
	}
	if (!count && !x2count) {
		printk(KERN_ERR PREFIX "No LAPIC entries present\n");

+ 2 - 1
arch/x86/kernel/alternative.c

@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
	mutex_unlock(&smp_alt);
}
 
 
+bool skip_smp_alternatives;
void alternatives_smp_switch(int smp)
{
	struct smp_alt_module *mod;
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp)
	printk("lockdep: fixing up alternatives.\n");
#endif

-	if (noreplace_smp || smp_alt_once)
+	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
		return;
	BUG_ON(!smp && (num_online_cpus() > 1));
 
 

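skip_smp_alternatives lets a caller suppress the UP/SMP text patching across a burst of CPU hotplug (suspend/resume is the motivating case; smpboot.c gains the callers elsewhere in this merge). A sketch of the assumed bracketing, not shown in this excerpt:

	/* Assumed usage (sketch): keep alternatives_smp_switch() quiescent
	 * while CPUs are brought up in bulk, so the kernel text is not
	 * rewritten once per CPU during resume. */
	void arch_enable_nonboot_cpus_begin(void)
	{
		skip_smp_alternatives = true;
	}

	void arch_enable_nonboot_cpus_end(void)
	{
		skip_smp_alternatives = false;
	}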
+ 80 - 55
arch/x86/kernel/amd_nb.c

@@ -12,95 +12,116 @@

static u32 *flush_words;

-struct pci_device_id k8_nb_ids[] = {
+struct pci_device_id amd_nb_misc_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
	{}
};
-EXPORT_SYMBOL(k8_nb_ids);
+EXPORT_SYMBOL(amd_nb_misc_ids);

-struct k8_northbridge_info k8_northbridges;
-EXPORT_SYMBOL(k8_northbridges);
+struct amd_northbridge_info amd_northbridges;
+EXPORT_SYMBOL(amd_northbridges);

-static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+static struct pci_dev *next_northbridge(struct pci_dev *dev,
+					struct pci_device_id *ids)
{
	do {
		dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
		if (!dev)
			break;
-	} while (!pci_match_id(&k8_nb_ids[0], dev));
+	} while (!pci_match_id(ids, dev));
	return dev;
}
 
 
-int cache_k8_northbridges(void)
+int amd_cache_northbridges(void)
{
-	int i;
-	struct pci_dev *dev;
+	int i = 0;
+	struct amd_northbridge *nb;
+	struct pci_dev *misc;

-	if (k8_northbridges.num)
+	if (amd_nb_num())
		return 0;

-	dev = NULL;
-	while ((dev = next_k8_northbridge(dev)) != NULL)
-		k8_northbridges.num++;
+	misc = NULL;
+	while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
+		i++;

-	/* some CPU families (e.g. family 0x11) do not support GART */
-	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
-	    boot_cpu_data.x86 == 0x15)
-		k8_northbridges.gart_supported = 1;
+	if (i == 0)
+		return 0;

-	k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
-					  sizeof(void *), GFP_KERNEL);
-	if (!k8_northbridges.nb_misc)
+	nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
+	if (!nb)
		return -ENOMEM;

-	if (!k8_northbridges.num) {
-		k8_northbridges.nb_misc[0] = NULL;
-		return 0;
-	}
+	amd_northbridges.nb = nb;
+	amd_northbridges.num = i;

-	if (k8_northbridges.gart_supported) {
-		flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
-				      GFP_KERNEL);
-		if (!flush_words) {
-			kfree(k8_northbridges.nb_misc);
-			return -ENOMEM;
-		}
-	}
+	misc = NULL;
+	for (i = 0; i != amd_nb_num(); i++) {
+		node_to_amd_nb(i)->misc = misc =
+			next_northbridge(misc, amd_nb_misc_ids);
+        }
+
+	/* some CPU families (e.g. family 0x11) do not support GART */
+	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+	    boot_cpu_data.x86 == 0x15)
+		amd_northbridges.flags |= AMD_NB_GART;
+
+	/*
+	 * Some CPU families support L3 Cache Index Disable. There are some
+	 * limitations because of E382 and E388 on family 0x10.
+	 */
+	if (boot_cpu_data.x86 == 0x10 &&
+	    boot_cpu_data.x86_model >= 0x8 &&
+	    (boot_cpu_data.x86_model > 0x9 ||
+	     boot_cpu_data.x86_mask >= 0x1))
+		amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;

-	dev = NULL;
-	i = 0;
-	while ((dev = next_k8_northbridge(dev)) != NULL) {
-		k8_northbridges.nb_misc[i] = dev;
-		if (k8_northbridges.gart_supported)
-			pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
-	}
-	k8_northbridges.nb_misc[i] = NULL;
	return 0;
}
-EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+EXPORT_SYMBOL_GPL(amd_cache_northbridges);

/* Ignores subdevice/subvendor but as far as I can figure out
   they're useless anyways */
-int __init early_is_k8_nb(u32 device)
+int __init early_is_amd_nb(u32 device)
{
	struct pci_device_id *id;
	u32 vendor = device & 0xffff;
	device >>= 16;
-	for (id = k8_nb_ids; id->vendor; id++)
+	for (id = amd_nb_misc_ids; id->vendor; id++)
		if (vendor == id->vendor && device == id->device)
			return 1;
	return 0;
}
 
 
-void k8_flush_garts(void)
+int amd_cache_gart(void)
+{
+       int i;
+
+       if (!amd_nb_has_feature(AMD_NB_GART))
+               return 0;
+
+       flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
+       if (!flush_words) {
+               amd_northbridges.flags &= ~AMD_NB_GART;
+               return -ENOMEM;
+       }
+
+       for (i = 0; i != amd_nb_num(); i++)
+               pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+                                     &flush_words[i]);
+
+       return 0;
+}
+
+void amd_flush_garts(void)
 {
 	int flushed, i;
 	unsigned long flags;
 	static DEFINE_SPINLOCK(gart_lock);

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return;

 	/* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
 	   that it doesn't matter to serialize more. -AK */
 	spin_lock_irqsave(&gart_lock, flags);
 	flushed = 0;
-	for (i = 0; i < k8_northbridges.num; i++) {
-		pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
-				       flush_words[i]|1);
+	for (i = 0; i < amd_nb_num(); i++) {
+		pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+				       flush_words[i] | 1);
 		flushed++;
 	}
-	for (i = 0; i < k8_northbridges.num; i++) {
+	for (i = 0; i < amd_nb_num(); i++) {
 		u32 w;
 		/* Make sure the hardware actually executed the flush*/
 		for (;;) {
-			pci_read_config_dword(k8_northbridges.nb_misc[i],
+			pci_read_config_dword(node_to_amd_nb(i)->misc,
 					      0x9c, &w);
 			if (!(w & 1))
 				break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
 	if (!flushed)
 		printk("nothing to flush?\n");
 }
-EXPORT_SYMBOL_GPL(k8_flush_garts);
+EXPORT_SYMBOL_GPL(amd_flush_garts);
-static __init int init_k8_nbs(void)
+static __init int init_amd_nbs(void)
 {
 	int err = 0;

-	err = cache_k8_northbridges();
+	err = amd_cache_northbridges();

 	if (err < 0)
-		printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
+		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+
+	if (amd_cache_gart() < 0)
+		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
+		       "GART support disabled.\n");

 	return err;
 }

 /* This has to go after the PCI subsystem */
-fs_initcall(init_k8_nbs);
+fs_initcall(init_amd_nbs);
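
A quick sketch of how a consumer is expected to use the reworked
northbridge interface above -- only amd_nb_num(), node_to_amd_nb() and
amd_nb_has_feature() from this patch are assumed; the dumping function
itself is hypothetical:

	#include <linux/pci.h>
	#include <asm/amd_nb.h>

	/* Hypothetical consumer: walk every northbridge and read the
	 * GART flush word, as amd_cache_gart() does above. */
	static void example_dump_gart_flush_words(void)
	{
		u32 word;
		int i;

		/* Feature flags replace the old gart_supported field. */
		if (!amd_nb_has_feature(AMD_NB_GART))
			return;

		for (i = 0; i < amd_nb_num(); i++) {
			/* The per-node misc PCI device now lives in the
			 * amd_northbridges array. */
			pci_read_config_dword(node_to_amd_nb(i)->misc,
					      0x9c, &word);
			printk(KERN_DEBUG "nb %d flush word: 0x%x\n",
			       i, word);
		}
	}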

+ 1 - 0
arch/x86/kernel/apb_timer.c

@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)

 	if (system_state == SYSTEM_BOOTING) {
 		irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+		irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
 		/* APB timer irqs are set up as mp_irqs, timer is edge type */
 		__set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
 		if (request_irq(adev->irq, apbt_interrupt_handler,

+ 5 - 5
arch/x86/kernel/aperture_64.c

@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
  * Do an PCI bus scan by hand because we're running before the PCI
  * subsystem.
  *
- * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
+ * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
  * generically. It's probably overkill to always scan all slots because
  * the AGP bridges should be always an own bus on the HT hierarchy,
  * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
 		dev_limit = bus_dev_ranges[i].dev_limit;

 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;

 			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
 		dev_limit = bus_dev_ranges[i].dev_limit;

 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;

 			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
 		dev_limit = bus_dev_ranges[i].dev_limit;

 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;

 			iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
 		dev_base = bus_dev_ranges[i].dev_base;
 		dev_limit = bus_dev_ranges[i].dev_limit;
 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;

 			write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);

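The conversions above all sit in the same early-boot scan; a condensed
sketch of the idiom, with the bus/slot bounds as placeholders
(read_pci_config() and early_is_amd_nb() are the real functions used in
the hunks):

	/* Sketch: recognize an AMD northbridge before the PCI core is up. */
	static void __init example_scan_bus(int bus, int dev_base, int dev_limit)
	{
		int slot;

		for (slot = dev_base; slot < dev_limit; slot++) {
			/* Function 3 carries the misc registers; offset 0x00
			 * is the vendor/device ID dword that early_is_amd_nb()
			 * matches against amd_nb_misc_ids. */
			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
				continue;

			/* ... operate on AMD64_GARTAPERTURECTL here ... */
		}
	}
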
+ 63 - 41
arch/x86/kernel/apic/apic.c

@@ -431,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 	reserved = reserve_eilvt_offset(offset, new);

 	if (reserved != new) {
-		pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
-		       "vector 0x%x was already reserved by another core, "
-		       "APIC%lX=0x%x\n",
-		       smp_processor_id(), new, reserved, reg, old);
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on another cpu\n",
+		       smp_processor_id(), reg, offset, new, reserved);
 		return -EINVAL;
 	}

 	if (!eilvt_entry_is_changeable(old, new)) {
-		pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
-		       "register already in use, APIC%lX=0x%x\n",
-		       smp_processor_id(), new, reg, old);
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on this cpu\n",
+		       smp_processor_id(), reg, offset, new, old);
 		return -EBUSY;
 	}

@@ -1532,13 +1533,60 @@ static int __init detect_init_APIC(void)
 	return 0;
 }
 #else
+
+static int apic_verify(void)
+{
+	u32 features, h, l;
+
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		pr_warning("Could not enable APIC!\n");
+		return -1;
+	}
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (l & MSR_IA32_APICBASE_ENABLE)
+		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+	pr_info("Found and enabled local APIC!\n");
+	return 0;
+}
+
+int apic_force_enable(void)
+{
+	u32 h, l;
+
+	if (disable_apic)
+		return -1;
+
+	/*
+	 * Some BIOSes disable the local APIC in the APIC_BASE
+	 * MSR. This can only be done in software for Intel P6 or later
+	 * and AMD K7 (Model > 1) or later.
+	 */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+		pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+		l &= ~MSR_IA32_APICBASE_BASE;
+		l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+		enabled_via_apicbase = 1;
+	}
+	return apic_verify();
+}
+
 /*
  * Detect and initialize APIC
  */
 static int __init detect_init_APIC(void)
 {
-	u32 h, l, features;
-
 	/* Disabled by kernel option? */
 	if (disable_apic)
 		return -1;
@@ -1568,38 +1616,12 @@ static int __init detect_init_APIC(void)
 				"you can enable it with \"lapic\"\n");
 			return -1;
 		}
-		/*
-		 * Some BIOSes disable the local APIC in the APIC_BASE
-		 * MSR. This can only be done in software for Intel P6 or later
-		 * and AMD K7 (Model > 1) or later.
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
-			l &= ~MSR_IA32_APICBASE_BASE;
-			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-			wrmsr(MSR_IA32_APICBASE, l, h);
-			enabled_via_apicbase = 1;
-		}
-	}
-	/*
-	 * The APIC feature bit should now be enabled
-	 * in `cpuid'
-	 */
-	features = cpuid_edx(1);
-	if (!(features & (1 << X86_FEATURE_APIC))) {
-		pr_warning("Could not enable APIC!\n");
-		return -1;
+		if (apic_force_enable())
+			return -1;
+	} else {
+		if (apic_verify())
+			return -1;
 	}
-	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-	/* The BIOS may have set up the APIC at some other address */
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	if (l & MSR_IA32_APICBASE_ENABLE)
-		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-	pr_info("Found and enabled local APIC!\n");

 	apic_pm_activate();

@@ -1687,7 +1709,7 @@ void __init init_apic_mappings(void)
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-int apic_version[MAX_APICS];
+int apic_version[MAX_LOCAL_APIC];

 int __init APIC_init_uniprocessor(void)
 {

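The detect_init_APIC() split above factors two reusable steps out of one
monolithic function. A sketch of the resulting call order -- the caller
below is invented, apic_force_enable() and apic_verify() are the helpers
added in this hunk:

	/* Hypothetical caller showing the intended sequencing. */
	static int example_lapic_bringup(int bios_enabled)
	{
		if (bios_enabled)
			return apic_verify();	/* CPUID bit + APIC base only */

		/* Rewrites MSR_IA32_APICBASE first, then runs apic_verify(). */
		return apic_force_enable();
	}
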
+ 22 - 15
arch/x86/kernel/apic/io_apic.c

@@ -1933,8 +1933,7 @@ void disable_IO_APIC(void)
  *
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
@@ -1943,15 +1942,6 @@ void __init setup_ioapic_ids_from_mpc(void)
 	unsigned char old_id;
 	unsigned long flags;

-	if (acpi_ioapic)
-		return;
-	/*
-	 * Don't check I/O APIC IDs for xAPIC systems.  They have
-	 * no meaning without the serial APIC bus.
-	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return;
 	/*
 	/*
 	 * This is broken; anything with a real cpu count has to
 	 * circumvent this idiocy regardless.
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 		}
 
-
 		/*
 		 * We need to adjust the IRQ routing table
 		 * if the ID changed.
@@ -2041,6 +2030,21 @@ void __init setup_ioapic_ids_from_mpc(void)
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
 }
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+	if (acpi_ioapic)
+		return;
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	setup_ioapic_ids_from_mpc_nocheck();
+}
 #endif

 int no_timer_check __initdata;
@@ -3593,7 +3597,7 @@ int __init io_apic_get_redir_entries (int ioapic)
 	return reg_01.bits.entries + 1;
 }

-void __init probe_nr_irqs_gsi(void)
+static void __init probe_nr_irqs_gsi(void)
 {
 	int nr;

@@ -3910,7 +3914,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 	return res;
 }

-void __init ioapic_init_mappings(void)
+void __init ioapic_and_gsi_init(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	struct resource *ioapic_res;
@@ -3948,6 +3952,8 @@ fake_ioapic_page:
 		ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
 		ioapic_res++;
 	}
+
+	probe_nr_irqs_gsi();
 }

 void __init ioapic_insert_resources(void)
@@ -4057,7 +4063,8 @@ void __init pre_init_apic_IRQ0(void)

 	printk(KERN_INFO "Early APIC setup for system timer0\n");
 #ifndef CONFIG_SMP
-	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+	physid_set_mask_of_physid(boot_cpu_physical_apicid,
+					 &phys_cpu_present_map);
 #endif
 	/* Make sure the irq descriptor is set up */
 	cfg = alloc_irq_and_cfg_at(0, 0);

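The setup_ioapic_ids_from_mpc() change above is a guard/worker split:
the early-return checks stay in a thin wrapper while the body moves to a
_nocheck variant that trusted callers can invoke directly. The shape of
the pattern, with invented names and a placeholder guard:

	static bool example_guard_condition;	/* placeholder */

	static void example_frob_nocheck(void)
	{
		/* ... the unconditional work ... */
	}

	static void example_frob(void)
	{
		if (example_guard_condition)
			return;
		example_frob_nocheck();
	}
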
+ 34 - 27
arch/x86/kernel/apic/x2apic_uv_x.c

@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 static DEFINE_SPINLOCK(uv_nmi_lock);

+static unsigned long __init uv_early_read_mmr(unsigned long addr)
+{
+	unsigned long val, *mmr;
+
+	mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
+	val = *mmr;
+	early_iounmap(mmr, sizeof(*mmr));
+	return val;
+}
+
 static inline bool is_GRU_range(u64 start, u64 end)
 {
 	return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
 	return is_ISA_range(start, end) || is_GRU_range(start, end);
 }

-static int early_get_nodeid(void)
+static int __init early_get_pnodeid(void)
 {
 	union uvh_node_id_u node_id;
-	unsigned long *mmr;
-
-	mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
-	node_id.v = *mmr;
-	early_iounmap(mmr, sizeof(*mmr));
+	union uvh_rh_gam_config_mmr_u  m_n_config;
+	int pnode;

 	/* Currently, all blades have same revision number */
+	node_id.v = uv_early_read_mmr(UVH_NODE_ID);
+	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
 	uv_min_hub_revision_id = node_id.s.revision;

-	return node_id.s.node_id;
+	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+	return pnode;
 }

 static void __init early_get_apic_pnode_shift(void)
 {
-	unsigned long *mmr;
-
-	mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
-	uvh_apicid.v = *mmr;
-	early_iounmap(mmr, sizeof(*mmr));
+	uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
 	if (!uvh_apicid.v)
 		/*
 		 * Old bios, use default value
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void)
 static void __init uv_set_apicid_hibit(void)
 {
 	union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
-	unsigned long *mmr;
-	mmr = early_ioremap(UV_LOCAL_MMR_BASE |
-		UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
-	apicid_mask.v = *mmr;
-	early_iounmap(mmr, sizeof(*mmr));
+	apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
 	uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
 }

 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	int nodeid;
+	int pnodeid;

 	if (!strcmp(oem_id, "SGI")) {
-		nodeid = early_get_nodeid();
+		pnodeid = early_get_pnodeid();
 		early_get_apic_pnode_shift();
 		x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
 		x86_platform.nmi_init = uv_nmi_init;
@@ -119,7 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 			uv_system_type = UV_X2APIC;
 		else if (!strcmp(oem_table_id, "UVH")) {
 			__get_cpu_var(x2apic_extra_bits) =
-				nodeid << (uvh_apicid.s.pnode_shift - 1);
+				pnodeid << uvh_apicid.s.pnode_shift;
 			uv_system_type = UV_NON_UNIQUE_APIC;
 			uv_set_apicid_hibit();
 			return 1;
@@ -682,27 +684,32 @@ void uv_nmi_init(void)
 void __init uv_system_init(void)
 {
 	union uvh_rh_gam_config_mmr_u  m_n_config;
+	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
 	union uvh_node_id_u node_id;
 	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
-	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
 	int gnode_extra, max_pnode = 0;
 	unsigned long mmr_base, present, paddr;
-	unsigned short pnode_mask;
+	unsigned short pnode_mask, pnode_io_mask;

 	map_low_mmrs();

 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
 	n_val = m_n_config.s.n_skt;
+	mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+	n_io = mmioh.s.n_io;
 	mmr_base =
 	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
 	    ~UV_MMR_ENABLE;
 	pnode_mask = (1 << n_val) - 1;
+	pnode_io_mask = (1 << n_io) - 1;
+
 	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
 	gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
 	gnode_upper = ((unsigned long)gnode_extra  << m_val);
-	printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
-			n_val, m_val, gnode_upper, gnode_extra);
+	printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
+			n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);

 	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);

@@ -735,7 +742,7 @@ void __init uv_system_init(void)
 		for (j = 0; j < 64; j++) {
 			if (!test_bit(j, &present))
 				continue;
-			pnode = (i * 64 + j);
+			pnode = (i * 64 + j) & pnode_mask;
 			uv_blade_info[blade].pnode = pnode;
 			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
@@ -756,6 +763,7 @@ void __init uv_system_init(void)
 		/*
 		 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
 		 */
+		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
 		uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
 		pnode = uv_apicid_to_pnode(apicid);
 		blade = boot_pnode_to_blade(pnode);
@@ -772,7 +780,6 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
 		uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
 		uv_cpu_hub_info(cpu)->pnode = pnode;
-		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
 		uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -796,7 +803,7 @@ void __init uv_system_init(void)

 	map_gru_high(max_pnode);
 	map_mmr_high(max_pnode);
-	map_mmioh_high(max_pnode);
+	map_mmioh_high(max_pnode & pnode_io_mask);

 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();

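uv_early_read_mmr() above folds three copies of the same map/read/unmap
triple into one helper. The underlying idiom, sketched with a
hypothetical base and offset:

	#include <asm/io.h>

	/* Early-boot MMIO read: early_ioremap() works before the normal
	 * ioremap machinery is available, so unmap promptly. */
	static unsigned long __init example_early_read(unsigned long base,
						       unsigned long offset)
	{
		unsigned long val, *mmr;

		mmr = early_ioremap(base | offset, sizeof(*mmr));
		val = *mmr;
		early_iounmap(mmr, sizeof(*mmr));

		return val;
	}
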
+ 63 - 84
arch/x86/kernel/cpu/intel_cacheinfo.c

@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
 };

 struct amd_l3_cache {
-	struct	 pci_dev *dev;
-	bool	 can_disable;
+	struct	 amd_northbridge *nb;
 	unsigned indices;
 	u8	 subcaches[4];
 };
@@ -311,14 +310,12 @@ struct _cache_attr {
 /*
  * L3 cache descriptors
  */
-static struct amd_l3_cache **__cpuinitdata l3_caches;
-
 static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 {
 	unsigned int sc0, sc1, sc2, sc3;
 	u32 val = 0;

-	pci_read_config_dword(l3->dev, 0x1C4, &val);
+	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);

 	/* calculate subcache sizes */
 	l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }

-static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
-{
-	struct amd_l3_cache *l3;
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
-
-	l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
-	if (!l3) {
-		printk(KERN_WARNING "Error allocating L3 struct\n");
-		return NULL;
-	}
-
-	l3->dev = dev;
-
-	amd_calc_l3_indices(l3);
-
-	return l3;
-}
-
-static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
-					   int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
+					int index)
 {
 {
+	static struct amd_l3_cache *__cpuinitdata l3_caches;
 	int node;

-	if (boot_cpu_data.x86 != 0x10)
-		return;
-
-	if (index < 3)
-		return;
-
-	/* see errata #382 and #388 */
-	if (boot_cpu_data.x86_model < 0x8)
-		return;
-
-	if ((boot_cpu_data.x86_model == 0x8 ||
-	     boot_cpu_data.x86_model == 0x9)
-		&&
-	     boot_cpu_data.x86_mask < 0x1)
-			return;
-
-	/* not in virtualized environments */
-	if (k8_northbridges.num == 0)
+	/* only for L3, and not in virtualized environments */
+	if (index < 3 || amd_nb_num() == 0)
 		return;

 	/*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
 	 * never freed but this is done only on shutdown so it doesn't matter.
 	 */
 	if (!l3_caches) {
-		int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
+		int size = amd_nb_num() * sizeof(struct amd_l3_cache);

 		l3_caches = kzalloc(size, GFP_ATOMIC);
 		if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,

 	node = amd_get_nb_id(smp_processor_id());

-	if (!l3_caches[node]) {
-		l3_caches[node] = amd_init_l3_cache(node);
-		l3_caches[node]->can_disable = true;
+	if (!l3_caches[node].nb) {
+		l3_caches[node].nb = node_to_amd_nb(node);
+		amd_calc_l3_indices(&l3_caches[node]);
 	}

-	WARN_ON(!l3_caches[node]);
-
-	this_leaf->l3 = l3_caches[node];
+	this_leaf->l3 = &l3_caches[node];
 }

 /*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
 {
 	unsigned int reg = 0;

-	pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);
+	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);

 	/* check whether this slot is activated already */
 	if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 {
 	int index;

-	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+	if (!this_leaf->l3 ||
+	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 		return -EINVAL;

 	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
 		if (!l3->subcaches[i])
 			continue;

-		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);

 		/*
 		 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
 		wbinvd_on_cpu(cpu);

 		reg |= BIT(31);
-		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
 	}
 }

@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;

-	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+	if (!this_leaf->l3 ||
+	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 		return -EINVAL;

 	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 #define STORE_CACHE_DISABLE(slot)					\
 static ssize_t								\
 store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
-			    const char *buf, size_t count)		\
+			   const char *buf, size_t count)		\
 {									\
 	return store_cache_disable(this_leaf, buf, count, slot);	\
 }
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
 		show_cache_disable_1, store_cache_disable_1);

 #else	/* CONFIG_AMD_NB */
-static void __cpuinit
-amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
-{
-};
+#define amd_init_l3_cache(x, y)
 #endif /* CONFIG_AMD_NB */

 static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,

 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 		amd_cpuid4(index, &eax, &ebx, &ecx);
-		amd_check_l3_disable(this_leaf, index);
+		amd_init_l3_cache(this_leaf, index);
 	} else {
 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 	}
@@ -983,30 +944,48 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);

-#define DEFAULT_SYSFS_CACHE_ATTRS	\
-	&type.attr,			\
-	&level.attr,			\
-	&coherency_line_size.attr,	\
-	&physical_line_partition.attr,	\
-	&ways_of_associativity.attr,	\
-	&number_of_sets.attr,		\
-	&size.attr,			\
-	&shared_cpu_map.attr,		\
-	&shared_cpu_list.attr
-
 static struct attribute *default_attrs[] = {
 static struct attribute *default_attrs[] = {
+	&type.attr,
+	&level.attr,
+	&coherency_line_size.attr,
+	&physical_line_partition.attr,
+	&ways_of_associativity.attr,
+	&number_of_sets.attr,
+	&size.attr,
+	&shared_cpu_map.attr,
+	&shared_cpu_list.attr,
 	NULL
 };

-static struct attribute *default_l3_attrs[] = {
-	DEFAULT_SYSFS_CACHE_ATTRS,
 #ifdef CONFIG_AMD_NB
-	&cache_disable_0.attr,
-	&cache_disable_1.attr,
+static struct attribute ** __cpuinit amd_l3_attrs(void)
+{
+	static struct attribute **attrs;
+	int n;
+
+	if (attrs)
+		return attrs;
+
+	n = sizeof (default_attrs) / sizeof (struct attribute *);
+
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+		n += 2;
+
+	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
+	if (attrs == NULL)
+		return attrs = default_attrs;
+
+	for (n = 0; default_attrs[n]; n++)
+		attrs[n] = default_attrs[n];
+
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+		attrs[n++] = &cache_disable_0.attr;
+		attrs[n++] = &cache_disable_1.attr;
+	}
+
+	return attrs;
+}
 #endif
-	NULL
-};

 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)

 		this_leaf = CPUID4_INFO_IDX(cpu, i);

-		if (this_leaf->l3 && this_leaf->l3->can_disable)
-			ktype_cache.default_attrs = default_l3_attrs;
-		else
-			ktype_cache.default_attrs = default_attrs;
-
+		ktype_cache.default_attrs = default_attrs;
+#ifdef CONFIG_AMD_NB
+		if (this_leaf->l3)
+			ktype_cache.default_attrs = amd_l3_attrs();
+#endif
 		retval = kobject_init_and_add(&(this_object->kobj),
 					      &ktype_cache,
 					      per_cpu(ici_cache_kobject, cpu),

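amd_l3_attrs() above replaces two parallel static attribute arrays with
one list assembled at runtime. The pattern in miniature -- names and
parameters here are illustrative, not from the patch:

	#include <linux/slab.h>
	#include <linux/sysfs.h>

	/* Append an optional sysfs attribute to a NULL-terminated list. */
	static struct attribute **example_build_attrs(struct attribute **base,
						      struct attribute *extra,
						      bool want_extra)
	{
		struct attribute **attrs;
		int n;

		for (n = 0; base[n]; n++)
			;
		/* one slot for the optional entry, one for the NULL */
		attrs = kzalloc((n + 2) * sizeof(*attrs), GFP_KERNEL);
		if (!attrs)
			return base;		/* fall back to static list */

		memcpy(attrs, base, n * sizeof(*attrs));
		if (want_extra)
			attrs[n++] = extra;

		return attrs;			/* attrs[n] is already NULL */
	}
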
+ 77 - 58
arch/x86/kernel/cpu/mcheck/mce_amd.c

@@ -31,8 +31,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>

-#define PFX               "mce_threshold: "
-#define VERSION           "version 1.1.1"
 #define NR_BANKS          6
 #define NR_BLOCKS         9
 #define THRESHOLD_MAX     0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
 	struct list_head	miscj;
 };

-/* defaults used early on boot */
-static struct threshold_block threshold_defaults = {
-	.interrupt_enable	= 0,
-	.threshold_limit	= THRESHOLD_MAX,
-};
-
 struct threshold_bank {
 	struct kobject		*kobj;
 	struct threshold_block	*blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
 struct thresh_restart {
 	struct threshold_block	*b;
 	int			reset;
+	int			set_lvt_off;
+	int			lvt_off;
 	u16			old_limit;
 };

+static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
+{
+	int msr = (hi & MASK_LVTOFF_HI) >> 20;
+
+	if (apic < 0) {
+		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
+		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
+		       b->bank, b->block, b->address, hi, lo);
+		return 0;
+	}
+
+	if (apic != msr) {
+		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
+		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
+		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
+		return 0;
+	}
+
+	return 1;
+};
+
 /* must be called with correct cpu affinity */
 /* Called via smp_call_function_single() */
 static void threshold_restart_bank(void *_tr)
 {
 	struct thresh_restart *tr = _tr;
-	u32 mci_misc_hi, mci_misc_lo;
+	u32 hi, lo;
-	rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	rdmsr(tr->b->address, lo, hi);
-	if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
 		tr->reset = 1;	/* limit cannot be lower than err count */

 	if (tr->reset) {		/* reset err count and overflow bit */
-		mci_misc_hi =
-		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+		hi =
+		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
 		    (THRESHOLD_MAX - tr->b->threshold_limit);
 	} else if (tr->old_limit) {	/* change limit w/o reset */
-		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+		int new_count = (hi & THRESHOLD_MAX) +
 		    (tr->old_limit - tr->b->threshold_limit);

-		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+		hi = (hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}

+	if (tr->set_lvt_off) {
+		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
+			/* set new lvt offset */
+			hi &= ~MASK_LVTOFF_HI;
+			hi |= tr->lvt_off << 20;
+		}
+	}
+
 	tr->b->interrupt_enable ?
-	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
-	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+	    (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+	    (hi &= ~MASK_INT_TYPE_HI);
-	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	hi |= MASK_COUNT_EN_HI;
+	wrmsr(tr->b->address, lo, hi);
+}
+
+static void mce_threshold_block_init(struct threshold_block *b, int offset)
+{
+	struct thresh_restart tr = {
+		.b			= b,
+		.set_lvt_off		= 1,
+		.lvt_off		= offset,
+	};
+
+	b->threshold_limit		= THRESHOLD_MAX;
+	threshold_restart_bank(&tr);
+};
+
+static int setup_APIC_mce(int reserved, int new)
+{
+	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
+					      APIC_EILVT_MSG_FIX, 0))
+		return new;
+
+	return reserved;
 }

 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
+	struct threshold_block b;
 	unsigned int cpu = smp_processor_id();
 	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
-	struct thresh_restart tr;
-	int lvt_off = -1;
-	u8 offset;
+	int offset = -1;

 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			if (shared_bank[bank] && c->cpu_core_id)
 				break;
 #endif
-			offset = (high & MASK_LVTOFF_HI) >> 20;
-			if (lvt_off < 0) {
-				if (setup_APIC_eilvt(offset,
-						     THRESHOLD_APIC_VECTOR,
-						     APIC_EILVT_MSG_FIX, 0)) {
-					pr_err(FW_BUG "cpu %d, failed to "
-					       "setup threshold interrupt "
-					       "for bank %d, block %d "
-					       "(MSR%08X=0x%x%08x)",
-					       smp_processor_id(), bank, block,
-					       address, high, low);
-					continue;
-				}
-				lvt_off = offset;
-			} else if (lvt_off != offset) {
-				pr_err(FW_BUG "cpu %d, invalid threshold "
-				       "interrupt offset %d for bank %d,"
-				       "block %d (MSR%08X=0x%x%08x)",
-				       smp_processor_id(), lvt_off, bank,
-				       block, address, high, low);
-				continue;
-			}
-
-			high &= ~MASK_LVTOFF_HI;
-			high |= lvt_off << 20;
-			wrmsr(address, low, high);
+			offset = setup_APIC_mce(offset,
+						(high & MASK_LVTOFF_HI) >> 20);
-			threshold_defaults.address = address;
-			tr.b = &threshold_defaults;
-			tr.reset = 0;
-			tr.old_limit = 0;
-			threshold_restart_bank(&tr);
+			memset(&b, 0, sizeof(b));
+			b.cpu		= cpu;
+			b.bank		= bank;
+			b.block		= block;
+			b.address	= address;
+			mce_threshold_block_init(&b, offset);
 			mce_threshold_vector = amd_threshold_interrupt;
 		}
 	}
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)

 	b->interrupt_enable = !!new;

+	memset(&tr, 0, sizeof(tr));
 	tr.b		= b;
-	tr.reset	= 0;
-	tr.old_limit	= 0;

 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
 	if (new < 1)
 		new = 1;

+	memset(&tr, 0, sizeof(tr));
 	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
 	tr.b = b;
-	tr.reset = 0;

 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
 			continue;
 		err = threshold_create_bank(cpu, bank);
 		if (err)
-			goto out;
+			return err;
 	}
-out:
+
 	return err;
 }


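With the reworked struct thresh_restart above, callers zero the request
and set only the fields they need before shipping it to the owning CPU;
the two store_*() hunks do exactly this. A sketch (the wrapper function
is hypothetical, the fields and calls are the patch's own):

	static void example_change_limit(struct threshold_block *b, u16 new)
	{
		struct thresh_restart tr;

		memset(&tr, 0, sizeof(tr));	/* reset/set_lvt_off stay 0 */
		tr.old_limit = b->threshold_limit;
		b->threshold_limit = new;
		tr.b = b;

		smp_call_function_single(b->cpu, threshold_restart_bank,
					 &tr, 1);
	}
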
+ 40 - 0
arch/x86/kernel/cpu/mcheck/therm_throt.c

@@ -53,8 +53,13 @@ struct thermal_state {
 	struct _thermal_state core_power_limit;
 	struct _thermal_state package_throttle;
 	struct _thermal_state package_power_limit;
+	struct _thermal_state core_thresh0;
+	struct _thermal_state core_thresh1;
 };

+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);

 static atomic_t therm_throt_en	= ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
 	return 0;
 }

+static int thresh_event_valid(int event)
+{
+	struct _thermal_state *state;
+	unsigned int this_cpu = smp_processor_id();
+	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+	u64 now = get_jiffies_64();
+
+	state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+
+	if (time_before64(now, state->next_check))
+		return 0;
+
+	state->next_check = now + CHECK_INTERVAL;
+	return 1;
+}
+
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
 #define PACKAGE_THROTTLED	((__u64)2 << 62)
 #define PACKAGE_POWER_LIMIT	((__u64)3 << 62)

+static void notify_thresholds(__u64 msr_val)
+{
+	/* check whether the interrupt handler is defined;
+	 * otherwise simply return
+	 */
+	if (!platform_thermal_notify)
+		return;
+
+	/* lower threshold reached */
+	if ((msr_val & THERM_LOG_THRESHOLD0) &&	thresh_event_valid(0))
+		platform_thermal_notify(msr_val);
+	/* higher threshold reached */
+	if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+		platform_thermal_notify(msr_val);
+}
+
 /* Thermal transition interrupt handler */
 static void intel_thermal_interrupt(void)
 {
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)

 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

+	/* Check for violation of core thermal thresholds*/
+	notify_thresholds(msr_val);
+
 	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
 				THERMAL_THROTTLING_EVENT,
 				CORE_LEVEL) != 0)

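platform_thermal_notify above is a bare function-pointer hook: a
platform driver stores its handler there and intel_thermal_interrupt()
calls it (rate-limited by thresh_event_valid()) when a programmed
threshold logs. A sketch of a registration, with an invented handler:

	static int example_thermal_handler(__u64 msr_val)
	{
		pr_info("core thermal threshold event, status=0x%llx\n",
			(unsigned long long)msr_val);
		return 0;
	}

	static int __init example_register_handler(void)
	{
		platform_thermal_notify = example_thermal_handler;
		return 0;
	}
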
+ 1 - 2
arch/x86/kernel/early_printk.c

@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
 		if (!strncmp(buf, "xen", 3))
 			early_console_register(&xenboot_console, keep);
 #endif
-#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+#ifdef CONFIG_EARLY_PRINTK_MRST
 		if (!strncmp(buf, "mrst", 4)) {
 			mrst_early_console_init();
 			early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
 			hsu_early_console_init();
 			early_console_register(&early_hsu_console, keep);
 		}
-
 #endif
 		buf++;
 	}

+ 3 - 0
arch/x86/kernel/ftrace.c

@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/module.h>

 #include <trace/syscall.h>

@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
 int ftrace_arch_code_modify_prepare(void)
 {
 	set_kernel_text_rw();
+	set_all_modules_text_rw();
 	modifying_code = 1;
 	return 0;
 }
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
 int ftrace_arch_code_modify_post_process(void)
 {
 	modifying_code = 0;
+	set_all_modules_text_ro();
 	set_kernel_text_ro();
 	return 0;
 }

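The two hunks above extend the prepare/post bracket that guards every
batch of ftrace text edits to cover module text as well. The intended
pairing, sketched around an elided patch step:

	static int example_patch_text(void)
	{
		int ret;

		/* kernel and (now) module text become writable */
		ret = ftrace_arch_code_modify_prepare();
		if (ret)
			return ret;

		/* ... rewrite mcount call sites here ... */

		/* everything back to read-only */
		return ftrace_arch_code_modify_post_process();
	}
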
+ 3 - 0
arch/x86/kernel/head32.c

@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
 	case X86_SUBARCH_MRST:
 		x86_mrst_early_setup();
 		break;
+	case X86_SUBARCH_CE4100:
+		x86_ce4100_early_setup();
+		break;
 	default:
 		i386_default_early_setup();
 		break;

+ 45 - 38
arch/x86/kernel/head_32.S

@@ -139,39 +139,6 @@ ENTRY(startup_32)
 	movl %eax, pa(olpc_ofw_pgd)
 #endif

-#ifdef CONFIG_PARAVIRT
-	/* This is can only trip for a broken bootloader... */
-	cmpw $0x207, pa(boot_params + BP_version)
-	jb default_entry
-
-	/* Paravirt-compatible boot parameters.  Look to see what architecture
-		we're booting under. */
-	movl pa(boot_params + BP_hardware_subarch), %eax
-	cmpl $num_subarch_entries, %eax
-	jae bad_subarch
-
-	movl pa(subarch_entries)(,%eax,4), %eax
-	subl $__PAGE_OFFSET, %eax
-	jmp *%eax
-
-bad_subarch:
-WEAK(lguest_entry)
-WEAK(xen_entry)
-	/* Unknown implementation; there's really
-	   nothing we can do at this point. */
-	ud2a
-
-	__INITDATA
-
-subarch_entries:
-	.long default_entry		/* normal x86/PC */
-	.long lguest_entry		/* lguest hypervisor */
-	.long xen_entry			/* Xen hypervisor */
-	.long default_entry		/* Moorestown MID */
-num_subarch_entries = (. - subarch_entries) / 4
-.previous
-#endif /* CONFIG_PARAVIRT */
-
 /*
  * Initialize page tables.  This creates a PDE and a set of page
  * tables, which are located immediately beyond __brk_base.  The variable
@@ -181,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4
  *
  * Note that the stack is not yet set up!
  */
-default_entry:
 #ifdef CONFIG_X86_PAE

 	/*
@@ -261,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
 	movl %eax,pa(initial_page_table+0xffc)
 #endif
-	jmp 3f
+
+#ifdef CONFIG_PARAVIRT
+	/* This is can only trip for a broken bootloader... */
+	cmpw $0x207, pa(boot_params + BP_version)
+	jb default_entry
+
+	/* Paravirt-compatible boot parameters.  Look to see what architecture
+		we're booting under. */
+	movl pa(boot_params + BP_hardware_subarch), %eax
+	cmpl $num_subarch_entries, %eax
+	jae bad_subarch
+
+	movl pa(subarch_entries)(,%eax,4), %eax
+	subl $__PAGE_OFFSET, %eax
+	jmp *%eax
+
+bad_subarch:
+WEAK(lguest_entry)
+WEAK(xen_entry)
+	/* Unknown implementation; there's really
+	   nothing we can do at this point. */
+	ud2a
+
+	__INITDATA
+
+subarch_entries:
+	.long default_entry		/* normal x86/PC */
+	.long lguest_entry		/* lguest hypervisor */
+	.long xen_entry			/* Xen hypervisor */
+	.long default_entry		/* Moorestown MID */
+num_subarch_entries = (. - subarch_entries) / 4
+.previous
+#else
+	jmp default_entry
+#endif /* CONFIG_PARAVIRT */
+
 /*
  * Non-boot CPU entry point; entered from trampoline.S
  * We can't lgdt here, because lgdt itself uses a data segment, but
@@ -282,7 +283,7 @@ ENTRY(startup_32_smp)
 	movl %eax,%fs
 	movl %eax,%gs
 #endif /* CONFIG_SMP */
-3:
+default_entry:

 /*
  *	New page tables may be in 4Mbyte page mode and may
@@ -316,6 +317,10 @@ ENTRY(startup_32_smp)
 	subl $0x80000001, %eax
 	cmpl $(0x8000ffff-0x80000001), %eax
 	ja 6f
+
+	/* Clear bogus XD_DISABLE bits */
+	call verify_cpu
+
 	mov $0x80000001, %eax
 	cpuid
 	/* Execute Disable bit supported? */
@@ -611,6 +616,8 @@ ignore_int:
 #endif
 	iret

+#include "verify_cpu.S"
+
 	__REFDATA
 .align 4
 ENTRY(initial_code)
 ENTRY(initial_code)
@@ -622,13 +629,13 @@ ENTRY(initial_code)
 __PAGE_ALIGNED_BSS
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE_asm
 	.align PAGE_SIZE_asm
 #ifdef CONFIG_X86_PAE
 #ifdef CONFIG_X86_PAE
-ENTRY(initial_pg_pmd)
+initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 	.fill 1024*KPMDS,4,0
 #else
 #else
 ENTRY(initial_page_table)
 ENTRY(initial_page_table)
 	.fill 1024,4,0
 	.fill 1024,4,0
 #endif
 #endif
-ENTRY(initial_pg_fixmap)
+initial_pg_fixmap:
 	.fill 1024,4,0
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
 	.fill 4096,1,0

+ 10 - 24
arch/x86/kernel/microcode_amd.c

@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
 	return 0;
 }

-static int get_ucode_data(void *to, const u8 *from, size_t n)
-{
-	memcpy(to, from, n);
-	return 0;
-}
-
 static void *
 get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
 {
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
 	u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
 	void *mc;

-	if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
-		return NULL;
+	get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);

 	if (section_hdr[0] != UCODE_UCODE_TYPE) {
 		pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
 		return NULL;
 	}

-	mc = vmalloc(UCODE_MAX_SIZE);
-	if (mc) {
-		memset(mc, 0, UCODE_MAX_SIZE);
-		if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
-				   total_size)) {
-			vfree(mc);
-			mc = NULL;
-		} else
-			*mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
-	}
+	mc = vzalloc(UCODE_MAX_SIZE);
+	if (!mc)
+		return NULL;
+
+	get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
+	*mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+
 	return mc;
 }

@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
 	unsigned int *buf_pos = (unsigned int *)container_hdr;
 	unsigned long size;

-	if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
-		return 0;
+	get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);

 	size = buf_pos[2];

@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
 	}

 	buf += UCODE_CONTAINER_HEADER_SIZE;
-	if (get_ucode_data(equiv_cpu_table, buf, size)) {
-		vfree(equiv_cpu_table);
-		return 0;
-	}
+	get_ucode_data(equiv_cpu_table, buf, size);

 	return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
 }

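Both conversions above are the stock vzalloc() simplification: one call
allocates and zeroes, and the error path becomes an early return. In
miniature, with a placeholder size:

	#include <linux/vmalloc.h>

	static void *example_alloc_zeroed(size_t size)
	{
		void *buf = vzalloc(size);	/* was vmalloc() + memset(0) */

		if (!buf)
			return NULL;

		return buf;
	}
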
+ 17 - 17
arch/x86/kernel/pci-gart_64.c

@@ -143,7 +143,7 @@ static void flush_gart(void)

 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	if (need_flush) {
-		k8_flush_garts();
+		amd_flush_garts();
 		need_flush = false;
 	}
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
 {
 	int i;

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return;

-	for (i = 0; i < k8_northbridges.num; i++) {
-		struct pci_dev *dev = k8_northbridges.nb_misc[i];
+	for (i = 0; i < amd_nb_num(); i++) {
+		struct pci_dev *dev = node_to_amd_nb(i)->misc;

 		enable_gart_translation(dev, __pa(agp_gatt_table));
 	}

 	/* Flush the GART-TLB to remove stale entries */
-	k8_flush_garts();
+	amd_flush_garts();
 }

 /*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
 	if (!fix_up_north_bridges)
 		return;

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return;

 	pr_info("PCI-DMA: Restoring GART aperture settings\n");

-	for (i = 0; i < k8_northbridges.num; i++) {
-		struct pci_dev *dev = k8_northbridges.nb_misc[i];
+	for (i = 0; i < amd_nb_num(); i++) {
+		struct pci_dev *dev = node_to_amd_nb(i)->misc;

 		/*
 		 * Don't enable translations just yet.  That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
  * Private Northbridge GATT initialization in case we cannot use the
  * AGP driver for some reason.
  */
-static __init int init_k8_gatt(struct agp_kern_info *info)
+static __init int init_amd_gatt(struct agp_kern_info *info)
 {
 	unsigned aper_size, gatt_size, new_aper_size;
 	unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)

 	aper_size = aper_base = info->aper_size = 0;
 	dev = NULL;
-	for (i = 0; i < k8_northbridges.num; i++) {
-		dev = k8_northbridges.nb_misc[i];
+	for (i = 0; i < amd_nb_num(); i++) {
+		dev = node_to_amd_nb(i)->misc;
 		new_aper_base = read_aperture(dev, &new_aper_size);
 		if (!new_aper_base)
 			goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
 	if (!no_agp)
 		return;

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return;

-	for (i = 0; i < k8_northbridges.num; i++) {
+	for (i = 0; i < amd_nb_num(); i++) {
 		u32 ctl;

-		dev = k8_northbridges.nb_misc[i];
+		dev = node_to_amd_nb(i)->misc;
 		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);

 		ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
 	unsigned long scratch;
 	long i;

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return 0;

 #ifndef CONFIG_AGP_AMD64
 	no_agp = 1;
 #else
 	/* Makefile puts PCI initialization via subsys_initcall first. */
-	/* Add other K8 AGP bridge drivers here */
+	/* Add other AMD AGP bridge drivers here */
 	no_agp = no_agp ||
 		(agp_amd64_init() < 0) ||
 		(agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
 	if (no_iommu ||
 	    (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
 	    !gart_iommu_aperture ||
-	    (no_agp && init_k8_gatt(&info) < 0)) {
+	    (no_agp && init_amd_gatt(&info) < 0)) {
 		if (max_pfn > MAX_DMA32_PFN) {
 			pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
 			pr_warning("falling back to iommu=soft.\n");

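The pattern above recurs across this patch: the old k8_northbridges global (with its num and nb_misc[] fields) gives way to the accessors from <asm/amd_nb.h>. A minimal sketch of the new caller idiom, using only the helpers visible in these hunks (amd_nb_has_feature(), amd_nb_num(), node_to_amd_nb()); the function name is illustrative:

#include <asm/amd_nb.h>

static void for_each_gart_nb(void)
{
	int i;

	if (!amd_nb_has_feature(AMD_NB_GART))
		return;

	for (i = 0; i < amd_nb_num(); i++) {
		/* misc is the F3 "miscellaneous control" PCI function */
		struct pci_dev *misc = node_to_amd_nb(i)->misc;

		/* per-northbridge PCI config space access goes here */
	}
}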
+ 16 - 0
arch/x86/kernel/reboot_fixups_32.c

@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
 	outb(1, 0x92);
 }

+static void ce4100_reset(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		outb(0x2, 0xcf9);
+		udelay(50);
+	}
+}
+
 struct device_fixup {
 	unsigned int vendor;
 	unsigned int device;
 	void (*reboot_fixup)(struct pci_dev *);
 };

+/*
+ * PCI ids solely used for fixups_table go here
+ */
+#define PCI_DEVICE_ID_INTEL_CE4100	0x0708
+
 static const struct device_fixup fixups_table[] = {
 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
 { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
 { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
+{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
 };

 /*

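For context, a sketch of how a table like fixups_table is typically consulted at reboot time. The lookup loop itself is outside this hunk, so the helper below is illustrative only, not the file's actual code:

static const struct device_fixup *find_fixup(const struct pci_dev *dev)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(fixups_table); i++) {
		const struct device_fixup *f = &fixups_table[i];

		/* match on PCI vendor/device id */
		if (f->vendor == dev->vendor && f->device == dev->device)
			return f;	/* e.g. ce4100_reset for 8086:0708 */
	}
	return NULL;
}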
+ 5 - 8
arch/x86/kernel/setup.c

@@ -705,7 +705,7 @@ static u64 __init get_max_mapped(void)
 void __init setup_arch(char **cmdline_p)
 {
 	int acpi = 0;
-	int k8 = 0;
+	int amd = 0;
 	unsigned long flags;

 #ifdef CONFIG_X86_32
@@ -991,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
 	acpi = acpi_numa_init();
 #endif

-#ifdef CONFIG_K8_NUMA
+#ifdef CONFIG_AMD_NUMA
 	if (!acpi)
-		k8 = !k8_numa_init(0, max_pfn);
+		amd = !amd_numa_init(0, max_pfn);
 #endif

-	initmem_init(0, max_pfn, acpi, k8);
+	initmem_init(0, max_pfn, acpi, amd);
 	memblock_find_dma_reserve();
 	dma32_reserve_bootmem();

@@ -1045,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
 #endif

 	init_apic_mappings();
-	ioapic_init_mappings();
-
-	/* need to wait for io_apic is mapped */
-	probe_nr_irqs_gsi();
+	ioapic_and_gsi_init();

 	kvm_guest_init();


+ 14 - 0
arch/x86/kernel/smpboot.c

@@ -1161,6 +1161,20 @@ out:
 	preempt_enable();
 }

+void arch_disable_nonboot_cpus_begin(void)
+{
+	/*
+	 * Avoid the smp alternatives switch during the disable_nonboot_cpus().
+	 * In the suspend path, we will be back in the SMP mode shortly anyways.
+	 */
+	skip_smp_alternatives = true;
+}
+
+void arch_disable_nonboot_cpus_end(void)
+{
+	skip_smp_alternatives = false;
+}
+
 void arch_enable_nonboot_cpus_begin(void)
 {
 	set_mtrr_aps_delayed_init();

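The skip_smp_alternatives flag set here is consumed on the patching side, presumably by the matching alternative.c change in this merge. A sketch of that consumer, with the early return being the assumed new behavior:

void alternatives_smp_switch(int smp)
{
	/*
	 * Suspend path: the non-boot CPUs come right back, so don't
	 * bother repatching LOCK prefixes for UP and back again.
	 */
	if (skip_smp_alternatives)
		return;

	/* ... existing UP<->SMP alternatives patching ... */
}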
+ 1 - 1
arch/x86/kernel/trampoline_64.S

@@ -127,7 +127,7 @@ startup_64:
 no_longmode:
 	hlt
 	jmp no_longmode
-#include "verify_cpu_64.S"
+#include "verify_cpu.S"

 	# Careful these need to be in the same 64K segment as the above;
 tidt:

+ 91 - 5
arch/x86/kernel/tsc.c

@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)

 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
+
+	if (tsc_clocksource_reliable)
+		return 0;
 	/*
 	 * Intel systems are normally all synchronized.
 	 * Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
 		/* assume multi socket systems are not synchronized: */
 		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
+			return 1;
 	}

-	return tsc_unstable;
+	return 0;
+}
+
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work: ignored.
+ *
+ * This function uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc),
+ * or the refined calibration is off by more than 1% from the
+ * fast early calibration, we throw out the new calibration
+ * and use the early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
+{
+	static u64 tsc_start = -1, ref_start;
+	static int hpet;
+	u64 tsc_stop, ref_stop, delta;
+	unsigned long freq;
+
+	/* Don't bother refining TSC on unstable systems */
+	if (check_tsc_unstable())
+		goto out;
+
+	/*
+	 * Since the work is started early in boot, we may be
+	 * delayed the first time we expire. So set the workqueue
+	 * again once we know timers are working.
+	 */
+	if (tsc_start == -1) {
+		/*
+		 * Only set hpet once, to avoid mixing hardware
+		 * if the hpet becomes enabled later.
+		 */
+		hpet = is_hpet_enabled();
+		schedule_delayed_work(&tsc_irqwork, HZ);
+		tsc_start = tsc_read_refs(&ref_start, hpet);
+		return;
+	}
+
+	tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+	/* hpet or pmtimer available ? */
+	if (!hpet && !ref_start && !ref_stop)
+		goto out;
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+		goto out;
+
+	delta = tsc_stop - tsc_start;
+	delta *= 1000000LL;
+	if (hpet)
+		freq = calc_hpet_ref(delta, ref_start, ref_stop);
+	else
+		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+	/* Make sure we're within 1% */
+	if (abs(tsc_khz - freq) > tsc_khz/100)
+		goto out;
+
+	tsc_khz = freq;
+	printk(KERN_INFO "Refined TSC clocksource calibration: "
+		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+					(unsigned long)tsc_khz % 1000);
+
+out:
+	clocksource_register_khz(&clocksource_tsc, tsc_khz);
 }

-static void __init init_tsc_clocksource(void)
+
+static int __init init_tsc_clocksource(void)
 {
+	if (!cpu_has_tsc || tsc_disabled > 0)
+		return 0;
+
 	if (tsc_clocksource_reliable)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	/* lower the rating if we already know its unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
-	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+	schedule_delayed_work(&tsc_irqwork, 0);
+	return 0;
 }
+/*
+ * We use device_initcall here, to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);

 void __init tsc_init(void)
 {
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
 		mark_tsc_unstable("TSCs unsynchronized");

 	check_system_tsc_reliable();
-	init_tsc_clocksource();
 }


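The acceptance test above keeps the refined frequency only when it lands within 1% of the fast early calibration. A standalone illustration of that arithmetic (made-up numbers, not kernel code):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	long tsc_khz = 2933437;	/* fast early calibration */
	long freq    = 2933524;	/* refined, timer-based result */

	/* mirror of: if (abs(tsc_khz - freq) > tsc_khz/100) goto out; */
	if (labs(tsc_khz - freq) > tsc_khz / 100)
		printf("rejected, keeping %ld kHz\n", tsc_khz);
	else
		printf("accepted: %ld kHz\n", freq);	/* within 1% */
	return 0;
}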
+ 41 - 8
arch/x86/kernel/verify_cpu_64.S → arch/x86/kernel/verify_cpu.S

@@ -7,6 +7,7 @@
  *	Copyright (c) 2007  Andi Kleen (ak@suse.de)
  *	Copyright (c) 2007  Eric Biederman (ebiederm@xmission.com)
  *	Copyright (c) 2007  Vivek Goyal (vgoyal@in.ibm.com)
+ *	Copyright (c) 2010  Kees Cook (kees.cook@canonical.com)
  *
  * 	This source code is licensed under the GNU General Public License,
  * 	Version 2.  See the file COPYING for more details.
@@ -14,18 +15,17 @@
  *	This is a common code for verification whether CPU supports
  * 	long mode and SSE or not. It is not called directly instead this
  *	file is included at various places and compiled in that context.
- * 	Following are the current usage.
+ *	This file is expected to run in 32bit code.  Currently:
  *
- * 	This file is included by both 16bit and 32bit code.
+ *	arch/x86/boot/compressed/head_64.S: Boot cpu verification
+ *	arch/x86/kernel/trampoline_64.S: secondary processor verification
+ *	arch/x86/kernel/head_32.S: processor startup
  *
- *	arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
- *	arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
- *	arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
- *	arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
- *
- *	verify_cpu, returns the status of cpu check in register %eax.
+ *	verify_cpu, returns the status of longmode and SSE in register %eax.
  *		0: Success    1: Failure
  *
+ *	On Intel, the XD_DISABLE flag will be cleared as a side-effect.
+ *
  * 	The caller needs to check for the error code and take the action
  * 	appropriately. Either display a message or halt.
  */
@@ -62,8 +62,41 @@ verify_cpu:
 	cmpl	$0x444d4163,%ecx
 	jnz	verify_cpu_noamd
 	mov	$1,%di			# cpu is from AMD
+	jmp	verify_cpu_check

 verify_cpu_noamd:
+	cmpl	$0x756e6547,%ebx        # GenuineIntel?
+	jnz	verify_cpu_check
+	cmpl	$0x49656e69,%edx
+	jnz	verify_cpu_check
+	cmpl	$0x6c65746e,%ecx
+	jnz	verify_cpu_check
+
+	# only call IA32_MISC_ENABLE when:
+	# family > 6 || (family == 6 && model >= 0xd)
+	movl	$0x1, %eax		# check CPU family and model
+	cpuid
+	movl	%eax, %ecx
+
+	andl	$0x0ff00f00, %eax	# mask family and extended family
+	shrl	$8, %eax
+	cmpl	$6, %eax
+	ja	verify_cpu_clear_xd	# family > 6, ok
+	jb	verify_cpu_check	# family < 6, skip
+
+	andl	$0x000f00f0, %ecx	# mask model and extended model
+	shrl	$4, %ecx
+	cmpl	$0xd, %ecx
+	jb	verify_cpu_check	# family == 6, model < 0xd, skip
+
+verify_cpu_clear_xd:
+	movl	$MSR_IA32_MISC_ENABLE, %ecx
+	rdmsr
+	btrl	$2, %edx		# clear MSR_IA32_MISC_ENABLE_XD_DISABLE
+	jnc	verify_cpu_check	# only write MSR if bit was changed
+	wrmsr
+
+verify_cpu_check:
 	movl    $0x1,%eax		# Does the cpu have what it takes
 	cpuid
 	andl	$REQUIRED_MASK0,%edx

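The masking dance in verify_cpu_noamd is easier to read in C. Re-expressed below (illustrative only), using the CPUID leaf-1 signature layout the assembly relies on: family in bits 8-11 plus extended family in bits 20-27, model in bits 4-7 plus extended model in bits 16-19:

#include <stdint.h>

static int should_clear_xd_disable(uint32_t eax)	/* CPUID.1:EAX */
{
	/* same masks and shifts as the andl/shrl sequence above */
	uint32_t family = (eax & 0x0ff00f00) >> 8;	/* ext family | family */
	uint32_t model  = (eax & 0x000f00f0) >> 4;	/* ext model | model */

	/* family > 6 || (family == 6 && model >= 0xd) */
	return family > 6 || (family == 6 && model >= 0xd);
}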
+ 6 - 2
arch/x86/kernel/vmlinux.lds.S

@@ -69,7 +69,7 @@ jiffies_64 = jiffies;

 PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
-	data PT_LOAD FLAGS(7);          /* RWE */
+	data PT_LOAD FLAGS(6);          /* RW_ */
 #ifdef CONFIG_X86_64
 	user PT_LOAD FLAGS(5);          /* R_E */
 #ifdef CONFIG_SMP
@@ -116,6 +116,10 @@ SECTIONS

 	EXCEPTION_TABLE(16) :text = 0x9090

+#if defined(CONFIG_DEBUG_RODATA)
+	/* .text should occupy whole number of pages */
+	. = ALIGN(PAGE_SIZE);
+#endif
 	X64_ALIGN_DEBUG_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
 	X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
 		__bss_start = .;
 		*(.bss..page_aligned)
 		*(.bss)
-		. = ALIGN(4);
+		. = ALIGN(PAGE_SIZE);
 		__bss_stop = .;
 	}


+ 0 - 105
arch/x86/lguest/i386_head.S

@@ -4,7 +4,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/processor-flags.h>
-#include <asm/pgtable.h>

 /*G:020
  * Our story starts with the kernel booting into startup_32 in
@@ -38,113 +37,9 @@ ENTRY(lguest_entry)
 	/* Set up the initial stack so we can run C code. */
 	movl $(init_thread_union+THREAD_SIZE),%esp

-	call init_pagetables
-
 	/* Jumps are relative: we're running __PAGE_OFFSET too low. */
 	jmp lguest_init+__PAGE_OFFSET

-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- *
- * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
- * don't have a stack at this point, so we can't just use call and ret.
- */
-init_pagetables:
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
-#define pa(X) ((X) - __PAGE_OFFSET)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = \
-	PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
-#ifdef CONFIG_X86_PAE
-
-	/*
-	 * In PAE mode initial_page_table is statically defined to contain
-	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-	 * entries). The identity mapping is handled by pointing two PGD entries
-	 * to the first kernel PMD.
-	 *
-	 * Note the upper half of each PMD or PTE are always zero at this stage.
-	 */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-	xorl %ebx,%ebx				/* %ebx is kept at zero */
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_pg_pmd), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
-	movl %ecx,(%edx)			/* Store PMD entry */
-						/* Upper half already zero */
-	addl $8,%edx
-	movl $512,%ecx
-11:
-	stosl
-	xchgl %eax,%ebx
-	stosl
-	xchgl %eax,%ebx
-	addl $0x1000,%eax
-	loop 11b
-
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-1:
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
-	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else	/* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_page_table), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
-	movl %ecx,(%edx)			/* Store identity PDE entry */
-	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
-	addl $4,%edx
-	movl $1024, %ecx
-11:
-	stosl
-	addl $0x1000,%eax
-	loop 11b
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
-	movl %eax,pa(initial_page_table+0xffc)
-#endif
-	ret
-
 /*G:055
  * We create a macro which puts the assembler code between lgstart_ and lgend_
  * markers.  These templates are put in the .text section: they can't be

+ 1 - 1
arch/x86/mm/Makefile

@@ -23,7 +23,7 @@ mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o

 obj-$(CONFIG_NUMA)		+= numa.o numa_$(BITS).o
-obj-$(CONFIG_K8_NUMA)		+= k8topology_64.o
+obj-$(CONFIG_AMD_NUMA)		+= amdtopology_64.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat_$(BITS).o

 obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o

+ 6 - 6
arch/x86/mm/k8topology_64.c → arch/x86/mm/amdtopology_64.c

@@ -1,8 +1,8 @@
 /*
- * AMD K8 NUMA support.
+ * AMD NUMA support.
  * Discover the memory map and associated nodes.
  *
- * This version reads it directly from the K8 northbridge.
+ * This version reads it directly from the AMD northbridge.
  *
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
@@ -57,7 +57,7 @@ static __init void early_get_boot_cpu_id(void)
 {
 	/*
 	 * need to get the APIC ID of the BSP so can use that to
-	 * create apicid_to_node in k8_scan_nodes()
+	 * create apicid_to_node in amd_scan_nodes()
 	 */
 #ifdef CONFIG_X86_MPPARSE
 	/*
@@ -69,7 +69,7 @@ static __init void early_get_boot_cpu_id(void)
 	early_init_lapic_mapping();
 }

-int __init k8_get_nodes(struct bootnode *physnodes)
+int __init amd_get_nodes(struct bootnode *physnodes)
 {
 	int i;
 	int ret = 0;
@@ -82,7 +82,7 @@ int __init k8_get_nodes(struct bootnode *physnodes)
 	return ret;
 }

-int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long start = PFN_PHYS(start_pfn);
 	unsigned long end = PFN_PHYS(end_pfn);
@@ -194,7 +194,7 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
 	return 0;
 }

-int __init k8_scan_nodes(void)
+int __init amd_scan_nodes(void)
 {
 	unsigned int bits;
 	unsigned int cores;

+ 2 - 1
arch/x86/mm/init.c

@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	/*
 	 * We just marked the kernel text read only above, now that
 	 * we are going to free part of that, we need to make that
-	 * writeable first.
+	 * writeable and non-executable first.
 	 */
+	set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
 	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);

 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);

+ 19 - 1
arch/x86/mm/init_32.c

@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)

 static inline int is_kernel_text(unsigned long addr)
 {
-	if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+	if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
 		return 1;
 	return 0;
 }
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 }

+static void mark_nxdata_nx(void)
+{
+	/*
+	 * When this is called, init has already been executed and released,
+	 * so everything past _etext should be NX.
+	 */
+	unsigned long start = PFN_ALIGN(_etext);
+	/*
+	 * This comes from is_kernel_text upper limit. Also HPAGE where used:
+	 */
+	unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
+
+	if (__supported_pte_mask & _PAGE_NX)
+		printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
+	set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +963,7 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
+	mark_nxdata_nx();
 }
 #endif


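A standalone illustration of the range mark_nxdata_nx() computes: from the first page above _etext up to __init_end rounded up to the next huge page. The addresses and the 4 MB huge-page size are hypothetical examples:

#include <stdio.h>

#define HPAGE_SIZE	(4UL << 20)	/* 4 MB large pages (32-bit non-PAE) */
#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
#define PFN_ALIGN(x)	(((x) + 4095UL) & ~4095UL)

int main(void)
{
	unsigned long etext    = 0xc05a1234UL;	/* hypothetical _etext */
	unsigned long init_end = 0xc08f4000UL;	/* hypothetical __init_end */
	unsigned long start = PFN_ALIGN(etext);
	unsigned long size  = ((init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

	printf("NX-protecting the kernel data: %luk from 0x%lx\n",
	       size >> 10, start);
	return 0;
}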
+ 11 - 11
arch/x86/mm/numa_64.c

@@ -264,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata;
 static char *cmdline __initdata;

 static int __init setup_physnodes(unsigned long start, unsigned long end,
-					int acpi, int k8)
+					int acpi, int amd)
 {
 	int nr_nodes = 0;
 	int ret = 0;
@@ -274,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
 	if (acpi)
 		nr_nodes = acpi_get_nodes(physnodes);
 #endif
-#ifdef CONFIG_K8_NUMA
-	if (k8)
-		nr_nodes = k8_get_nodes(physnodes);
+#ifdef CONFIG_AMD_NUMA
+	if (amd)
+		nr_nodes = amd_get_nodes(physnodes);
 #endif
 	/*
 	 * Basic sanity checking on the physical node map: there may be errors
-	 * if the SRAT or K8 incorrectly reported the topology or the mem=
+	 * if the SRAT or AMD code incorrectly reported the topology or the mem=
 	 * kernel parameter is used.
 	 */
 	for (i = 0; i < nr_nodes; i++) {
@@ -549,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
  * numa=fake command-line option.
  */
 static int __init numa_emulation(unsigned long start_pfn,
-			unsigned long last_pfn, int acpi, int k8)
+			unsigned long last_pfn, int acpi, int amd)
 {
 	u64 addr = start_pfn << PAGE_SHIFT;
 	u64 max_addr = last_pfn << PAGE_SHIFT;
@@ -557,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn,
 	int num_nodes;
 	int i;

-	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
+	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
 	/*
 	 * If the numa=fake command-line contains a 'M' or 'G', it represents
 	 * the fixed node size.  Otherwise, if it is just a single number N,
@@ -602,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn,
 #endif /* CONFIG_NUMA_EMU */

 void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
-				int acpi, int k8)
+				int acpi, int amd)
 {
 	int i;

@@ -610,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
 	nodes_clear(node_online_map);

 #ifdef CONFIG_NUMA_EMU
-	if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
+	if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
 		return;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
@@ -624,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
 	nodes_clear(node_online_map);
 #endif

-#ifdef CONFIG_K8_NUMA
-	if (!numa_off && k8 && !k8_scan_nodes())
+#ifdef CONFIG_AMD_NUMA
+	if (!numa_off && amd && !amd_scan_nodes())
 		return;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);

+ 22 - 11
arch/x86/mm/pageattr.c

@@ -13,6 +13,7 @@
 #include <linux/pfn.h>
 #include <linux/percpu.h>
 #include <linux/gfp.h>
+#include <linux/pci.h>

 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 				   unsigned long pfn)
 {
 	pgprot_t forbidden = __pgprot(0);
+	pgprot_t required = __pgprot(0);

 	/*
 	 * The BIOS area between 640k and 1Mb needs to be executable for
 	 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 	 */
-	if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
+#ifdef CONFIG_PCI_BIOS
+	if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_NX;
+#endif

 	/*
 	 * The kernel text needs to be executable for obvious reasons
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
 		   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
+	/*
+	 * .data and .bss should always be writable.
+	 */
+	if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
+	    within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
+		pgprot_val(required) |= _PAGE_RW;

 #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
 	/*
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 #endif

 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+	prot = __pgprot(pgprot_val(prot) | pgprot_val(required));

 	return prot;
 }
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 {
 	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
 	pte_t new_pte, old_pte, *tmp;
-	pgprot_t old_prot, new_prot;
+	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
 	unsigned int level;

@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * We are safe now. Check whether the new pgprot is the same:
 	 */
 	old_pte = *kpte;
-	old_prot = new_prot = pte_pgprot(old_pte);
+	old_prot = new_prot = req_prot = pte_pgprot(old_pte);

-	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);

 	/*
 	 * old_pte points to the large page base address. So we need
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
 	cpa->pfn = pfn;

-	new_prot = static_protections(new_prot, address, pfn);
+	new_prot = static_protections(req_prot, address, pfn);

 	/*
 	 * We need to check the full range, whether
 	 * static_protection() requires a different pgprot for one of
 	 * the pages in the range we try to preserve:
 	 */
-	addr = address + PAGE_SIZE;
-	pfn++;
-	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
-		pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
+	addr = address & pmask;
+	pfn = pte_pfn(old_pte);
+	for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+		pgprot_t chk_prot = static_protections(req_prot, addr, pfn);

 		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
 			goto out_unlock;
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * that we limited the number of possible pages already to
 	 * the number of pages in the large page.
 	 */
-	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+	if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
 		/*
 		 * The address is aligned and the number of pages
 		 * covers the full page.

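The new required mask works opposite to forbidden: after the forbidden bits are stripped, the required bits are forced back on, so .data and .bss can never lose _PAGE_RW no matter what mask the caller passed in. The core of that merge step, reduced to a sketch (helper name is illustrative):

static pgprot_t apply_static_masks(pgprot_t prot, pgprot_t forbidden,
				   pgprot_t required)
{
	/* strip what static protections forbid ... */
	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
	/* ... then force back what they require */
	prot = __pgprot(pgprot_val(prot) | pgprot_val(required));
	return prot;
}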
+ 1 - 1
arch/x86/mm/setup_nx.c

@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
 {
 	if (!cpu_has_nx) {
 		printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-		       "missing in CPU or disabled in BIOS!\n");
+		       "missing in CPU!\n");
 	} else {
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 		if (disable_nx) {

+ 1 - 0
arch/x86/mm/srat_32.c

@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
 	/* mark this node as "seen" in node bitmap */
 	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);

+	/* don't need to check apic_id here, because it is always 8 bits */
 	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;

 	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",

+ 10 - 0
arch/x86/mm/srat_64.c

@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}

 	apic_id = pa->apic_id;
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
+
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
+
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;

+ 1 - 0
arch/x86/oprofile/op_model_amd.c

@@ -610,6 +610,7 @@ static int force_ibs_eilvt_setup(void)
 		ret = setup_ibs_ctl(i);
 		if (ret)
 			return ret;
+		pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
 		return 0;
 	}


+ 1 - 0
arch/x86/pci/Makefile

@@ -7,6 +7,7 @@ obj-$(CONFIG_PCI_OLPC)		+= olpc.o
 obj-$(CONFIG_PCI_XEN)		+= xen.o

 obj-y				+= fixup.o
+obj-$(CONFIG_X86_INTEL_CE)      += ce4100.o
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-y				+= legacy.o irq.o


+ 315 - 0
arch/x86/pci/ce4100.c

@@ -0,0 +1,315 @@
+/*
+ *  GPL LICENSE SUMMARY
+ *
+ *  Copyright(c) 2010 Intel Corporation. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of version 2 of the GNU General Public License as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *  The full GNU General Public License is included in this distribution
+ *  in the file called LICENSE.GPL.
+ *
+ *  Contact Information:
+ *    Intel Corporation
+ *    2200 Mission College Blvd.
+ *    Santa Clara, CA  97052
+ *
+ * This provides access methods for PCI registers that misbehave on
+ * the CE4100. Each register can be assigned a private init, read and
+ * write routine. The exception to this is the bridge device.  The
+ * bridge device is the only device on bus zero (0) that requires any
+ * fixup so it is a special case ATM
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#include <asm/pci_x86.h>
+
+struct sim_reg {
+	u32 value;
+	u32 mask;
+};
+
+struct sim_dev_reg {
+	int dev_func;
+	int reg;
+	void (*init)(struct sim_dev_reg *reg);
+	void (*read)(struct sim_dev_reg *reg, u32 *value);
+	void (*write)(struct sim_dev_reg *reg, u32 value);
+	struct sim_reg sim_reg;
+};
+
+struct sim_reg_op {
+	void (*init)(struct sim_dev_reg *reg);
+	void (*read)(struct sim_dev_reg *reg, u32 value);
+	void (*write)(struct sim_dev_reg *reg, u32 value);
+};
+
+#define MB (1024 * 1024)
+#define KB (1024)
+#define SIZE_TO_MASK(size) (~(size - 1))
+
+#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\
+{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\
+	{0, SIZE_TO_MASK(size)} },
+
+static void reg_init(struct sim_dev_reg *reg)
+{
+	pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4,
+			      &reg->sim_reg.value);
+}
+
+static void reg_read(struct sim_dev_reg *reg, u32 *value)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pci_config_lock, flags);
+	*value = reg->sim_reg.value;
+	raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static void reg_write(struct sim_dev_reg *reg, u32 value)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pci_config_lock, flags);
+	reg->sim_reg.value = (value & reg->sim_reg.mask) |
+		(reg->sim_reg.value & ~reg->sim_reg.mask);
+	raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static void sata_reg_init(struct sim_dev_reg *reg)
+{
+	pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4,
+			      &reg->sim_reg.value);
+	reg->sim_reg.value += 0x400;
+}
+
+static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value)
+{
+	reg_read(reg, value);
+	if (*value != reg->sim_reg.mask)
+		*value |= 0x100;
+}
+
+void sata_revid_init(struct sim_dev_reg *reg)
+{
+	reg->sim_reg.value = 0x01060100;
+	reg->sim_reg.mask = 0;
+}
+
+static void sata_revid_read(struct sim_dev_reg *reg, u32 *value)
+{
+	reg_read(reg, value);
+}
+
+static struct sim_dev_reg bus1_fixups[] = {
+	DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write)
+	DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
+	DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, reg_write)
+	DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write)
+	DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write)
+	DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
+	DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
+	DEFINE_REG(14, 0, 0x8,  0, sata_revid_init, sata_revid_read, 0)
+	DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write)
+	DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write)
+	DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write)
+	DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write)
+	DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write)
+	DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write)
+	DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write)
+	DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write)
+	DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+	DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write)
+};
+
+static void __init init_sim_regs(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+		if (bus1_fixups[i].init)
+			bus1_fixups[i].init(&bus1_fixups[i]);
+	}
+}
+
+static inline void extract_bytes(u32 *value, int reg, int len)
+{
+	uint32_t mask;
+
+	*value >>= ((reg & 3) * 8);
+	mask = 0xFFFFFFFF >> ((4 - len) * 8);
+	*value &= mask;
+}
+
+int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
+{
+	u32 av_bridge_base, av_bridge_limit;
+	int retval = 0;
+
+	switch (reg) {
+	/* Make BARs appear to not request any memory. */
+	case PCI_BASE_ADDRESS_0:
+	case PCI_BASE_ADDRESS_0 + 1:
+	case PCI_BASE_ADDRESS_0 + 2:
+	case PCI_BASE_ADDRESS_0 + 3:
+		*value = 0;
+		break;
+
+		/* The subordinate bus number register is hardwired
+		 * to zero and read only, so simulate it here.
+		 */
+	case PCI_PRIMARY_BUS:
+		if (len == 4)
+			*value = 0x00010100;
+		break;
+
+	case PCI_SUBORDINATE_BUS:
+		*value = 1;
+		break;
+
+	case PCI_MEMORY_BASE:
+	case PCI_MEMORY_LIMIT:
+		/* Get the A/V bridge base address. */
+		pci_direct_conf1.read(0, 0, devfn,
+				PCI_BASE_ADDRESS_0, 4, &av_bridge_base);
+
+		av_bridge_limit = av_bridge_base + (512*MB - 1);
+		av_bridge_limit >>= 16;
+		av_bridge_limit &= 0xFFF0;
+
+		av_bridge_base >>= 16;
+		av_bridge_base &= 0xFFF0;
+
+		if (reg == PCI_MEMORY_LIMIT)
+			*value = av_bridge_limit;
+		else if (len == 2)
+			*value = av_bridge_base;
+		else
+			*value = (av_bridge_limit << 16) | av_bridge_base;
+		break;
+		/* Make prefetchable memory limit smaller than prefetchable
+		 * memory base, so no prefetchable memory space is claimed.
+		 */
+	case PCI_PREF_MEMORY_BASE:
+		*value = 0xFFF0;
+		break;
+	case PCI_PREF_MEMORY_LIMIT:
+		*value = 0x0;
+		break;
+		/* Make IO limit smaller than IO base, so no IO space is claimed. */
+	case PCI_IO_BASE:
+		*value = 0xF0;
+		break;
+	case PCI_IO_LIMIT:
+		*value = 0;
+		break;
+	default:
+		retval = 1;
+	}
+	return retval;
+}
+
+static int ce4100_conf_read(unsigned int seg, unsigned int bus,
+			    unsigned int devfn, int reg, int len, u32 *value)
+{
+	int i, retval = 1;
+
+	if (bus == 1) {
+		for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+			if (bus1_fixups[i].dev_func == devfn &&
+			    bus1_fixups[i].reg == (reg & ~3) &&
+			    bus1_fixups[i].read) {
+				bus1_fixups[i].read(&(bus1_fixups[i]),
+						    value);
+				extract_bytes(value, reg, len);
+				return 0;
+			}
+		}
+	}
+
+	if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) &&
+	    !bridge_read(devfn, reg, len, value))
+		return 0;
+
+	return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
+}
+
+static int ce4100_conf_write(unsigned int seg, unsigned int bus,
+			     unsigned int devfn, int reg, int len, u32 value)
+{
+	int i;
+
+	if (bus == 1) {
+		for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+			if (bus1_fixups[i].dev_func == devfn &&
+			    bus1_fixups[i].reg == (reg & ~3) &&
+			    bus1_fixups[i].write) {
+				bus1_fixups[i].write(&(bus1_fixups[i]),
+						     value);
+				return 0;
+			}
+		}
+	}
+
+	/* Discard writes to A/V bridge BAR. */
+	if (bus == 0 && PCI_DEVFN(1, 0) == devfn &&
+	    ((reg & ~3) == PCI_BASE_ADDRESS_0))
+		return 0;
+
+	return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
+}
+
+struct pci_raw_ops ce4100_pci_conf = {
+	.read =	ce4100_conf_read,
+	.write = ce4100_conf_write,
+};
+
+static int __init ce4100_pci_init(void)
+{
+	init_sim_regs();
+	raw_pci_ops = &ce4100_pci_conf;
+	return 0;
+}
+subsys_initcall(ce4100_pci_init);

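extract_bytes() is what lets sub-dword config reads hit the 32-bit simulated registers: the dword is shifted down to the requested byte lane, then masked to the requested length. A standalone worked example (illustrative, plain C):

#include <stdint.h>
#include <stdio.h>

static void extract_bytes(uint32_t *value, int reg, int len)
{
	*value >>= ((reg & 3) * 8);			/* shift to byte lane */
	*value &= 0xFFFFFFFFu >> ((4 - len) * 8);	/* keep len bytes */
}

int main(void)
{
	uint32_t v = 0xAABBCCDD;	/* simulated dword at offset 0x10 */

	extract_bytes(&v, 0x11, 1);	/* 1-byte config read at offset 0x11 */
	printf("0x%02X\n", v);		/* prints 0xCC */
	return 0;
}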
+ 23 - 0
arch/x86/pci/pcbios.c

@@ -9,6 +9,7 @@
 #include <linux/uaccess.h>
 #include <asm/pci_x86.h>
 #include <asm/pci-functions.h>
+#include <asm/cacheflush.h>

 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE	(('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
@@ -25,6 +26,27 @@
 #define PCIBIOS_HW_TYPE1_SPEC		0x10
 #define PCIBIOS_HW_TYPE2_SPEC		0x20

+int pcibios_enabled;
+
+/* According to the BIOS specification at:
+ * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
+ * restrict the x zone to some pages and make it ro. But this may be
+ * broken on some BIOSes, and complex to handle with static_protections.
+ * We could make the 0xe0000-0x100000 range rox, but this can break
+ * some ISA mappings.
+ *
+ * So we leave an rw and x hole when pcibios is used. This shouldn't
+ * happen for modern systems with mmconfig, and if you don't want it
+ * you could disable pcibios...
+ */
+static inline void set_bios_x(void)
+{
+	pcibios_enabled = 1;
+	set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
+	if (__supported_pte_mask & _PAGE_NX)
+		printk(KERN_INFO "PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n");
+}
+
 /*
  * This is the standard structure used to identify the entry point
  * to the BIOS32 Service Directory, as documented in
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void)
 			DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
 					bios32_entry);
 			bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+			set_bios_x();
 			if (check_pcibios())
 				return &pci_bios_access;
 		}

+ 2 - 0
arch/x86/platform/Makefile

@@ -1,5 +1,7 @@
 # Platform specific code goes here
+obj-y	+= ce4100/
 obj-y	+= efi/
+obj-y	+= iris/
 obj-y	+= mrst/
 obj-y	+= olpc/
 obj-y	+= scx200/

+ 1 - 0
arch/x86/platform/ce4100/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_X86_INTEL_CE)	+= ce4100.o

+ 132 - 0
arch/x86/platform/ce4100/ce4100.c

@@ -0,0 +1,132 @@
+/*
+ * Intel CE4100  platform specific setup code
+ *
+ * (C) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+
+#include <asm/setup.h>
+#include <asm/io.h>
+
+static int ce4100_i8042_detect(void)
+{
+	return 0;
+}
+
+static void __init sdv_find_smp_config(void)
+{
+}
+
+#ifdef CONFIG_SERIAL_8250
+
+
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+	offset = offset << p->regshift;
+	return readl(p->membase + offset);
+}
+
+/*
+ * The UART Tx interrupts are not set under some conditions and therefore serial
+ * transmission hangs. This is a silicon issue and has not been root caused. The
+ * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT
+ * bit of LSR register in interrupt handler to see whether at least one of these
+ * two bits is set, if so then process the transmit request. If this workaround
+ * is not applied, then the serial transmission may hang. This workaround is for
+ * errata number 9 in Errata - B step.
+ */
+
+static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
+{
+	unsigned int ret, ier, lsr;
+
+	if (offset == UART_IIR) {
+		offset = offset << p->regshift;
+		ret = readl(p->membase + offset);
+		if (ret & UART_IIR_NO_INT) {
+			/* see if the TX interrupt should have really set */
+			ier = mem_serial_in(p, UART_IER);
+			/* see if the UART's XMIT interrupt is enabled */
+			if (ier & UART_IER_THRI) {
+				lsr = mem_serial_in(p, UART_LSR);
+				/* now check to see if the UART should be
+				   generating an interrupt (but isn't) */
+				if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
+					ret &= ~UART_IIR_NO_INT;
+			}
+		}
+	} else
+		ret =  mem_serial_in(p, offset);
+	return ret;
+}
+
+static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = offset << p->regshift;
+	writel(value, p->membase + offset);
+}
+
+static void ce4100_serial_fixup(int port, struct uart_port *up,
+	unsigned short *capabilites)
+{
+#ifdef CONFIG_EARLY_PRINTK
+	/*
+	 * Override the legacy port configuration that comes from
+	 * asm/serial.h. Using the ioport driver and then switching to the
+	 * PCI memmapped driver hangs the IOAPIC.
+	 */
+	if (up->iotype !=  UPIO_MEM32) {
+		up->uartclk  = 14745600;
+		up->mapbase = 0xdffe0200;
+		set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
+				up->mapbase & PAGE_MASK);
+		up->membase =
+			(void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
+		up->membase += up->mapbase & ~PAGE_MASK;
+		up->iotype   = UPIO_MEM32;
+		up->regshift = 2;
+	}
+#endif
+	up->iobase = 0;
+	up->serial_in = ce4100_mem_serial_in;
+	up->serial_out = ce4100_mem_serial_out;
+
+	*capabilites |= (1 << 12);
+}
+
+static __init void sdv_serial_fixup(void)
+{
+	serial8250_set_isa_configurator(ce4100_serial_fixup);
+}
+
+#else
+static inline void sdv_serial_fixup(void) {}
+#endif
+
+static void __init sdv_arch_setup(void)
+{
+	sdv_serial_fixup();
+}
+
+/*
+ * CE4100 specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_ce4100_early_setup(void)
+{
+	x86_init.oem.arch_setup = sdv_arch_setup;
+	x86_platform.i8042_detect = ce4100_i8042_detect;
+	x86_init.resources.probe_roms = x86_init_noop;
+	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+	x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+}

+ 1 - 0
arch/x86/platform/iris/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_X86_32_IRIS)		+= iris.o

+ 91 - 0
arch/x86/platform/iris/iris.c

@@ -0,0 +1,91 @@
+/*
+ * Eurobraille/Iris power off support.
+ *
+ * Eurobraille's Iris machine is a PC with no APM or ACPI support.
+ * It is shutdown by a special I/O sequence which this module provides.
+ *
+ *  Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/io.h>
+
+#define IRIS_GIO_BASE		0x340
+#define IRIS_GIO_INPUT		IRIS_GIO_BASE
+#define IRIS_GIO_OUTPUT		(IRIS_GIO_BASE + 1)
+#define IRIS_GIO_PULSE		0x80 /* First byte to send */
+#define IRIS_GIO_REST		0x00 /* Second byte to send */
+#define IRIS_GIO_NODEV		0xff /* Likely not an Iris */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
+MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
+MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
+
+static int force;
+
+module_param(force, bool, 0);
+MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
+
+static void (*old_pm_power_off)(void);
+
+static void iris_power_off(void)
+{
+	outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT);
+	msleep(850);
+	outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT);
+}
+
+/*
+ * Before installing the power_off handler, try to make sure the OS is
+ * running on an Iris.  Since Iris does not support DMI, this is done
+ * by reading its input port and seeing whether the read value is
+ * meaningful.
+ */
+static int iris_init(void)
+{
+	unsigned char status;
+	if (force != 1) {
+		printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
+		return -ENODEV;
+	}
+	status = inb(IRIS_GIO_INPUT);
+	if (status == IRIS_GIO_NODEV) {
+		printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
+		return -ENODEV;
+	}
+	old_pm_power_off = pm_power_off;
+	pm_power_off = &iris_power_off;
+	printk(KERN_INFO "Iris power_off handler installed.\n");
+
+	return 0;
+}
+
+static void iris_exit(void)
+{
+	pm_power_off = old_pm_power_off;
+	printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+}
+
+module_init(iris_init);
+module_exit(iris_exit);

+ 2 - 0
arch/x86/platform/mrst/Makefile

@@ -1 +1,3 @@
 obj-$(CONFIG_X86_MRST)		+= mrst.o
 obj-$(CONFIG_X86_MRST)		+= mrst.o
+obj-$(CONFIG_EARLY_PRINTK_MRST)	+= early_printk_mrst.o

+ 0 - 0
arch/x86/kernel/early_printk_mrst.c → arch/x86/platform/mrst/early_printk_mrst.c


+ 535 - 11
arch/x86/platform/mrst/mrst.c

@@ -9,9 +9,19 @@
  * as published by the Free Software Foundation; version 2
  * as published by the Free Software Foundation; version 2
  * of the License.
  * of the License.
  */
  */
+
+#define pr_fmt(fmt) "mrst: " fmt
+
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/sfi.h>
 #include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca953x.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/module.h>
 
 
@@ -23,7 +33,9 @@
 #include <asm/mrst.h>
 #include <asm/mrst.h>
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/i8259.h>
 #include <asm/i8259.h>
+#include <asm/intel_scu_ipc.h>
 #include <asm/apb_timer.h>
 #include <asm/apb_timer.h>
+#include <asm/reboot.h>
 
 
 /*
 /*
  * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
  * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
@@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
 		memcpy(sfi_mtimer_array, pentry, totallen);
 	}

-	printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
+	pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
 	pentry = sfi_mtimer_array;
 	for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
-		printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
+		pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
 			" irq = %d\n", totallen, (u32)pentry->phys_addr,
 			pentry->freq_hz, pentry->irq);
 			if (!pentry->irq)
@@ -176,14 +188,14 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
 		memcpy(sfi_mrtc_array, pentry, totallen);
 	}

-	printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
+	pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
 	pentry = sfi_mrtc_array;
 	for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
-		printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
+		pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
 			totallen, (u32)pentry->phys_addr, pentry->irq);
 		mp_irq.type = MP_IOAPIC;
 		mp_irq.irqtype = mp_INT;
-		mp_irq.irqflag = 0;
+		mp_irq.irqflag = 0xf;	/* level trigger and active low */
 		mp_irq.srcbus = 0;
 		mp_irq.srcbusirq = pentry->irq;	/* IRQ */
 		mp_irq.dstapic = MP_APIC_ALL;
@@ -209,6 +221,7 @@ static unsigned long __init mrst_calibrate_tsc(void)

 void __init mrst_time_init(void)
 {
+	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
 	switch (mrst_timer_options) {
 	case MRST_TIMER_APBT_ONLY:
 		break;
@@ -224,16 +237,10 @@ void __init mrst_time_init(void)
 		return;
 	}
 	/* we need at least one APB timer */
-	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
 	pre_init_apic_IRQ0();
 	apbt_time_init();
 }

-void __init mrst_rtc_init(void)
-{
-	sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
-}
-
 void __cpuinit mrst_arch_setup(void)
 {
 	if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
@@ -256,6 +263,17 @@ static int mrst_i8042_detect(void)
 	return 0;
 }

+/* Reboot and power off are handled by the SCU on a MID device */
+static void mrst_power_off(void)
+{
+	intel_scu_ipc_simple_command(0xf1, 1);
+}
+
+static void mrst_reboot(void)
+{
+	intel_scu_ipc_simple_command(0xf1, 0);
+}
+
 /*
  * Moorestown specific x86_init function overrides and early setup
  * calls.
@@ -281,6 +299,10 @@ void __init x86_mrst_early_setup(void)

 	legacy_pic = &null_legacy_pic;

+	/* Moorestown specific power_off/restart method */
+	pm_power_off = mrst_power_off;
+	machine_ops.emergency_restart  = mrst_reboot;
+
 	/* Avoid searching for BIOS MP tables */
 	x86_init.mpparse.find_smp_config = x86_init_noop;
 	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
@@ -309,3 +331,505 @@ static inline int __init setup_x86_mrst_timer(char *arg)
 	return 0;
 }
 __setup("x86_mrst_timer=", setup_x86_mrst_timer);
+
+/*
+ * Parsing GPIO table first, since the DEVS table will need this table
+ * to map the pin name to the actual pin.
+ */
+static struct sfi_gpio_table_entry *gpio_table;
+static int gpio_num_entry;
+
+static int __init sfi_parse_gpio(struct sfi_table_header *table)
+{
+	struct sfi_table_simple *sb;
+	struct sfi_gpio_table_entry *pentry;
+	int num, i;
+
+	if (gpio_table)
+		return 0;
+	sb = (struct sfi_table_simple *)table;
+	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
+	pentry = (struct sfi_gpio_table_entry *)sb->pentry;
+
+	gpio_table = (struct sfi_gpio_table_entry *)
+				kmalloc(num * sizeof(*pentry), GFP_KERNEL);
+	if (!gpio_table)
+		return -1;
+	memcpy(gpio_table, pentry, num * sizeof(*pentry));
+	gpio_num_entry = num;
+
+	pr_debug("GPIO pin info:\n");
+	for (i = 0; i < num; i++, pentry++)
+		pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
+		" pin = %d\n", i,
+			pentry->controller_name,
+			pentry->pin_name,
+			pentry->pin_no);
+	return 0;
+}
+
+static int get_gpio_by_name(const char *name)
+{
+	struct sfi_gpio_table_entry *pentry = gpio_table;
+	int i;
+
+	if (!pentry)
+		return -1;
+	for (i = 0; i < gpio_num_entry; i++, pentry++) {
+		if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
+			return pentry->pin_no;
+	}
+	return -1;
+}
+
+/*
+ * Here defines the array of devices platform data that IAFW would export
+ * through SFI "DEVS" table, we use name and type to match the device and
+ * its platform data.
+ */
+struct devs_id {
+	char name[SFI_NAME_LEN + 1];
+	u8 type;
+	u8 delay;
+	void *(*get_platform_data)(void *info);
+};
+
+/* the offset for the mapping of global gpio pin to irq */
+#define MRST_IRQ_OFFSET 0x100
+
+static void __init *pmic_gpio_platform_data(void *info)
+{
+	static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
+	int gpio_base = get_gpio_by_name("pmic_gpio_base");
+
+	if (gpio_base == -1)
+		gpio_base = 64;
+	pmic_gpio_pdata.gpio_base = gpio_base;
+	pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
+	pmic_gpio_pdata.gpiointr = 0xffffeff8;
+
+	return &pmic_gpio_pdata;
+}
+
+static void __init *max3111_platform_data(void *info)
+{
+	struct spi_board_info *spi_info = info;
+	int intr = get_gpio_by_name("max3111_int");
+
+	if (intr == -1)
+		return NULL;
+	spi_info->irq = intr + MRST_IRQ_OFFSET;
+	return NULL;
+}
+
+/* we have multiple max7315 on the board ... */
+#define MAX7315_NUM 2
+static void __init *max7315_platform_data(void *info)
+{
+	static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
+	static int nr;
+	struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
+	struct i2c_board_info *i2c_info = info;
+	int gpio_base, intr;
+	char base_pin_name[SFI_NAME_LEN + 1];
+	char intr_pin_name[SFI_NAME_LEN + 1];
+
+	if (nr == MAX7315_NUM) {
+		pr_err("too many max7315s, we only support %d\n",
+				MAX7315_NUM);
+		return NULL;
+	}
+	/* we have several max7315 on the board, we only need to load several
+	 * instances of the same pca953x driver to cover them
+	 */
+	strcpy(i2c_info->type, "max7315");
+	if (nr++) {
+		sprintf(base_pin_name, "max7315_%d_base", nr);
+		sprintf(intr_pin_name, "max7315_%d_int", nr);
+	} else {
+		strcpy(base_pin_name, "max7315_base");
+		strcpy(intr_pin_name, "max7315_int");
+	}
+
+	gpio_base = get_gpio_by_name(base_pin_name);
+	intr = get_gpio_by_name(intr_pin_name);
+
+	if (gpio_base == -1)
+		return NULL;
+	max7315->gpio_base = gpio_base;
+	if (intr != -1) {
+		i2c_info->irq = intr + MRST_IRQ_OFFSET;
+		max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
+	} else {
+		i2c_info->irq = -1;
+		max7315->irq_base = -1;
+	}
+	return max7315;
+}
+
+static void __init *emc1403_platform_data(void *info)
+{
+	static short intr2nd_pdata;
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("thermal_int");
+	int intr2nd = get_gpio_by_name("thermal_alert");
+
+	if (intr == -1 || intr2nd == -1)
+		return NULL;
+
+	i2c_info->irq = intr + MRST_IRQ_OFFSET;
+	intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+	return &intr2nd_pdata;
+}
+
+static void __init *lis331dl_platform_data(void *info)
+{
+	static short intr2nd_pdata;
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("accel_int");
+	int intr2nd = get_gpio_by_name("accel_2");
+
+	if (intr == -1 || intr2nd == -1)
+		return NULL;
+
+	i2c_info->irq = intr + MRST_IRQ_OFFSET;
+	intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+	return &intr2nd_pdata;
+}
+
+static void __init *no_platform_data(void *info)
+{
+	return NULL;
+}
+
+static const struct devs_id __initconst device_ids[] = {
+	{"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
+	{"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
+	{"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+	{"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+	{"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
+	{"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
+	{"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+	{"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+	{},
+};
+
+#define MAX_IPCDEVS	24
+static struct platform_device *ipc_devs[MAX_IPCDEVS];
+static int ipc_next_dev;
+
+#define MAX_SCU_SPI	24
+static struct spi_board_info *spi_devs[MAX_SCU_SPI];
+static int spi_next_dev;
+
+#define MAX_SCU_I2C	24
+static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
+static int i2c_bus[MAX_SCU_I2C];
+static int i2c_next_dev;
+
+static void __init intel_scu_device_register(struct platform_device *pdev)
+{
+	if (ipc_next_dev == MAX_IPCDEVS)
+		pr_err("too many SCU IPC devices");
+	else
+		ipc_devs[ipc_next_dev++] = pdev;
+}
+
+static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
+{
+	struct spi_board_info *new_dev;
+
+	if (spi_next_dev == MAX_SCU_SPI) {
+		pr_err("too many SCU SPI devices");
+		return;
+	}
+
+	new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+	if (!new_dev) {
+		pr_err("failed to alloc mem for delayed spi dev %s\n",
+			sdev->modalias);
+		return;
+	}
+	memcpy(new_dev, sdev, sizeof(*sdev));
+
+	spi_devs[spi_next_dev++] = new_dev;
+}
+
+static void __init intel_scu_i2c_device_register(int bus,
+						struct i2c_board_info *idev)
+{
+	struct i2c_board_info *new_dev;
+
+	if (i2c_next_dev == MAX_SCU_I2C) {
+		pr_err("too many SCU I2C devices");
+		return;
+	}
+
+	new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
+	if (!new_dev) {
+		pr_err("failed to alloc mem for delayed i2c dev %s\n",
+			idev->type);
+		return;
+	}
+	memcpy(new_dev, idev, sizeof(*idev));
+
+	i2c_bus[i2c_next_dev] = bus;
+	i2c_devs[i2c_next_dev++] = new_dev;
+}
+
+/* Called by IPC driver */
+void intel_scu_devices_create(void)
+{
+	int i;
+
+	for (i = 0; i < ipc_next_dev; i++)
+		platform_device_add(ipc_devs[i]);
+
+	for (i = 0; i < spi_next_dev; i++)
+		spi_register_board_info(spi_devs[i], 1);
+
+	for (i = 0; i < i2c_next_dev; i++) {
+		struct i2c_adapter *adapter;
+		struct i2c_client *client;
+
+		adapter = i2c_get_adapter(i2c_bus[i]);
+		if (adapter) {
+			client = i2c_new_device(adapter, i2c_devs[i]);
+			if (!client)
+				pr_err("can't create i2c device %s\n",
+					i2c_devs[i]->type);
+		} else
+			i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
+	}
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_create);
+
+/* Called by IPC driver */
+void intel_scu_devices_destroy(void)
+{
+	int i;
+
+	for (i = 0; i < ipc_next_dev; i++)
+		platform_device_del(ipc_devs[i]);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
+
+static void __init install_irq_resource(struct platform_device *pdev, int irq)
+{
+	/* Single threaded */
+	static struct resource __initdata res = {
+		.name = "IRQ",
+		.flags = IORESOURCE_IRQ,
+	};
+	res.start = irq;
+	platform_device_add_resources(pdev, &res, 1);
+}
+
+static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
+{
+	const struct devs_id *dev = device_ids;
+	void *pdata = NULL;
+
+	while (dev->name[0]) {
+		if (dev->type == SFI_DEV_TYPE_IPC &&
+			!strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
+			pdata = dev->get_platform_data(pdev);
+			break;
+		}
+		dev++;
+	}
+	pdev->dev.platform_data = pdata;
+	intel_scu_device_register(pdev);
+}
+
+static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
+{
+	const struct devs_id *dev = device_ids;
+	void *pdata = NULL;
+
+	while (dev->name[0]) {
+		if (dev->type == SFI_DEV_TYPE_SPI &&
+				!strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
+			pdata = dev->get_platform_data(spi_info);
+			break;
+		}
+		dev++;
+	}
+	spi_info->platform_data = pdata;
+	if (dev->delay)
+		intel_scu_spi_device_register(spi_info);
+	else
+		spi_register_board_info(spi_info, 1);
+}
+
+static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
+{
+	const struct devs_id *dev = device_ids;
+	void *pdata = NULL;
+
+	while (dev->name[0]) {
+		if (dev->type == SFI_DEV_TYPE_I2C &&
+			!strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
+			pdata = dev->get_platform_data(i2c_info);
+			break;
+		}
+		dev++;
+	}
+	i2c_info->platform_data = pdata;
+
+	if (dev->delay)
+		intel_scu_i2c_device_register(bus, i2c_info);
+	else
+		i2c_register_board_info(bus, i2c_info, 1);
+}
+
+static int __init sfi_parse_devs(struct sfi_table_header *table)
+{
+	struct sfi_table_simple *sb;
+	struct sfi_device_table_entry *pentry;
+	struct spi_board_info spi_info;
+	struct i2c_board_info i2c_info;
+	struct platform_device *pdev;
+	int num, i, bus;
+	int ioapic;
+	struct io_apic_irq_attr irq_attr;
+
+	sb = (struct sfi_table_simple *)table;
+	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
+	pentry = (struct sfi_device_table_entry *)sb->pentry;
+
+	for (i = 0; i < num; i++, pentry++) {
+		if (pentry->irq != (u8)0xff) { /* native RTE case */
+			/* these SPI2 devices are not exposed to system as PCI
+			 * devices, but they have separate RTE entry in IOAPIC
+			 * so we have to enable them one by one here
+			 */
+			ioapic = mp_find_ioapic(pentry->irq);
+			irq_attr.ioapic = ioapic;
+			irq_attr.ioapic_pin = pentry->irq;
+			irq_attr.trigger = 1;
+			irq_attr.polarity = 1;
+			io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
+		}
+		switch (pentry->type) {
+		case SFI_DEV_TYPE_IPC:
+			/* ID as IRQ is a hack that will go away */
+			pdev = platform_device_alloc(pentry->name, pentry->irq);
+			if (pdev == NULL) {
+				pr_err("out of memory for SFI platform device '%s'.\n",
+							pentry->name);
+				continue;
+			}
+			install_irq_resource(pdev, pentry->irq);
+			pr_debug("info[%2d]: IPC bus, name = %16.16s, "
+				"irq = 0x%2x\n", i, pentry->name, pentry->irq);
+			sfi_handle_ipc_dev(pdev);
+			break;
+		case SFI_DEV_TYPE_SPI:
+			memset(&spi_info, 0, sizeof(spi_info));
+			strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
+			spi_info.irq = pentry->irq;
+			spi_info.bus_num = pentry->host_num;
+			spi_info.chip_select = pentry->addr;
+			spi_info.max_speed_hz = pentry->max_freq;
+			pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
+				"irq = 0x%2x, max_freq = %d, cs = %d\n", i,
+				spi_info.bus_num,
+				spi_info.modalias,
+				spi_info.irq,
+				spi_info.max_speed_hz,
+				spi_info.chip_select);
+			sfi_handle_spi_dev(&spi_info);
+			break;
+		case SFI_DEV_TYPE_I2C:
+			memset(&i2c_info, 0, sizeof(i2c_info));
+			bus = pentry->host_num;
+			strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
+			i2c_info.irq = pentry->irq;
+			i2c_info.addr = pentry->addr;
+			pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
+				"irq = 0x%2x, addr = 0x%x\n", i, bus,
+				i2c_info.type,
+				i2c_info.irq,
+				i2c_info.addr);
+			sfi_handle_i2c_dev(bus, &i2c_info);
+			break;
+		case SFI_DEV_TYPE_UART:
+		case SFI_DEV_TYPE_HSI:
+		default:
+			;
+		}
+	}
+	return 0;
+}
+
+static int __init mrst_platform_init(void)
+{
+	sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
+	sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
+	return 0;
+}
+arch_initcall(mrst_platform_init);
+
+/*
+ * we will search these buttons in SFI GPIO table (by name)
+ * and register them dynamically. Please add all possible
+ * buttons here, we will shrink them if no GPIO found.
+ */
+static struct gpio_keys_button gpio_button[] = {
+	{KEY_POWER,		-1, 1, "power_btn",	EV_KEY, 0, 3000},
+	{KEY_PROG1,		-1, 1, "prog_btn1",	EV_KEY, 0, 20},
+	{KEY_PROG2,		-1, 1, "prog_btn2",	EV_KEY, 0, 20},
+	{SW_LID,		-1, 1, "lid_switch",	EV_SW,  0, 20},
+	{KEY_VOLUMEUP,		-1, 1, "vol_up",	EV_KEY, 0, 20},
+	{KEY_VOLUMEDOWN,	-1, 1, "vol_down",	EV_KEY, 0, 20},
+	{KEY_CAMERA,		-1, 1, "camera_full",	EV_KEY, 0, 20},
+	{KEY_CAMERA_FOCUS,	-1, 1, "camera_half",	EV_KEY, 0, 20},
+	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw1",	EV_SW,  0, 20},
+	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw2",	EV_SW,  0, 20},
+};
+
+static struct gpio_keys_platform_data mrst_gpio_keys = {
+	.buttons	= gpio_button,
+	.rep		= 1,
+	.nbuttons	= -1, /* will fill it after search */
+};
+
+static struct platform_device pb_device = {
+	.name		= "gpio-keys",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &mrst_gpio_keys,
+	},
+};
+
+/*
+ * Shrink the non-existent buttons, register the gpio button
+ * device if there is some
+ */
+static int __init pb_keys_init(void)
+{
+	struct gpio_keys_button *gb = gpio_button;
+	int i, num, good = 0;
+
+	num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
+	for (i = 0; i < num; i++) {
+		gb[i].gpio = get_gpio_by_name(gb[i].desc);
+		if (gb[i].gpio == -1)
+			continue;
+
+		if (i != good)
+			gb[good] = gb[i];
+		good++;
+	}
+
+	if (good) {
+		mrst_gpio_keys.nbuttons = good;
+		return platform_device_register(&pb_device);
+	}
+	return 0;
+}
+late_initcall(pb_keys_init);

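For orientation, this is how a new SFI-enumerated device would plug into the table-driven scheme above. The sketch is hypothetical ("i2c_foo", "foo_int" and foo_platform_data() are illustrative, not part of this merge); it only reuses helpers the patch itself defines: get_gpio_by_name() resolves a pin from the parsed GPIO table, and a devs_id entry with delay = 1 defers registration until intel_scu_devices_create() runs.

	/* hypothetical platform-data hook for an SFI device named "i2c_foo" */
	static void __init *foo_platform_data(void *info)
	{
		struct i2c_board_info *i2c_info = info;
		int intr = get_gpio_by_name("foo_int");	/* pin name from SFI GPIO table */

		if (intr == -1)
			return NULL;			/* no pin, leave the irq unset */
		i2c_info->irq = intr + MRST_IRQ_OFFSET;	/* gpio number -> irq number */
		return NULL;				/* no extra platform data */
	}

	/* matching (hypothetical) device_ids[] entry:
	 *	{"i2c_foo", SFI_DEV_TYPE_I2C, 1, &foo_platform_data},
	 * delay = 1 queues the device until the SCU IPC driver comes up.
	 */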
+ 165 - 0
arch/x86/platform/mrst/vrtc.c

@@ -0,0 +1,165 @@
+/*
+ * vrtc.c: Driver for virtual RTC device on Intel MID platform
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ * VRTC is emulated by system controller firmware, the real HW
+ * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
+ * in a memory mapped IO space that is visible to the host IA
+ * processor.
+ *
+ * This driver is based on RTC CMOS driver.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/platform_device.h>
+
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+#include <asm/time.h>
+#include <asm/fixmap.h>
+
+static unsigned char __iomem *vrtc_virt_base;
+
+unsigned char vrtc_cmos_read(unsigned char reg)
+{
+	unsigned char retval;
+
+	/* vRTC's registers range from 0x0 to 0xD */
+	if (reg > 0xd || !vrtc_virt_base)
+		return 0xff;
+
+	lock_cmos_prefix(reg);
+	retval = __raw_readb(vrtc_virt_base + (reg << 2));
+	lock_cmos_suffix(reg);
+	return retval;
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_read);
+
+void vrtc_cmos_write(unsigned char val, unsigned char reg)
+{
+	if (reg > 0xd || !vrtc_virt_base)
+		return;
+
+	lock_cmos_prefix(reg);
+	__raw_writeb(val, vrtc_virt_base + (reg << 2));
+	lock_cmos_suffix(reg);
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_write);
+
+unsigned long vrtc_get_time(void)
+{
+	u8 sec, min, hour, mday, mon;
+	u32 year;
+
+	while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
+		cpu_relax();
+
+	sec = vrtc_cmos_read(RTC_SECONDS);
+	min = vrtc_cmos_read(RTC_MINUTES);
+	hour = vrtc_cmos_read(RTC_HOURS);
+	mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
+	mon = vrtc_cmos_read(RTC_MONTH);
+	year = vrtc_cmos_read(RTC_YEAR);
+
+	/* vRTC YEAR reg contains the offset to 1960 */
+	year += 1960;
+
+	printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
+		"mon: %d year: %d\n", sec, min, hour, mday, mon, year);
+
+	return mktime(year, mon, mday, hour, min, sec);
+}
+
+/* Only care about the minutes and seconds */
+int vrtc_set_mmss(unsigned long nowtime)
+{
+	int real_sec, real_min;
+	int vrtc_min;
+
+	vrtc_min = vrtc_cmos_read(RTC_MINUTES);
+
+	real_sec = nowtime % 60;
+	real_min = nowtime / 60;
+	if (((abs(real_min - vrtc_min) + 15)/30) & 1)
+		real_min += 30;
+	real_min %= 60;
+
+	vrtc_cmos_write(real_sec, RTC_SECONDS);
+	vrtc_cmos_write(real_min, RTC_MINUTES);
+	return 0;
+}
+
+void __init mrst_rtc_init(void)
+{
+	unsigned long rtc_paddr;
+	void __iomem *virt_base;
+
+	sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
+	if (!sfi_mrtc_num)
+		return;
+
+	rtc_paddr = sfi_mrtc_array[0].phys_addr;
+
+	/* vRTC's register address may not be page aligned */
+	set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
+
+	virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
+	virt_base += rtc_paddr & ~PAGE_MASK;
+	vrtc_virt_base = virt_base;
+
+	x86_platform.get_wallclock = vrtc_get_time;
+	x86_platform.set_wallclock = vrtc_set_mmss;
+}
+
+/*
+ * The Moorestown platform has a memory mapped virtual RTC device that emulates
+ * the programming interface of the RTC.
+ */
+
+static struct resource vrtc_resources[] = {
+	[0] = {
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.flags	= IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device vrtc_device = {
+	.name		= "rtc_mrst",
+	.id		= -1,
+	.resource	= vrtc_resources,
+	.num_resources	= ARRAY_SIZE(vrtc_resources),
+};
+
+/* Register the RTC device if appropriate */
+static int __init mrst_device_create(void)
+{
+	/* No Moorestown, no device */
+	if (!mrst_identify_cpu())
+		return -ENODEV;
+	/* No timer, no device */
+	if (!sfi_mrtc_num)
+		return -ENODEV;
+
+	/* iomem resource */
+	vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
+	vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
+				MRST_VRTC_MAP_SZ;
+	/* irq resource */
+	vrtc_resources[1].start = sfi_mrtc_array[0].irq;
+	vrtc_resources[1].end = sfi_mrtc_array[0].irq;
+
+	return platform_device_register(&vrtc_device);
+}
+
+module_init(mrst_device_create);

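Two details of the vRTC mapping above are easy to miss: the firmware exposes each byte-wide register on a 4-byte stride (hence the reg << 2 in vrtc_cmos_read/write), and the SFI-reported base address need not be page aligned, so the sub-page offset is added back after the fixmap. A condensed sketch of that address math, using only names from the patch:

	unsigned long paddr = sfi_mrtc_array[0].phys_addr;
	void __iomem *base;

	set_fixmap_nocache(FIX_LNW_VRTC, paddr);	/* map the containing page */
	base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
	base += paddr & ~PAGE_MASK;		/* re-add the unaligned offset */
	/* register N (0x0..0xd) is then read at base + (N << 2) */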
+ 2 - 2
arch/x86/platform/sfi/sfi.c

@@ -48,9 +48,9 @@ static void __init mp_sfi_register_lapic_address(unsigned long address)
 /* All CPUs enumerated by SFI must be present and enabled */
 static void __cpuinit mp_sfi_register_lapic(u8 id)
 {
-	if (MAX_APICS - id <= 0) {
+	if (MAX_LOCAL_APIC - id <= 0) {
 		pr_warning("Processor #%d invalid (max %d)\n",
-			id, MAX_APICS);
+			id, MAX_LOCAL_APIC);
 		return;
 	}


+ 18 - 4
arch/x86/platform/uv/tlb_uv.c

@@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode)

 	/*
 	 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
-	 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
+	 * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
 	 */
 	bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
 				* UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
@@ -1490,7 +1490,7 @@ calculate_destination_timeout(void)
 /*
  * initialize the bau_control structure for each cpu
  */
-static void __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs)
 {
 	int i;
 	int cpu;
@@ -1507,7 +1507,7 @@ static void __init uv_init_per_cpu(int nuvhubs)
 	struct bau_control *smaster = NULL;
 	struct socket_desc {
 		short num_cpus;
-		short cpu_number[16];
+		short cpu_number[MAX_CPUS_PER_SOCKET];
 	};
 	struct uvhub_desc {
 		unsigned short socket_mask;
@@ -1540,6 +1540,10 @@ static void __init uv_init_per_cpu(int nuvhubs)
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
 		sdp->num_cpus++;
+		if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
+			printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
+			return 1;
+		}
 	}
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
@@ -1570,6 +1574,12 @@ static void __init uv_init_per_cpu(int nuvhubs)
 				bcp->uvhub_master = hmaster;
 				bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
 						blade_processor_id;
+				if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
+					printk(KERN_EMERG
+						"%d cpus per uvhub invalid\n",
+						bcp->uvhub_cpu);
+					return 1;
+				}
 			}
 nextsocket:
 			socket++;
@@ -1595,6 +1605,7 @@ nextsocket:
 		bcp->congested_reps = congested_reps;
 		bcp->congested_period = congested_period;
 	}
+	return 0;
 }

 /*
@@ -1625,7 +1636,10 @@ static int __init uv_bau_init(void)
 	spin_lock_init(&disable_lock);
 	congested_cycles = microsec_2_cycles(congested_response_us);

-	uv_init_per_cpu(nuvhubs);
+	if (uv_init_per_cpu(nuvhubs)) {
+		nobau = 1;
+		return 0;
+	}

 	uv_partition_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++)

+ 1 - 1
arch/x86/platform/visws/visws_quirks.c

@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 	ver = m->apicver;
 	if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
 		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
-			m->apicid, MAX_APICS);
+			m->apicid, MAX_LOCAL_APIC);
 		return;
 	}


+ 12 - 2
drivers/acpi/numa.c

@@ -275,13 +275,23 @@ acpi_table_parse_srat(enum acpi_srat_type id,
 int __init acpi_numa_init(void)
 {
 	int ret = 0;
+	int nr_cpu_entries = nr_cpu_ids;
+
+#ifdef CONFIG_X86
+	/*
+	 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
+	 * SRAT cpu entries could have different order with that in MADT.
+	 * So go over all cpu entries in SRAT to get apicid to node mapping.
+	 */
+	nr_cpu_entries = MAX_LOCAL_APIC;
+#endif

 	/* SRAT: Static Resource Affinity Table */
 	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-				     acpi_parse_x2apic_affinity, nr_cpu_ids);
+				     acpi_parse_x2apic_affinity, nr_cpu_entries);
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-				     acpi_parse_processor_affinity, nr_cpu_ids);
+				     acpi_parse_processor_affinity, nr_cpu_entries);
 		ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
 					    acpi_parse_memory_affinity,
 					    NR_NODE_MEMBLKS);

+ 16 - 17
drivers/char/agp/amd64-agp.c

@@ -38,7 +38,7 @@ static int agp_bridges_found;

 static void amd64_tlbflush(struct agp_memory *temp)
 {
-	k8_flush_garts();
+	amd_flush_garts();
 }

 static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
@@ -124,7 +124,7 @@ static int amd64_fetch_size(void)
 	u32 temp;
 	struct aper_size_info_32 *values;

-	dev = k8_northbridges.nb_misc[0];
+	dev = node_to_amd_nb(0)->misc;
 	if (dev==NULL)
 		return 0;

@@ -181,16 +181,15 @@ static int amd_8151_configure(void)
 	unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
 	int i;

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return 0;

 	/* Configure AGP regs in each x86-64 host bridge. */
-	for (i = 0; i < k8_northbridges.num; i++) {
+	for (i = 0; i < amd_nb_num(); i++) {
 		agp_bridge->gart_bus_addr =
-				amd64_configure(k8_northbridges.nb_misc[i],
-						gatt_bus);
+			amd64_configure(node_to_amd_nb(i)->misc, gatt_bus);
 	}
-	k8_flush_garts();
+	amd_flush_garts();
 	return 0;
 }

@@ -200,11 +199,11 @@ static void amd64_cleanup(void)
 	u32 tmp;
 	int i;

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return;

-	for (i = 0; i < k8_northbridges.num; i++) {
-		struct pci_dev *dev = k8_northbridges.nb_misc[i];
+	for (i = 0; i < amd_nb_num(); i++) {
+		struct pci_dev *dev = node_to_amd_nb(i)->misc;
 		/* disable gart translation */
 		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
 		tmp &= ~GARTEN;
@@ -331,15 +330,15 @@ static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
 {
 	int i;

-	if (cache_k8_northbridges() < 0)
+	if (amd_cache_northbridges() < 0)
 		return -ENODEV;

-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return -ENODEV;

 	i = 0;
-	for (i = 0; i < k8_northbridges.num; i++) {
-		struct pci_dev *dev = k8_northbridges.nb_misc[i];
+	for (i = 0; i < amd_nb_num(); i++) {
+		struct pci_dev *dev = node_to_amd_nb(i)->misc;
 		if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
 			dev_err(&dev->dev, "no usable aperture found\n");
 #ifdef __x86_64__
@@ -416,7 +415,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
 	}

 	/* shadow x86-64 registers into ULi registers */
-	pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+	pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
 			       &httfea);

 	/* if x86-64 aperture base is beyond 4G, exit here */
@@ -484,7 +483,7 @@ static int nforce3_agp_init(struct pci_dev *pdev)
 	pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);

 	/* shadow x86-64 registers into NVIDIA registers */
-	pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+	pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
 			       &apbase);

 	/* if x86-64 aperture base is beyond 4G, exit here */
@@ -778,7 +777,7 @@ int __init agp_amd64_init(void)
 		}

 		/* First check that we have at least one AMD64 NB */
-		if (!pci_dev_present(k8_nb_ids))
+		if (!pci_dev_present(amd_nb_misc_ids))
 			return -ENODEV;

 		/* Look for any AGP bridge */

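All of the conversions above follow the same renaming of the old k8_northbridges interface. A minimal sketch of the new idiom, with the calls taken from this merge and error handling trimmed:

	int i;

	if (amd_cache_northbridges() < 0)	/* was cache_k8_northbridges() */
		return -ENODEV;
	if (!amd_nb_has_feature(AMD_NB_GART))	/* was k8_northbridges.gart_supported */
		return -ENODEV;
	for (i = 0; i < amd_nb_num(); i++) {	/* was k8_northbridges.num */
		struct pci_dev *misc = node_to_amd_nb(i)->misc;
		/* per-node config space access goes through the misc device */
	}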
+ 2 - 2
drivers/edac/amd64_edac.c

@@ -2917,7 +2917,7 @@ static int __init amd64_edac_init(void)

 	opstate_init();

-	if (cache_k8_northbridges() < 0)
+	if (amd_cache_northbridges() < 0)
 		goto err_ret;

 	msrs = msrs_alloc();
@@ -2934,7 +2934,7 @@ static int __init amd64_edac_init(void)
 	 * to finish initialization of the MC instances.
 	 */
 	err = -ENODEV;
-	for (nb = 0; nb < k8_northbridges.num; nb++) {
+	for (nb = 0; nb < amd_nb_num(); nb++) {
 		if (!pvt_lookup[nb])
 			continue;


+ 5 - 0
drivers/platform/x86/intel_scu_ipc.c

@@ -26,6 +26,7 @@
 #include <linux/sfi.h>
 #include <asm/mrst.h>
 #include <asm/intel_scu_ipc.h>
+#include <asm/mrst.h>

 /* IPC defines the following message types */
 #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */
@@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		iounmap(ipcdev.ipc_base);
 		return -ENOMEM;
 	}
+
+	intel_scu_devices_create();
+
 	return 0;
 }

@@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev)
 	iounmap(ipcdev.ipc_base);
 	iounmap(ipcdev.i2c_base);
 	ipcdev.pdev = NULL;
+	intel_scu_devices_destroy();
 }

 static const struct pci_device_id pci_ids[] = {

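Read together with the mrst.c hunks earlier in this merge, the intended ordering is: SFI parsing at arch_initcall time queues every devs_id entry that sets the delay flag, and the SCU IPC PCI probe later flushes those queues. A condensed timeline (the two calls are the ones added above; the step comments are editorial):

	/* 1. arch_initcall: mrst_platform_init() -> sfi_parse_devs() queues
	 *    delayed devices via intel_scu_*_device_register()
	 * 2. PCI probe of the SCU IPC device succeeds, so the firmware
	 *    channel the devices depend on is now usable:		*/
	intel_scu_devices_create();	/* end of ipc_probe() */
	/* 3. driver teardown removes the queued platform devices:	*/
	intel_scu_devices_destroy();	/* from ipc_remove() */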
+ 12 - 0
drivers/rtc/Kconfig

@@ -463,6 +463,18 @@ config RTC_DRV_CMOS
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-cmos.

+config RTC_DRV_VRTC
+	tristate "Virtual RTC for Moorestown platforms"
+	depends on X86_MRST
+	default y if X86_MRST
+
+	help
+	Say "yes" here to get direct support for the real time clock
+	found on Moorestown platforms. The VRTC is an emulated RTC that
+	derives its clock source from a real RTC in the PMIC. The MC146818
+	style programming interface is mostly conserved, but any
+	updates are done via IPC calls to the system controller FW.
+
 config RTC_DRV_DS1216
 	tristate "Dallas DS1216"
 	depends on SNI_RM

+ 1 - 0
drivers/rtc/Makefile

@@ -30,6 +30,7 @@ obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_COH901331)	+= rtc-coh901331.o
 obj-$(CONFIG_RTC_DRV_DAVINCI)	+= rtc-davinci.o
 obj-$(CONFIG_RTC_DRV_DM355EVM)	+= rtc-dm355evm.o
+obj-$(CONFIG_RTC_DRV_VRTC)	+= rtc-mrst.o
 obj-$(CONFIG_RTC_DRV_DS1216)	+= rtc-ds1216.o
 obj-$(CONFIG_RTC_DRV_DS1286)	+= rtc-ds1286.o
 obj-$(CONFIG_RTC_DRV_DS1302)	+= rtc-ds1302.o

+ 582 - 0
drivers/rtc/rtc-mrst.c

@@ -0,0 +1,582 @@
+/*
+ * rtc-mrst.c: Driver for Moorestown virtual RTC
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ *	   Feng Tang (feng.tang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ * VRTC is emulated by system controller firmware, the real HW
+ * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
+ * in a memory mapped IO space that is visible to the host IA
+ * processor.
+ *
+ * This driver is based upon drivers/rtc/rtc-cmos.c
+ */
+
+/*
+ * Note:
+ *  * vRTC only supports binary mode and 24H mode
+ *  * vRTC only support PIE and AIE, no UIE, and its PIE only happens
+ *    at 23:59:59pm everyday, no support for adjustable frequency
+ *  * Alarm function is also limited to hr/min/sec.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+
+#include <asm-generic/rtc.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+
+struct mrst_rtc {
+	struct rtc_device	*rtc;
+	struct device		*dev;
+	int			irq;
+	struct resource		*iomem;
+
+	u8			enabled_wake;
+	u8			suspend_ctrl;
+};
+
+static const char driver_name[] = "rtc_mrst";
+
+#define	RTC_IRQMASK	(RTC_PF | RTC_AF)
+
+static inline int is_intr(u8 rtc_intr)
+{
+	if (!(rtc_intr & RTC_IRQF))
+		return 0;
+	return rtc_intr & RTC_IRQMASK;
+}
+
+/*
+ * rtc_time's year contains the increment over 1900, but vRTC's YEAR
+ * register can't be programmed to value larger than 0x64, so vRTC
+ * driver chose to use 1960 (1970 is UNIX time start point) as the base,
+ * and does the translation at read/write time.
+ *
+ * Why not just use 1970 as the offset? it's because using 1960 will
+ * make it consistent in leap year setting for both vrtc and low-level
+ * physical rtc devices.
+ */
+static int mrst_read_time(struct device *dev, struct rtc_time *time)
+{
+	unsigned long flags;
+
+	if (rtc_is_updating())
+		mdelay(20);
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	time->tm_sec = vrtc_cmos_read(RTC_SECONDS);
+	time->tm_min = vrtc_cmos_read(RTC_MINUTES);
+	time->tm_hour = vrtc_cmos_read(RTC_HOURS);
+	time->tm_mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
+	time->tm_mon = vrtc_cmos_read(RTC_MONTH);
+	time->tm_year = vrtc_cmos_read(RTC_YEAR);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	/* Adjust for the 1960/1900 */
+	time->tm_year += 60;
+	time->tm_mon--;
+	return RTC_24H;
+}
+
+static int mrst_set_time(struct device *dev, struct rtc_time *time)
+{
+	int ret;
+	unsigned long flags;
+	unsigned char mon, day, hrs, min, sec;
+	unsigned int yrs;
+
+	yrs = time->tm_year;
+	mon = time->tm_mon + 1;   /* tm_mon starts at zero */
+	day = time->tm_mday;
+	hrs = time->tm_hour;
+	min = time->tm_min;
+	sec = time->tm_sec;
+
+	if (yrs < 70 || yrs > 138)
+		return -EINVAL;
+	yrs -= 60;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+
+	vrtc_cmos_write(yrs, RTC_YEAR);
+	vrtc_cmos_write(mon, RTC_MONTH);
+	vrtc_cmos_write(day, RTC_DAY_OF_MONTH);
+	vrtc_cmos_write(hrs, RTC_HOURS);
+	vrtc_cmos_write(min, RTC_MINUTES);
+	vrtc_cmos_write(sec, RTC_SECONDS);
+
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETTIME);
+	return ret;
+}
+
+static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+	struct mrst_rtc	*mrst = dev_get_drvdata(dev);
+	unsigned char rtc_control;
+
+	if (mrst->irq <= 0)
+		return -EIO;
+
+	/* Basic alarms only support hour, minute, and seconds fields.
+	 * Some also support day and month, for alarms up to a year in
+	 * the future.
+	 */
+	t->time.tm_mday = -1;
+	t->time.tm_mon = -1;
+	t->time.tm_year = -1;
+
+	/* vRTC only supports binary mode */
+	spin_lock_irq(&rtc_lock);
+	t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM);
+	t->time.tm_min = vrtc_cmos_read(RTC_MINUTES_ALARM);
+	t->time.tm_hour = vrtc_cmos_read(RTC_HOURS_ALARM);
+
+	rtc_control = vrtc_cmos_read(RTC_CONTROL);
+	spin_unlock_irq(&rtc_lock);
+
+	t->enabled = !!(rtc_control & RTC_AIE);
+	t->pending = 0;
+
+	return 0;
+}
+
+static void mrst_checkintr(struct mrst_rtc *mrst, unsigned char rtc_control)
+{
+	unsigned char	rtc_intr;
+
+	/*
+	 * NOTE after changing RTC_xIE bits we always read INTR_FLAGS;
+	 * allegedly some older rtcs need that to handle irqs properly
+	 */
+	rtc_intr = vrtc_cmos_read(RTC_INTR_FLAGS);
+	rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
+	if (is_intr(rtc_intr))
+		rtc_update_irq(mrst->rtc, 1, rtc_intr);
+}
+
+static void mrst_irq_enable(struct mrst_rtc *mrst, unsigned char mask)
+{
+	unsigned char	rtc_control;
+
+	/*
+	 * Flush any pending IRQ status, notably for update irqs,
+	 * before we enable new IRQs
+	 */
+	rtc_control = vrtc_cmos_read(RTC_CONTROL);
+	mrst_checkintr(mrst, rtc_control);
+
+	rtc_control |= mask;
+	vrtc_cmos_write(rtc_control, RTC_CONTROL);
+
+	mrst_checkintr(mrst, rtc_control);
+}
+
+static void mrst_irq_disable(struct mrst_rtc *mrst, unsigned char mask)
+{
+	unsigned char	rtc_control;
+
+	rtc_control = vrtc_cmos_read(RTC_CONTROL);
+	rtc_control &= ~mask;
+	vrtc_cmos_write(rtc_control, RTC_CONTROL);
+	mrst_checkintr(mrst, rtc_control);
+}
+
+static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+	struct mrst_rtc	*mrst = dev_get_drvdata(dev);
+	unsigned char hrs, min, sec;
+	int ret = 0;
+
+	if (!mrst->irq)
+		return -EIO;
+
+	hrs = t->time.tm_hour;
+	min = t->time.tm_min;
+	sec = t->time.tm_sec;
+
+	spin_lock_irq(&rtc_lock);
+	/* Next rtc irq must not be from previous alarm setting */
+	mrst_irq_disable(mrst, RTC_AIE);
+
+	/* Update alarm */
+	vrtc_cmos_write(hrs, RTC_HOURS_ALARM);
+	vrtc_cmos_write(min, RTC_MINUTES_ALARM);
+	vrtc_cmos_write(sec, RTC_SECONDS_ALARM);
+
+	spin_unlock_irq(&rtc_lock);
+
+	ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETALARM);
+	if (ret)
+		return ret;
+
+	spin_lock_irq(&rtc_lock);
+	if (t->enabled)
+		mrst_irq_enable(mrst, RTC_AIE);
+
+	spin_unlock_irq(&rtc_lock);
+
+	return 0;
+}
+
+static int mrst_irq_set_state(struct device *dev, int enabled)
+{
+	struct mrst_rtc	*mrst = dev_get_drvdata(dev);
+	unsigned long	flags;
+
+	if (!mrst->irq)
+		return -ENXIO;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+
+	if (enabled)
+		mrst_irq_enable(mrst, RTC_PIE);
+	else
+		mrst_irq_disable(mrst, RTC_PIE);
+
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return 0;
+}
+
+#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
+
+/* Currently, the vRTC doesn't support UIE ON/OFF */
+static int
+mrst_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+	struct mrst_rtc	*mrst = dev_get_drvdata(dev);
+	unsigned long	flags;
+
+	switch (cmd) {
+	case RTC_AIE_OFF:
+	case RTC_AIE_ON:
+		if (!mrst->irq)
+			return -EINVAL;
+		break;
+	default:
+		/* PIE ON/OFF is handled by mrst_irq_set_state() */
+		return -ENOIOCTLCMD;
+	}
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	switch (cmd) {
+	case RTC_AIE_OFF:	/* alarm off */
+		mrst_irq_disable(mrst, RTC_AIE);
+		break;
+	case RTC_AIE_ON:	/* alarm on */
+		mrst_irq_enable(mrst, RTC_AIE);
+		break;
+	}
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return 0;
+}
+
+#else
+#define	mrst_rtc_ioctl	NULL
+#endif
+
+#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
+
+static int mrst_procfs(struct device *dev, struct seq_file *seq)
+{
+	unsigned char	rtc_control, valid;
+
+	spin_lock_irq(&rtc_lock);
+	rtc_control = vrtc_cmos_read(RTC_CONTROL);
+	valid = vrtc_cmos_read(RTC_VALID);
+	spin_unlock_irq(&rtc_lock);
+
+	return seq_printf(seq,
+			"periodic_IRQ\t: %s\n"
+			"alarm\t\t: %s\n"
+			"BCD\t\t: no\n"
+			"periodic_freq\t: daily (not adjustable)\n",
+			(rtc_control & RTC_PIE) ? "on" : "off",
+			(rtc_control & RTC_AIE) ? "on" : "off");
+}
+
+#else
+#define	mrst_procfs	NULL
+#endif
+
+static const struct rtc_class_ops mrst_rtc_ops = {
+	.ioctl		= mrst_rtc_ioctl,
+	.read_time	= mrst_read_time,
+	.set_time	= mrst_set_time,
+	.read_alarm	= mrst_read_alarm,
+	.set_alarm	= mrst_set_alarm,
+	.proc		= mrst_procfs,
+	.irq_set_state	= mrst_irq_set_state,
+};
+
+static struct mrst_rtc	mrst_rtc;
+
+/*
+ * When vRTC IRQ is captured by SCU FW, FW will clear the AIE bit in
+ * Reg B, so no need for this driver to clear it
+ */
+static irqreturn_t mrst_rtc_irq(int irq, void *p)
+{
+	u8 irqstat;
+
+	spin_lock(&rtc_lock);
+	/* This read will clear all IRQ flags inside Reg C */
+	irqstat = vrtc_cmos_read(RTC_INTR_FLAGS);
+	spin_unlock(&rtc_lock);
+
+	irqstat &= RTC_IRQMASK | RTC_IRQF;
+	if (is_intr(irqstat)) {
+		rtc_update_irq(p, 1, irqstat);
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+static int __init
+vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq)
+{
+	int retval = 0;
+	unsigned char rtc_control;
+
+	/* There can be only one ... */
+	if (mrst_rtc.dev)
+		return -EBUSY;
+
+	if (!iomem)
+		return -ENODEV;
+
+	iomem = request_mem_region(iomem->start,
+			iomem->end + 1 - iomem->start,
+			driver_name);
+	if (!iomem) {
+		dev_dbg(dev, "i/o mem already in use.\n");
+		return -EBUSY;
+	}
+
+	mrst_rtc.irq = rtc_irq;
+	mrst_rtc.iomem = iomem;
+
+	mrst_rtc.rtc = rtc_device_register(driver_name, dev,
+				&mrst_rtc_ops, THIS_MODULE);
+	if (IS_ERR(mrst_rtc.rtc)) {
+		retval = PTR_ERR(mrst_rtc.rtc);
+		goto cleanup0;
+	}
+
+	mrst_rtc.dev = dev;
+	dev_set_drvdata(dev, &mrst_rtc);
+	rename_region(iomem, dev_name(&mrst_rtc.rtc->dev));
+
+	spin_lock_irq(&rtc_lock);
+	mrst_irq_disable(&mrst_rtc, RTC_PIE | RTC_AIE);
+	rtc_control = vrtc_cmos_read(RTC_CONTROL);
+	spin_unlock_irq(&rtc_lock);
+
+	if (!(rtc_control & RTC_24H) || (rtc_control & (RTC_DM_BINARY)))
+		dev_dbg(dev, "TODO: support more than 24-hr BCD mode\n");
+
+	if (rtc_irq) {
+		retval = request_irq(rtc_irq, mrst_rtc_irq,
+				IRQF_DISABLED, dev_name(&mrst_rtc.rtc->dev),
+				mrst_rtc.rtc);
+		if (retval < 0) {
+			dev_dbg(dev, "IRQ %d is already in use, err %d\n",
+				rtc_irq, retval);
+			goto cleanup1;
+		}
+	}
+	dev_dbg(dev, "initialised\n");
+	return 0;
+
+cleanup1:
+	mrst_rtc.dev = NULL;
+	rtc_device_unregister(mrst_rtc.rtc);
+cleanup0:
+	release_region(iomem->start, iomem->end + 1 - iomem->start);
+	dev_err(dev, "rtc-mrst: unable to initialise\n");
+	return retval;
+}
+
+static void rtc_mrst_do_shutdown(void)
+{
+	spin_lock_irq(&rtc_lock);
+	mrst_irq_disable(&mrst_rtc, RTC_IRQMASK);
+	spin_unlock_irq(&rtc_lock);
+}
+
+static void __exit rtc_mrst_do_remove(struct device *dev)
+{
+	struct mrst_rtc	*mrst = dev_get_drvdata(dev);
+	struct resource *iomem;
+
+	rtc_mrst_do_shutdown();
+
+	if (mrst->irq)
+		free_irq(mrst->irq, mrst->rtc);
+
+	rtc_device_unregister(mrst->rtc);
+	mrst->rtc = NULL;
+
+	iomem = mrst->iomem;
+	release_region(iomem->start, iomem->end + 1 - iomem->start);
+	mrst->iomem = NULL;
+
+	mrst->dev = NULL;
+	dev_set_drvdata(dev, NULL);
+}
+
+#ifdef	CONFIG_PM
+static int mrst_suspend(struct device *dev, pm_message_t mesg)
+{
+	struct mrst_rtc	*mrst = dev_get_drvdata(dev);
+	unsigned char	tmp;
+
+	/* Only the alarm might be a wakeup event source */
+	spin_lock_irq(&rtc_lock);
+	mrst->suspend_ctrl = tmp = vrtc_cmos_read(RTC_CONTROL);
+	if (tmp & (RTC_PIE | RTC_AIE)) {
+		unsigned char	mask;
+
+		if (device_may_wakeup(dev))
+			mask = RTC_IRQMASK & ~RTC_AIE;
+		else
+			mask = RTC_IRQMASK;
+		tmp &= ~mask;
+		vrtc_cmos_write(tmp, RTC_CONTROL);
+
+		mrst_checkintr(mrst, tmp);
+	}
+	spin_unlock_irq(&rtc_lock);
+
+	if (tmp & RTC_AIE) {
+		mrst->enabled_wake = 1;
+		enable_irq_wake(mrst->irq);
+	}
+
+	dev_dbg(&mrst_rtc.rtc->dev, "suspend%s, ctrl %02x\n",
+			(tmp & RTC_AIE) ? ", alarm may wake" : "",
+			tmp);
+
+	return 0;
+}
+
+/*
+ * We want RTC alarms to wake us from the deep power saving state
+ */
+static inline int mrst_poweroff(struct device *dev)
+{
+	return mrst_suspend(dev, PMSG_HIBERNATE);
+}
+
+static int mrst_resume(struct device *dev)
+{
+	struct mrst_rtc	*mrst = dev_get_drvdata(dev);
+	unsigned char tmp = mrst->suspend_ctrl;
+
+	/* Re-enable any irqs previously active */
+	if (tmp & RTC_IRQMASK) {
+		unsigned char	mask;
+
+		if (mrst->enabled_wake) {
+			disable_irq_wake(mrst->irq);
+			mrst->enabled_wake = 0;
+		}
+
+		spin_lock_irq(&rtc_lock);
+		do {
+			vrtc_cmos_write(tmp, RTC_CONTROL);
+
+			mask = vrtc_cmos_read(RTC_INTR_FLAGS);
+			mask &= (tmp & RTC_IRQMASK) | RTC_IRQF;
+			if (!is_intr(mask))
+				break;
+
+			rtc_update_irq(mrst->rtc, 1, mask);
+			tmp &= ~RTC_AIE;
+		} while (mask & RTC_AIE);
+		spin_unlock_irq(&rtc_lock);
+	}
+
+	dev_dbg(&mrst_rtc.rtc->dev, "resume, ctrl %02x\n", tmp);
+
+	return 0;
+}
+
+#else
+#define	mrst_suspend	NULL
+#define	mrst_resume	NULL
+
+static inline int mrst_poweroff(struct device *dev)
+{
+	return -ENOSYS;
+}
+
+#endif
+
+static int __init vrtc_mrst_platform_probe(struct platform_device *pdev)
+{
+	return vrtc_mrst_do_probe(&pdev->dev,
+			platform_get_resource(pdev, IORESOURCE_MEM, 0),
+			platform_get_irq(pdev, 0));
+}
+
+static int __exit vrtc_mrst_platform_remove(struct platform_device *pdev)
+{
+	rtc_mrst_do_remove(&pdev->dev);
+	return 0;
+}
+
+static void vrtc_mrst_platform_shutdown(struct platform_device *pdev)
+{
+	if (system_state == SYSTEM_POWER_OFF && !mrst_poweroff(&pdev->dev))
+		return;
+
+	rtc_mrst_do_shutdown();
+}
+
+MODULE_ALIAS("platform:vrtc_mrst");
+
+static struct platform_driver vrtc_mrst_platform_driver = {
+	.probe		= vrtc_mrst_platform_probe,
+	.remove		= __exit_p(vrtc_mrst_platform_remove),
+	.shutdown	= vrtc_mrst_platform_shutdown,
+	.driver = {
+		.name		= (char *) driver_name,
+		.suspend	= mrst_suspend,
+		.resume		= mrst_resume,
+	}
+};
+
+static int __init vrtc_mrst_init(void)
+{
+	return platform_driver_register(&vrtc_mrst_platform_driver);
+}
+
+static void __exit vrtc_mrst_exit(void)
+{
+	platform_driver_unregister(&vrtc_mrst_platform_driver);
+}
+
+module_init(vrtc_mrst_init);
+module_exit(vrtc_mrst_exit);
+
+MODULE_AUTHOR("Jacob Pan; Feng Tang");
+MODULE_DESCRIPTION("Driver for Moorestown virtual RTC");
+MODULE_LICENSE("GPL");

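A worked example of the 1960 base-year translation that vrtc.c and rtc-mrst.c both rely on (the 1960 base, rather than 1970, keeps leap-year arithmetic consistent between the vRTC and the physical PMIC RTC):

	/* setting the year 2011: rtc_time.tm_year counts from 1900 */
	unsigned int tm_year = 2011 - 1900;	/* = 111 */
	unsigned char year_reg = tm_year - 60;	/* 51 is written to RTC_YEAR */

	/* the read path reverses it: 51 + 60 = 111, i.e. 2011 again; the
	 * mrst_set_time() bounds check (70..138) thus spans 1970..2038 */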
+ 8 - 3
fs/gfs2/bmap.c

@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	int metadata;
 	unsigned int revokes = 0;
 	int x;
-	int error;
+	int error = 0;

 	if (!*top)
 		sm->sm_first = 0;
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	if (metadata)
 		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;

-	error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+	if (ip != GFS2_I(sdp->sd_rindex))
+		error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+	else if (!sdp->sd_rgrps)
+		error = gfs2_ri_update(ip);
+
 	if (error)
 		return error;

@@ -879,7 +883,8 @@ out_rg_gunlock:
 out_rlist:
 	gfs2_rlist_free(&rlist);
 out:
-	gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
+	if (ip != GFS2_I(sdp->sd_rindex))
+		gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
 	return error;
 }


+ 34 - 37
fs/gfs2/glock.c

@@ -541,21 +541,6 @@ out_locked:
 	spin_unlock(&gl->gl_spin);
 	spin_unlock(&gl->gl_spin);
 }
 }
 
 
-static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
-				 unsigned int req_state,
-				 unsigned int flags)
-{
-	int ret = LM_OUT_ERROR;
-
-	if (!sdp->sd_lockstruct.ls_ops->lm_lock)
-		return req_state == LM_ST_UNLOCKED ? 0 : req_state;
-
-	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-		ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
-							 req_state, flags);
-	return ret;
-}
-
 /**
  * do_xmote - Calls the DLM to change the state of a lock
  * @gl: The lock state
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin)

 	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
 		      LM_FLAG_PRIORITY);
-	BUG_ON(gl->gl_state == target);
-	BUG_ON(gl->gl_state == gl->gl_target);
+	GLOCK_BUG_ON(gl, gl->gl_state == target);
+	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
 	if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
 	    glops->go_inval) {
 		set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
 		do_error(gl, 0); /* Fail queued try locks */
 	}
+	gl->gl_req = target;
 	spin_unlock(&gl->gl_spin);
 	if (glops->go_xmote_th)
 		glops->go_xmote_th(gl);
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin)
 	    gl->gl_state == LM_ST_DEFERRED) &&
 	    !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
 		lck_flags |= LM_FLAG_TRY_1CB;
-	ret = gfs2_lm_lock(sdp, gl, target, lck_flags);

-	if (!(ret & LM_OUT_ASYNC)) {
-		finish_xmote(gl, ret);
+	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
+		/* lock_dlm */
+		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
+		GLOCK_BUG_ON(gl, ret);
+	} else { /* lock_nolock */
+		finish_xmote(gl, target);
 		if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
 			gfs2_glock_put(gl);
-	} else {
-		GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
 	}
+
 	spin_lock(&gl->gl_spin);
 }

@@ -951,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh)

 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;

 	va_start(args, fmt);
+
 	if (seq) {
 		struct gfs2_glock_iter *gi = seq->private;
 		vsprintf(gi->string, fmt, args);
 		seq_printf(seq, gi->string);
 	} else {
-		printk(KERN_ERR " ");
-		vprintk(fmt, args);
+		vaf.fmt = fmt;
+		vaf.va = &args;
+
+		printk(KERN_ERR " %pV", &vaf);
 	}
+
 	va_end(args);
 }

@@ -1361,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
  * @gl: Pointer to the glock
  * @ret: The return value from the dlm
  *
+ * The gl_reply field is under the gl_spin lock so that it is ok
+ * to use a bitfield shared with other glock state fields.
  */

 void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 {
 	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;

+	spin_lock(&gl->gl_spin);
 	gl->gl_reply = ret;

 	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
-		spin_lock(&gl->gl_spin);
 		if (gfs2_should_freeze(gl)) {
 			set_bit(GLF_FROZEN, &gl->gl_flags);
 			spin_unlock(&gl->gl_spin);
 			return;
 		}
-		spin_unlock(&gl->gl_spin);
 	}
+
+	spin_unlock(&gl->gl_spin);
 	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+	smp_wmb();
 	gfs2_glock_hold(gl);
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
 		gfs2_glock_put(gl);
@@ -1626,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
 static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
 {
 	struct task_struct *gh_owner = NULL;
-	char buffer[KSYM_SYMBOL_LEN];
 	char flags_buf[32];

-	sprint_symbol(buffer, gh->gh_ip);
 	if (gh->gh_owner_pid)
 		gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
-	gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n",
-		  state2str(gh->gh_state),
-		  hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
-		  gh->gh_error, 
-		  gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
-		  gh_owner ? gh_owner->comm : "(ended)", buffer);
+	gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
+		       state2str(gh->gh_state),
+		       hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
+		       gh->gh_error,
+		       gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
+		       gh_owner ? gh_owner->comm : "(ended)",
+		       (void *)gh->gh_ip);
 	return 0;
 }

@@ -1782,12 +1778,13 @@ int __init gfs2_glock_init(void)
 	}
 #endif

-	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER |
+	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
 					  WQ_HIGHPRI | WQ_FREEZEABLE, 0);
 	if (IS_ERR(glock_workqueue))
 		return PTR_ERR(glock_workqueue);
-	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER |
-						WQ_FREEZEABLE, 0);
+	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
+						WQ_MEM_RECLAIM | WQ_FREEZEABLE,
+						0);
 	if (IS_ERR(gfs2_delete_workqueue)) {
 		destroy_workqueue(glock_workqueue);
 		return PTR_ERR(gfs2_delete_workqueue);

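The gfs2_print_dbg() rewrite above switches to the kernel's %pV extension. A minimal standalone sketch of the idiom (err_printf is an illustrative name): wrap the format string and va_list in a struct va_format and let a single printk() expand both, instead of splitting the output across printk()/vprintk() calls.

	#include <linux/kernel.h>

	static __attribute__((format(printf, 1, 2)))
	void err_printf(const char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;			/* consumed when %pV is expanded */
		printk(KERN_ERR " %pV", &vaf);
		va_end(args);
	}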
+ 6 - 22
fs/gfs2/glock.h

@@ -87,11 +87,10 @@ enum {
 #define GL_ASYNC		0x00000040
 #define GL_EXACT		0x00000080
 #define GL_SKIP			0x00000100
-#define GL_ATIME		0x00000200
 #define GL_NOCACHE		0x00000400

 /*
- * lm_lock() and lm_async_cb return flags
+ * lm_async_cb return flags
  *
  * LM_OUT_ST_MASK
  * Masks the lower two bits of lock state in the returned value.
@@ -99,15 +98,11 @@ enum {
  * LM_OUT_CANCELED
  * The lock request was canceled.
  *
- * LM_OUT_ASYNC
- * The result of the request will be returned in an LM_CB_ASYNC callback.
- *
  */

 #define LM_OUT_ST_MASK		0x00000003
 #define LM_OUT_CANCELED		0x00000008
-#define LM_OUT_ASYNC		0x00000080
-#define LM_OUT_ERROR		0x00000100
+#define LM_OUT_ERROR		0x00000004
 
 
 /*
 /*
  * lm_recovery_done() messages
  * lm_recovery_done() messages
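
As the comment block above describes, a callback reply word packs the resulting lock state into its low two bits next to the status flags. A sketch of unpacking it (helper name hypothetical):

	/* Hypothetical helper: recover the lock state from a reply word */
	static unsigned int reply_to_state(unsigned int reply,
					   unsigned int old_state)
	{
		if (reply & LM_OUT_CANCELED)
			return old_state;	/* canceled: state unchanged */
		return reply & LM_OUT_ST_MASK;	/* low two bits = state */
	}
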
@@ -124,25 +119,12 @@ struct lm_lockops {
 	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
 	void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
-	unsigned int (*lm_lock) (struct gfs2_glock *gl,
-				 unsigned int req_state, unsigned int flags);
+	int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
+			unsigned int flags);
 	void (*lm_cancel) (struct gfs2_glock *gl);
 	const match_table_t *lm_tokens;
 };
 
-#define LM_FLAG_TRY		0x00000001
-#define LM_FLAG_TRY_1CB		0x00000002
-#define LM_FLAG_NOEXP		0x00000004
-#define LM_FLAG_ANY		0x00000008
-#define LM_FLAG_PRIORITY	0x00000010
-
-#define GL_ASYNC		0x00000040
-#define GL_EXACT		0x00000080
-#define GL_SKIP			0x00000100
-#define GL_NOCACHE		0x00000400
-
-#define GLR_TRYFAILED		13
-
 extern struct workqueue_struct *gfs2_delete_workqueue;
 static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
+
+__attribute__ ((format(printf, 2, 3)))
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
 
 /**

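Annotating gfs2_print_dbg() with format(printf, 2, 3) tells the compiler that argument 2 is a printf-style format string consumed by the varargs starting at argument 3, so mismatched specifiers are caught at compile time. The same attribute on a standalone example:

	#include <stdarg.h>
	#include <stdio.h>

	__attribute__ ((format(printf, 2, 3)))
	static void log_dbg(int level, const char *fmt, ...)
	{
		va_list ap;

		va_start(ap, fmt);
		if (level > 0)
			vfprintf(stderr, fmt, ap);
		va_end(ap);
	}

	/* log_dbg(1, "%s", 42) now draws a -Wformat warning at build time */
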
+ 0 - 1
fs/gfs2/glops.c

@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl)
 
 	if (gl->gl_state != LM_ST_UNLOCKED &&
 	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
-		flush_workqueue(gfs2_delete_workqueue);
 		gfs2_meta_syncfs(sdp);
 		gfs2_log_shutdown(sdp);
 	}

+ 7 - 5
fs/gfs2/incore.h

@@ -207,12 +207,14 @@ struct gfs2_glock {
 
 	spinlock_t gl_spin;
 
-	unsigned int gl_state;
-	unsigned int gl_target;
-	unsigned int gl_reply;
+	/* State fields protected by gl_spin */
+	unsigned int gl_state:2,	/* Current state */
+		     gl_target:2,	/* Target state */
+		     gl_demote_state:2,	/* State requested by remote node */
+		     gl_req:2,		/* State in last dlm request */
+		     gl_reply:8;	/* Last reply from the dlm */
+
 	unsigned int gl_hash;
-	unsigned int gl_req;
-	unsigned int gl_demote_state; /* state requested by remote node */
 	unsigned long gl_demote_time; /* time of first demote request */
 	struct list_head gl_holders;
 

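Collapsing the five state fields into adjacent bitfields shrinks struct gfs2_glock, but sibling bitfields share one machine word, so every update is a read-modify-write of that word; the new comment makes the gl_spin requirement explicit. An illustrative pattern (struct and names hypothetical):

	#include <linux/spinlock.h>

	struct lockstate {			/* hypothetical */
		spinlock_t lock;
		unsigned int state:2,		/* sibling bitfields occupy */
			     target:2;		/* the same machine word    */
	};

	static void set_target(struct lockstate *ls, unsigned int t)
	{
		spin_lock(&ls->lock);
		ls->target = t;	/* RMW of the shared word, under the lock */
		spin_unlock(&ls->lock);
	}
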
+ 0 - 9
fs/gfs2/inode.c

@@ -916,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 	if (error)
 		return error;
 
-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode)) {
-		error = vmtruncate(inode, attr->ia_size);
-		if (error)
-			return error;
-	}
-
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
-
-	gfs2_assert_warn(GFS2_SB(inode), !error);
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);

+ 4 - 11
fs/gfs2/lock_dlm.c

@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
 	return lkf;
 }
 
-static unsigned int gdlm_lock(struct gfs2_glock *gl,
-			      unsigned int req_state, unsigned int flags)
+static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
+		     unsigned int flags)
 {
 	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
-	int error;
 	int req;
 	u32 lkf;
 
-	gl->gl_req = req_state;
 	req = make_mode(req_state);
 	lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
 
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
 	 * Submit the actual lock request.
 	 */
 
-	error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
-			 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
-	if (error == -EAGAIN)
-		return 0;
-	if (error)
-		return LM_OUT_ERROR;
-	return LM_OUT_ASYNC;
+	return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
+			GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
 }
 
 static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)

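gdlm_lock() now returns the dlm_lock() result unchanged: zero means the request was accepted and its outcome will arrive through the gdlm_ast callback, while nonzero is an immediate, synchronous failure. A caller-side sketch under that contract (error handler hypothetical):

	/* Sketch: only a synchronous failure needs handling here */
	static void submit_lock(struct gfs2_glock *gl, unsigned int target)
	{
		const struct lm_lockops *ops = gl->gl_sbd->sd_lockstruct.ls_ops;
		int ret = ops->lm_lock(gl, target, 0);

		if (ret)
			handle_lock_error(gl, ret);	/* hypothetical */
		/* on success, gdlm_ast() reports the result asynchronously */
	}
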
+ 1 - 17
fs/gfs2/ops_inode.c

@@ -1069,7 +1069,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct buffer_head *dibh;
 	u32 ouid, ogid, nuid, ngid;
 	int error;
 
@@ -1100,25 +1099,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (error)
 		goto out_gunlock_q;
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
+	error = gfs2_setattr_simple(ip, attr);
 	if (error)
 		goto out_end_trans;
 
-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode)) {
-		int error;
-
-		error = vmtruncate(inode, attr->ia_size);
-		gfs2_assert_warn(sdp, !error);
-	}
-
-	setattr_copy(inode, attr);
-	mark_inode_dirty(inode);
-
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
-
 	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
 		u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
 		gfs2_quota_change(ip, -blocks, ouid, ogid);

+ 11 - 2
fs/gfs2/quota.c

@@ -666,6 +666,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
 			qd->qd_qb.qb_limit = qp->qu_limit;
 		}
+		if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
+			qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+			qd->qd_qb.qb_value = qp->qu_value;
+		}
 	}
 
 	/* Write the quota into the quota file on disk */
@@ -1509,7 +1513,7 @@ out:
 }
 
 /* GFS2 only supports a subset of the XFS fields */
-#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
+#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
 
 static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 			  struct fs_disk_quota *fdq)
@@ -1569,9 +1573,15 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 	if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
 	    ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
 		fdq->d_fieldmask ^= FS_DQ_BSOFT;
+
 	if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
 	    ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
 		fdq->d_fieldmask ^= FS_DQ_BHARD;
+
+	if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
+	    ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
+		fdq->d_fieldmask ^= FS_DQ_BCOUNT;
+
 	if (fdq->d_fieldmask == 0)
 		goto out_i;
 
@@ -1620,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = {
 	.get_dqblk	= gfs2_get_dqblk,
 	.set_dqblk	= gfs2_set_dqblk,
 };
-

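gfs2_set_dqblk() clears each FS_DQ_* bit whose requested value already matches the on-disk one; since the bit is known to be set, XOR clears it, and a mask of zero short-circuits the whole update before any transaction starts. The idiom in standalone form (names made up):

	#include <stdint.h>
	#include <stdio.h>

	#define FLD_SOFT 0x1

	/* Drop request bits whose new value equals the current one */
	static uint32_t prune(uint32_t mask, uint64_t want, uint64_t cur)
	{
		if ((mask & FLD_SOFT) && want == cur)
			mask ^= FLD_SOFT;	/* bit known set, so XOR clears it */
		return mask;
	}

	int main(void)
	{
		printf("%u\n", prune(FLD_SOFT, 100, 100));	/* 0: nothing to write */
		return 0;
	}
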
+ 13 - 44
fs/gfs2/rgrp.c

@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 	for (rgrps = 0;; rgrps++) {
 		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 
-		if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode))
+		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 			break;
 		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
 					   sizeof(struct gfs2_rindex));
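
The >= to > change is a boundary fix: an rindex entry that ends exactly at i_size is complete and should be read; only an entry that would extend past the end of file is partial. A standalone check of both predicates (sizes made up):

	#include <stdio.h>

	int main(void)
	{
		unsigned long entry = 96, size = 4 * 96;	/* exactly 4 entries */
		unsigned long pos = 3 * entry;			/* the final entry   */

		printf("old: %s\n", pos + entry >= size ? "skipped" : "read");
		printf("new: %s\n", pos + entry > size ? "skipped" : "read");
		return 0;	/* old skips the valid final entry; new reads it */
	}
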
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
  * Returns: 0 on successful update, error code otherwise
  */
 
-static int gfs2_ri_update(struct gfs2_inode *ip)
+int gfs2_ri_update(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct inode *inode = &ip->i_inode;
@@ -613,46 +613,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	return 0;
 }
 
-/**
- * gfs2_ri_update_special - Pull in a new resource index from the disk
- *
- * This is a special version that's safe to call from gfs2_inplace_reserve_i.
- * In this case we know that we don't have any resource groups in memory yet.
- *
- * @ip: pointer to the rindex inode
- *
- * Returns: 0 on successful update, error code otherwise
- */
-static int gfs2_ri_update_special(struct gfs2_inode *ip)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct inode *inode = &ip->i_inode;
-	struct file_ra_state ra_state;
-	struct gfs2_rgrpd *rgd;
-	unsigned int max_data = 0;
-	int error;
-
-	file_ra_state_init(&ra_state, inode->i_mapping);
-	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
-		/* Ignore partials */
-		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
-		    i_size_read(inode))
-			break;
-		error = read_rindex_entry(ip, &ra_state);
-		if (error) {
-			clear_rgrpdi(sdp);
-			return error;
-		}
-	}
-	list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
-		if (rgd->rd_data > max_data)
-			max_data = rgd->rd_data;
-	sdp->sd_max_rg_data = max_data;
-
-	sdp->sd_rindex_uptodate = 1;
-	return 0;
-}
-
 /**
  * gfs2_rindex_hold - Grab a lock on the rindex
  * @sdp: The GFS2 superblock
@@ -1226,16 +1186,25 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
 			error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
 		else if (!sdp->sd_rgrps) /* We may not have the rindex read
 					    in, so: */
-			error = gfs2_ri_update_special(ip);
+			error = gfs2_ri_update(ip);
 		if (error)
 			return error;
 	}
 
+try_again:
 	do {
 		error = get_local_rgrp(ip, &last_unlinked);
 		/* If there is no space, flushing the log may release some */
-		if (error)
+		if (error) {
+			if (ip == GFS2_I(sdp->sd_rindex) &&
+			    !sdp->sd_rindex_uptodate) {
+				error = gfs2_ri_update(ip);
+				if (error)
+					return error;
+				goto try_again;
+			}
 			gfs2_log_flush(sdp, NULL);
+		}
 	} while (error && tries++ < 3);
 
 	if (error) {

+ 1 - 0
fs/gfs2/rgrp.h

@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
 
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
+extern int gfs2_ri_update(struct gfs2_inode *ip);
 extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
 extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
 

Some files were not shown because the diff is too large