
Merge branch 'master'

Jeff Garzik 19 years ago
parent
commit
88e3c1da8b
100 changed files with 2598 additions and 711 deletions
  1. Documentation/kernel-parameters.txt (+10 -1)
  2. Documentation/power/swsusp.txt (+44 -7)
  3. Documentation/power/userland-swsusp.txt (+149 -0)
  4. Documentation/power/video.txt (+33 -41)
  5. Documentation/powerpc/booting-without-of.txt (+72 -0)
  6. Documentation/powerpc/eeh-pci-error-recovery.txt (+8 -7)
  7. Documentation/powerpc/hvcs.txt (+2 -2)
  8. MAINTAINERS (+2 -2)
  9. arch/cris/kernel/irq.c (+4 -6)
  10. arch/frv/kernel/irq.c (+4 -6)
  11. arch/i386/Kconfig (+14 -10)
  12. arch/i386/Kconfig.debug (+9 -0)
  13. arch/i386/kernel/Makefile (+1 -1)
  14. arch/i386/kernel/alternative.c (+321 -0)
  15. arch/i386/kernel/apic.c (+1 -0)
  16. arch/i386/kernel/cpu/centaur.c (+1 -0)
  17. arch/i386/kernel/cpu/common.c (+30 -17)
  18. arch/i386/kernel/cpu/cpufreq/powernow-k8.c (+1 -3)
  19. arch/i386/kernel/cpu/intel.c (+6 -6)
  20. arch/i386/kernel/cpu/intel_cacheinfo.c (+2 -2)
  21. arch/i386/kernel/cpu/proc.c (+1 -1)
  22. arch/i386/kernel/crash.c (+1 -1)
  23. arch/i386/kernel/entry.S (+4 -0)
  24. arch/i386/kernel/head.S (+1 -4)
  25. arch/i386/kernel/io_apic.c (+11 -14)
  26. arch/i386/kernel/kprobes.c (+2 -2)
  27. arch/i386/kernel/module.c (+22 -10)
  28. arch/i386/kernel/mpparse.c (+4 -3)
  29. arch/i386/kernel/nmi.c (+3 -3)
  30. arch/i386/kernel/process.c (+1 -1)
  31. arch/i386/kernel/ptrace.c (+2 -2)
  32. arch/i386/kernel/semaphore.c (+4 -4)
  33. arch/i386/kernel/setup.c (+10 -108)
  34. arch/i386/kernel/signal.c (+2 -5)
  35. arch/i386/kernel/smpboot.c (+3 -0)
  36. arch/i386/kernel/topology.c (+9 -0)
  37. arch/i386/kernel/traps.c (+40 -17)
  38. arch/i386/kernel/vmlinux.lds.S (+20 -0)
  39. arch/i386/kernel/vsyscall-sysenter.S (+3 -0)
  40. arch/i386/mach-es7000/es7000.h (+4 -1)
  41. arch/i386/mach-es7000/es7000plat.c (+2 -4)
  42. arch/i386/mm/fault.c (+138 -72)
  43. arch/i386/mm/init.c (+22 -23)
  44. arch/i386/oprofile/nmi_int.c (+2 -5)
  45. arch/ia64/hp/sim/simserial.c (+1 -6)
  46. arch/m32r/kernel/irq.c (+4 -6)
  47. arch/m68k/bvme6000/rtc.c (+1 -3)
  48. arch/mips/kernel/irq.c (+4 -6)
  49. arch/mips/kernel/smp.c (+2 -2)
  50. arch/mips/sgi-ip27/ip27-irq.c (+1 -4)
  51. arch/parisc/kernel/smp.c (+10 -15)
  52. arch/powerpc/Kconfig (+32 -6)
  53. arch/powerpc/Makefile (+1 -1)
  54. arch/powerpc/boot/install.sh (+0 -2)
  55. arch/powerpc/boot/main.c (+2 -2)
  56. arch/powerpc/configs/mpc8540_ads_defconfig (+721 -0)
  57. arch/powerpc/kernel/asm-offsets.c (+3 -0)
  58. arch/powerpc/kernel/cputable.c (+11 -1)
  59. arch/powerpc/kernel/entry_64.S (+6 -5)
  60. arch/powerpc/kernel/firmware.c (+0 -25)
  61. arch/powerpc/kernel/head_44x.S (+0 -2)
  62. arch/powerpc/kernel/head_64.S (+9 -2)
  63. arch/powerpc/kernel/head_8xx.S (+0 -2)
  64. arch/powerpc/kernel/head_booke.h (+363 -0)
  65. arch/powerpc/kernel/head_fsl_booke.S (+4 -2)
  66. arch/powerpc/kernel/iomap.c (+0 -2)
  67. arch/powerpc/kernel/iommu.c (+0 -1)
  68. arch/powerpc/kernel/irq.c (+24 -13)
  69. arch/powerpc/kernel/kprobes.c (+2 -3)
  70. arch/powerpc/kernel/of_device.c (+1 -4)
  71. arch/powerpc/kernel/pci_iommu.c (+0 -1)
  72. arch/powerpc/kernel/ppc_ksyms.c (+0 -1)
  73. arch/powerpc/kernel/process.c (+6 -3)
  74. arch/powerpc/kernel/prom.c (+0 -4)
  75. arch/powerpc/kernel/ptrace-common.h (+0 -2)
  76. arch/powerpc/kernel/rtas-proc.c (+0 -1)
  77. arch/powerpc/kernel/rtas_pci.c (+0 -2)
  78. arch/powerpc/kernel/setup-common.c (+2 -3)
  79. arch/powerpc/kernel/setup_32.c (+2 -3)
  80. arch/powerpc/kernel/setup_64.c (+0 -2)
  81. arch/powerpc/kernel/signal_64.c (+0 -2)
  82. arch/powerpc/kernel/smp.c (+3 -1)
  83. arch/powerpc/kernel/time.c (+239 -2)
  84. arch/powerpc/kernel/vdso.c (+0 -2)
  85. arch/powerpc/lib/copypage_64.S (+0 -2)
  86. arch/powerpc/lib/copyuser_64.S (+0 -2)
  87. arch/powerpc/lib/e2a.c (+11 -3)
  88. arch/powerpc/lib/memcpy_64.S (+0 -2)
  89. arch/powerpc/lib/rheap.c (+0 -2)
  90. arch/powerpc/mm/fault.c (+0 -2)
  91. arch/powerpc/mm/hash_low_32.S (+0 -2)
  92. arch/powerpc/mm/hash_utils_64.c (+16 -16)
  93. arch/powerpc/mm/init_64.c (+0 -48)
  94. arch/powerpc/mm/lmb.c (+16 -0)
  95. arch/powerpc/mm/mem.c (+1 -2)
  96. arch/powerpc/mm/mmap.c (+0 -2)
  97. arch/powerpc/mm/numa.c (+74 -86)
  98. arch/powerpc/mm/slb_low.S (+0 -2)
  99. arch/powerpc/mm/stab.c (+0 -4)
  100. arch/powerpc/mm/tlb_64.c (+1 -1)

+ 10 - 1
Documentation/kernel-parameters.txt

@@ -1008,7 +1008,9 @@ running once the system is up.
 			noexec=on: enable non-executable mappings (default)
 			noexec=off: disable non-executable mappings
 
-	nofxsr		[BUGS=IA-32]
+	nofxsr		[BUGS=IA-32] Disables x86 floating point extended
+			register save and restore. The kernel will only save
+			legacy floating-point registers on task switch.
 
 	nohlt		[BUGS=ARM]
 
@@ -1053,6 +1055,8 @@ running once the system is up.
 
 	nosbagart	[IA-64]
 
+	nosep		[BUGS=IA-32] Disables x86 SYSENTER/SYSEXIT support.
+
 	nosmp		[SMP] Tells an SMP kernel to act as a UP kernel.
 
 	nosync		[HW,M68K] Disables sync negotiation for all devices.
@@ -1122,6 +1126,11 @@ running once the system is up.
 	pas16=		[HW,SCSI]
 			See header of drivers/scsi/pas16.c.
 
+	pause_on_oops=
+			Halt all CPUs after the first oops has been printed for
+			the specified number of seconds.  This is to be used if
+			your oopses keep scrolling off the screen.
+
 	pcbit=		[HW,ISDN]
 
 	pcd.		[PARIDE]

+ 44 - 7
Documentation/power/swsusp.txt

@@ -17,6 +17,11 @@ Some warnings, first.
 * but it will probably only crash.
 *
 * (*) suspend/resume support is needed to make it safe.
+ *
+ * If you have any filesystems on USB devices mounted before suspend,
+ * they won't be accessible after resume and you may lose data, as though
+ * you have unplugged the USB devices with mounted filesystems on them
+ * (see the FAQ below for details).
 
 You need to append resume=/dev/your_swap_partition to kernel command
 line. Then you suspend by
@@ -27,19 +32,18 @@ echo shutdown > /sys/power/disk; echo disk > /sys/power/state
 
 echo platform > /sys/power/disk; echo disk > /sys/power/state
 
+. If you have SATA disks, you'll need recent kernels with SATA suspend
+support. For suspend and resume to work, make sure your disk drivers
+are built into the kernel -- not as modules. [There's a way to make
+suspend/resume work with modular disk drivers, see the FAQ, but you
+probably should not do that.]
+
 If you want to limit the suspend image size to N bytes, do
 
 echo N > /sys/power/image_size
 
 before suspend (it is limited to 500 MB by default).
 
-Encrypted suspend image:
-------------------------
-If you want to store your suspend image encrypted with a temporary
-key to prevent data gathering after resume you must compile
-crypto and the aes algorithm into the kernel - modules won't work
-as they cannot be loaded at resume time.
-
 
 Article about goals and implementation of Software Suspend for Linux
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -333,4 +337,37 @@ init=/bin/bash, then swapon and starting suspend sequence manually
 usually does the trick. Then it is good idea to try with latest
 vanilla kernel.
 
+Q: How can distributions ship a swsusp-supporting kernel with modular
+disk drivers (especially SATA)?
+
+A: Well, it can be done: load the drivers, then echo into the
+/sys/power/disk/resume file from the initrd. Be sure not to mount
+anything, not even read-only, or you are going to lose your
+data.
+
+Q: How do I make suspend more verbose?
+
+A: If you want to see any non-error kernel messages on the virtual
+terminal the kernel switches to during suspend, you have to set the
+kernel console loglevel to at least 5, for example by doing
+
+	echo 5 > /proc/sys/kernel/printk
+
+Q: Is it true that if I have a mounted filesystem on a USB device and
+I suspend to disk, I can lose data unless the filesystem has been mounted
+with "sync"?
+
+A: That's right.  It depends on your hardware, and it could be true even for
+suspend-to-RAM.  In fact, even with "-o sync" you can lose data if your
+programs have information in buffers they haven't written out to disk.
+
+If you're lucky, your hardware will support low-power modes for USB
+controllers while the system is asleep.  Lots of hardware doesn't,
+however.  Shutting off the power to a USB controller is equivalent to
+unplugging all the attached devices.
+
+Remember that it's always a bad idea to unplug a disk drive containing a
+mounted filesystem.  With USB that's true even when your system is asleep!
+The safest thing is to unmount all USB-based filesystems before suspending
+and remount them after resuming.
 

+ 149 - 0
Documentation/power/userland-swsusp.txt

@@ -0,0 +1,149 @@
+Documentation for userland software suspend interface
+	(C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+
+First, the warnings at the beginning of swsusp.txt still apply.
+
+Second, you should read the FAQ in swsusp.txt _now_ if you have not
+done it already.
+
+Now, to use the userland interface for software suspend you need special
+utilities that will read/write the system memory snapshot from/to the
+kernel.  Such utilities are available, for example, from
+<http://www.sisk.pl/kernel/utilities/suspend>.  You may want to have
+a look at them if you are going to develop your own suspend/resume
+utilities.
+
+The interface consists of a character device providing the open(),
+release(), read(), and write() operations as well as several ioctl()
+commands defined in kernel/power/power.h.  The major and minor
+numbers of the device are, respectively, 10 and 231, and they can
+be read from /sys/class/misc/snapshot/dev.
+
+The device can be open either for reading or for writing.  If open for
+reading, it is considered to be in the suspend mode.  Otherwise it is
+assumed to be in the resume mode.  The device cannot be open for both
+reading and writing.  It is also impossible to have the device open more than once
+at a time.
+
+The ioctl() commands recognized by the device are:
+
+SNAPSHOT_FREEZE - freeze user space processes (the current process is
+	not frozen); this is required for SNAPSHOT_ATOMIC_SNAPSHOT
+	and SNAPSHOT_ATOMIC_RESTORE to succeed
+
+SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE
+
+SNAPSHOT_ATOMIC_SNAPSHOT - create a snapshot of the system memory; the
+	last argument of ioctl() should be a pointer to an int variable,
+	the value of which will indicate whether the call returned after
+	creating the snapshot (1) or after restoring the system memory state
+	from it (0) (after resume the system finds itself finishing the
+	SNAPSHOT_ATOMIC_SNAPSHOT ioctl() again); after the snapshot
+	has been created the read() operation can be used to transfer
+	it out of the kernel
+
+SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the
+	uploaded snapshot image; before calling it you should transfer
+	the system memory snapshot back to the kernel using the write()
+	operation; this call will not succeed if the snapshot
+	image is not available to the kernel
+
+SNAPSHOT_FREE - free memory allocated for the snapshot image
+
+SNAPSHOT_SET_IMAGE_SIZE - set the preferred maximum size of the image
+	(the kernel will do its best to ensure the image size will not exceed
+	this number, but if it turns out to be impossible, the kernel will
+	create the smallest image possible)
+
+SNAPSHOT_AVAIL_SWAP - return the amount of available swap in bytes (the last
+	argument should be a pointer to an unsigned int variable that will
+	contain the result if the call is successful).
+
+SNAPSHOT_GET_SWAP_PAGE - allocate a swap page from the resume partition
+	(the last argument should be a pointer to a loff_t variable that
+	will contain the swap page offset if the call is successful)
+
+SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated with
+	SNAPSHOT_GET_SWAP_PAGE
+
+SNAPSHOT_SET_SWAP_FILE - set the resume partition (the last ioctl() argument
+	should specify the device's major and minor numbers in the old
+	two-byte format, as returned by the stat() function in the .st_rdev
+	member of the stat structure); it is recommended to always use this
+	call, because the code to set the resume partition could be removed from
+	future kernels
+
+The device's read() operation can be used to transfer the snapshot image from
+the kernel.  It has the following limitations:
+- you cannot read() more than one virtual memory page at a time
+- read()s across page boundaries are impossible (i.e. if you read() 1/2 of
+	a page in the previous call, you will only be able to read()
+	_at_ _most_ 1/2 of the page in the next call)
+
+The device's write() operation is used for uploading the system memory snapshot
+into the kernel.  It has the same limitations as the read() operation.
+
+The release() operation frees all memory allocated for the snapshot image
+and all swap pages allocated with SNAPSHOT_GET_SWAP_PAGE (if any).
+Thus it is not necessary to use either SNAPSHOT_FREE or
+SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also
+unfreeze user space processes frozen by SNAPSHOT_FREEZE if they are
+still frozen when the device is being closed).
+
+Currently it is assumed that the userland utilities reading/writing the
+snapshot image from/to the kernel will use a swap partition, called the resume
+partition, as storage space.  However, this is not really required, as they
+can use, for example, a special (blank) suspend partition or a file on a partition
+that is unmounted before SNAPSHOT_ATOMIC_SNAPSHOT and mounted afterwards.
+
+These utilities SHOULD NOT make any assumptions regarding the ordering of
+data within the snapshot image, except for the image header that MAY be
+assumed to start with an swsusp_info structure, as specified in
+kernel/power/power.h.  This structure MAY be used by the userland utilities
+to obtain some information about the snapshot image, such as the size
+of the snapshot image, including the metadata and the header itself,
+contained in the .size member of swsusp_info.
+
+The snapshot image MUST be written to the kernel unaltered (i.e. all of the image
+data, metadata and header MUST be written in _exactly_ the same amount, form
+and order in which they have been read).  Otherwise, the behavior of the
+resumed system may be totally unpredictable.
+
+While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the
+structure of the snapshot image is consistent with the information stored
+in the image header.  If any inconsistencies are detected,
+SNAPSHOT_ATOMIC_RESTORE will not succeed.  Still, this is not a fool-proof
+mechanism and the userland utilities using the interface SHOULD use additional
+means, such as checksums, to ensure the integrity of the snapshot image.
+
+The suspending and resuming utilities MUST lock themselves in memory,
+preferably using mlockall(), before calling SNAPSHOT_FREEZE.
+
+The suspending utility MUST check the value stored by SNAPSHOT_ATOMIC_SNAPSHOT
+in the memory location pointed to by the last argument of ioctl() and proceed
+in accordance with it:
+1. 	If the value is 1 (i.e. the system memory snapshot has just been
+	created and the system is ready for saving it):
+	(a)	The suspending utility MUST NOT close the snapshot device
+		_unless_ the whole suspend procedure is to be cancelled, in
+		which case, if the snapshot image has already been saved, the
+		suspending utility SHOULD destroy it, preferably by zapping
+		its header.  If the suspend is not to be cancelled, the
+		system MUST be powered off or rebooted after the snapshot
+		image has been saved.
+	(b)	The suspending utility SHOULD NOT attempt to perform any
+		file system operations (including reads) on the file systems
+		that were mounted before SNAPSHOT_ATOMIC_SNAPSHOT has been
+		called.  However, it MAY mount a file system that was not
+		mounted at that time and perform some operations on it (e.g.
+		use it for saving the image).
+2.	If the value is 0 (i.e. the system state has just been restored from
+	the snapshot image), the suspending utility MUST close the snapshot
+	device.  Afterwards it will be treated as a regular userland process,
+	so it need not exit.
+
+The resuming utility SHOULD NOT attempt to mount any file systems that could
+be mounted before suspend and SHOULD NOT attempt to perform any operations
+involving such file systems.
+
+For details, please refer to the source code.
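
To make the protocol above concrete, here is a minimal, hedged sketch in C of
the suspend side of such a utility.  It assumes a /dev/snapshot node created
with the major/minor numbers given above, that the SNAPSHOT_* ioctl macros
have been copied out of kernel/power/power.h, and save_page() is a
hypothetical stand-in for writing a page to the resume partition -- a sketch
under those assumptions, not a reference implementation:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <sys/types.h>
	#include <unistd.h>
	#include "power.h"  /* SNAPSHOT_* ioctls, copied from kernel/power/power.h */

	/* Hypothetical sink for image pages (a real utility writes them
	 * to swap pages allocated with SNAPSHOT_GET_SWAP_PAGE). */
	static void save_page(const char *buf, ssize_t len) { (void)buf; (void)len; }

	int main(void)
	{
		char page[4096];
		int in_suspend = 0;
		ssize_t n;
		int dev = open("/dev/snapshot", O_RDONLY); /* read side = suspend mode */

		if (dev < 0)
			return 1;
		mlockall(MCL_CURRENT | MCL_FUTURE);   /* MUST lock ourselves in memory */
		if (ioctl(dev, SNAPSHOT_FREEZE, 0))   /* freeze user space first */
			goto out;
		if (ioctl(dev, SNAPSHOT_ATOMIC_SNAPSHOT, &in_suspend))
			goto thaw;
		if (in_suspend) {
			/* Value 1: the snapshot was just created.  Drain it page
			 * by page (read() never crosses a page boundary). */
			while ((n = read(dev, page, sizeof(page))) > 0)
				save_page(page, n);
			/* A real utility syncs the image and powers off or reboots
			 * right here; the snapshot device MUST stay open. */
			return 0;
		}
		/* Value 0: we were just restored from the image; thaw and close. */
	thaw:
		ioctl(dev, SNAPSHOT_UNFREEZE, 0);
	out:
		close(dev);
		return 0;
	}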

+ 33 - 41
Documentation/power/video.txt

@@ -1,7 +1,7 @@
 
 		Video issues with S3 resume
 		~~~~~~~~~~~~~~~~~~~~~~~~~~~
-		  2003-2005, Pavel Machek
+		  2003-2006, Pavel Machek
 
 During S3 resume, hardware needs to be reinitialized. For most
 devices, this is easy, and kernel driver knows how to do
@@ -15,6 +15,27 @@ run normally so video card is normally initialized. It should not be
 problem for S1 standby, because hardware should retain its state over
 that.
 
+We either have to run the video BIOS during early resume, or interpret it
+using vbetool later, or maybe nothing is necessary on a particular
+system because the video state is preserved. Unfortunately different
+methods work on different systems, and no known method suits all of
+them.
+
+A userland application called s2ram has been developed; it contains a long
+whitelist of systems, and automatically selects the working method for a
+given system. It can be downloaded from CVS at
+www.sf.net/projects/suspend . If you have a system that is not in the
+whitelist, please try to find a working solution, and submit a whitelist
+entry so that the work does not need to be repeated.
+
+Currently, the VBE_SAVE method (6 below) works on most
+systems. Unfortunately, vbetool only runs after userland is resumed,
+so it makes debugging of early resume problems
+hard/impossible. Methods that do not rely on userland are preferable.
+
+Details
+~~~~~~~
+
 There are a few types of systems where video works after S3 resume:
 
 (1) systems where video state is preserved over S3.
@@ -104,6 +125,7 @@ HP NX7000			??? (*)
 HP Pavilion ZD7000		vbetool post needed, need open-source nv driver for X
 HP Omnibook XE3	athlon version	none (1)
 HP Omnibook XE3GC		none (1), video is S3 Savage/IX-MV
+HP Omnibook 5150		none (1), (S1 also works OK)
 IBM TP T20, model 2647-44G	none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work.
 IBM TP A31 / Type 2652-M5G      s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(]
 IBM TP R32 / Type 2658-MMG      none (1)
@@ -120,18 +142,24 @@ IBM ThinkPad T42p (2373-GTG)	s3_bios (2)
 IBM TP X20			??? (*)
 IBM TP X30			s3_bios (2)
 IBM TP X31 / Type 2672-XXH      none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
-IBM TP X32			none (1), but backlight is on and video is trashed after long suspend
+IBM TP X32			none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results?
 IBM Thinkpad X40 Type 2371-7JG  s3_bios,s3_mode (4)
+IBM TP 600e			none(1), but a switch to console and back to X is needed
 Medion MD4220			??? (*)
 Samsung P35			vbetool needed (6)
-Sharp PC-AR10 (ATI rage)	none (1)
+Sharp PC-AR10 (ATI rage)	none (1), backlight does not switch off
 Sony Vaio PCG-C1VRX/K		s3_bios (2)
 Sony Vaio PCG-F403		??? (*)
+Sony Vaio PCG-GRT995MP		none (1), works with 'nv' X driver
+Sony Vaio PCG-GR7/K		none (1), but needs radeonfb, use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
 Sony Vaio PCG-N505SN		??? (*)
 Sony Vaio vgn-s260		X or boot-radeon can init it (5)
+Sony Vaio vgn-S580BH		vga=normal, but suspend from X. Console will be blank unless you return to X.
+Sony Vaio vgn-FS115B		s3_bios (2),s3_mode (4)
 Toshiba Libretto L5		none (1)
-Toshiba Satellite 4030CDT	s3_mode (3)
-Toshiba Satellite 4080XCDT      s3_mode (3)
+Toshiba Portege 3020CT		s3_mode (3)
+Toshiba Satellite 4030CDT	s3_mode (3) (S1 also works OK)
+Toshiba Satellite 4080XCDT      s3_mode (3) (S1 also works OK)
 Toshiba Satellite 4090XCDT      ??? (*)
 Toshiba Satellite P10-554       s3_bios,s3_mode (4)(****)
 Toshiba M30                     (2) xor X with nvidia driver using internal AGP
@@ -151,39 +179,3 @@ Asus A7V8X	    nVidia RIVA TNT2 model 64	  s3_bios,s3_mode (4)
 (***) To be tested with a newer kernel.
 
 (****) Not with SMP kernel, UP only.
-
-VBEtool details
-~~~~~~~~~~~~~~~
-(with thanks to Carl-Daniel Hailfinger)
-
-First, boot into X and run the following script ONCE:
-#!/bin/bash
-statedir=/root/s3/state
-mkdir -p $statedir
-chvt 2
-sleep 1
-vbetool vbestate save >$statedir/vbe
-
-
-To suspend and resume properly, call the following script as root:
-#!/bin/bash
-statedir=/root/s3/state
-curcons=`fgconsole`
-fuser /dev/tty$curcons 2>/dev/null|xargs ps -o comm= -p|grep -q X && chvt 2
-cat /dev/vcsa >$statedir/vcsa
-sync
-echo 3 >/proc/acpi/sleep
-sync
-vbetool post
-vbetool vbestate restore <$statedir/vbe
-cat $statedir/vcsa >/dev/vcsa
-rckbd restart
-chvt $[curcons%6+1]
-chvt $curcons
-
-
-Unless you change your graphics card or other hardware configuration,
-the state once saved will be OK for every resume afterwards.
-NOTE: The "rckbd restart" command may be different for your
-distribution. Simply replace it with the command you would use to
-set the fonts on screen.

+ 72 - 0
Documentation/powerpc/booting-without-of.txt

@@ -1365,6 +1365,78 @@ platforms are moved over to use the flattened-device-tree model.
 	};
 
 
+   g) Freescale SOC SEC Security Engines
+
+   Required properties:
+
+    - device_type : Should be "crypto"
+    - model : Model of the device.  Should be "SEC1" or "SEC2"
+    - compatible : Should be "talitos"
+    - reg : Offset and length of the register set for the device
+    - interrupts : <a b> where a is the interrupt number and b is a
+      field that represents an encoding of the sense and level
+      information for the interrupt.  This should be encoded based on
+      the information in section 2) depending on the type of interrupt
+      controller you have.
+    - interrupt-parent : the phandle for the interrupt controller that
+      services interrupts for this device.
+    - num-channels : An integer representing the number of channels
+      available.
+    - channel-fifo-len : An integer representing the number of
+      descriptor pointers each channel fetch fifo can hold.
+    - exec-units-mask : The bitmask representing what execution units
+      (EUs) are available. It's a single 32 bit cell. EU information
+      should be encoded following the SEC's Descriptor Header Dword
+      EU_SEL0 field documentation, i.e. as follows:
+
+        bit 0 = reserved - should be 0
+        bit 1 = set if SEC has the ARC4 EU (AFEU)
+        bit 2 = set if SEC has the DES/3DES EU (DEU)
+        bit 3 = set if SEC has the message digest EU (MDEU)
+        bit 4 = set if SEC has the random number generator EU (RNG)
+        bit 5 = set if SEC has the public key EU (PKEU)
+        bit 6 = set if SEC has the AES EU (AESU)
+        bit 7 = set if SEC has the Kasumi EU (KEU)
+
+      bits 8 through 31 are reserved for future SEC EUs.
+
+    - descriptor-types-mask : The bitmask representing what descriptors
+      are available. It's a single 32 bit cell. Descriptor type
+      information should be encoded following the SEC's Descriptor
+      Header Dword DESC_TYPE field documentation, i.e. as follows:
+
+        bit 0  = set if SEC supports the aesu_ctr_nonsnoop desc. type
+        bit 1  = set if SEC supports the ipsec_esp descriptor type
+        bit 2  = set if SEC supports the common_nonsnoop desc. type
+        bit 3  = set if SEC supports the 802.11i AES ccmp desc. type
+        bit 4  = set if SEC supports the hmac_snoop_no_afeu desc. type
+        bit 5  = set if SEC supports the srtp descriptor type
+        bit 6  = set if SEC supports the non_hmac_snoop_no_afeu desc.type
+        bit 7  = set if SEC supports the pkeu_assemble descriptor type
+        bit 8  = set if SEC supports the aesu_key_expand_output desc.type
+        bit 9  = set if SEC supports the pkeu_ptmul descriptor type
+        bit 10 = set if SEC supports the common_nonsnoop_afeu desc. type
+        bit 11 = set if SEC supports the pkeu_ptadd_dbl descriptor type
+
+      ..and so on and so forth.
+
+   Example:
+
+       /* MPC8548E */
+       crypto@30000 {
+               device_type = "crypto";
+               model = "SEC2";
+               compatible = "talitos";
+               reg = <30000 10000>;
+               interrupts = <1d 3>;
+               interrupt-parent = <40000>;
+               num-channels = <4>;
+               channel-fifo-len = <24>;
+               exec-units-mask = <000000fe>;
+               descriptor-types-mask = <073f1127>;
+       };
+
+
    More devices will be defined as this spec matures.
 
 

+ 8 - 7
Documentation/powerpc/eeh-pci-error-recovery.txt

@@ -121,7 +121,7 @@ accomplished.
 
 EEH must be enabled in the PHB's very early during the boot process,
 and if a PCI slot is hot-plugged. The former is performed by
-eeh_init() in arch/ppc64/kernel/eeh.c, and the later by
+eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the latter by
 drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
 EEH must be enabled before a PCI scan of the device can proceed.
 Current Power5 hardware will not work unless EEH is enabled;
@@ -133,7 +133,7 @@ error.  Given an arbitrary address, the routine
 pci_get_device_by_addr() will find the pci device associated
 with that address (if any).
 
-The default include/asm-ppc64/io.h macros readb(), inb(), insb(),
+The default include/asm-powerpc/io.h macros readb(), inb(), insb(),
 etc. include a check to see if the i/o read returned all-0xff's.
 If so, these make a call to eeh_dn_check_failure(), which in turn
 asks the firmware if the all-ff's value is the sign of a true EEH
@@ -143,11 +143,12 @@ seen in /proc/ppc64/eeh (subject to change).  Normally, almost
 all of these occur during boot, when the PCI bus is scanned, where
 a large number of 0xff reads are part of the bus scan procedure.
 
-If a frozen slot is detected, code in arch/ppc64/kernel/eeh.c will
-print a stack trace to syslog (/var/log/messages).  This stack trace
-has proven to be very useful to device-driver authors for finding
-out at what point the EEH error was detected, as the error itself
-usually occurs slightly beforehand.
+If a frozen slot is detected, code in
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
+syslog (/var/log/messages).  This stack trace has proven to be very
+useful to device-driver authors for finding out at what point the EEH
+error was detected, as the error itself usually occurs slightly
+beforehand.
 
 Next, it uses the Linux kernel notifier chain/work queue mechanism to
 allow any interested parties to find out about the failure.  Device
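
As a rough illustration of the detection path just described, an EEH-aware
MMIO read accessor looks roughly like the sketch below.  The helper names are
modeled on the powerpc EEH code and should be read as assumptions, not exact
signatures:

	/* Sketch: an isolated (frozen) slot returns all-1s on every read,
	 * so an all-0xff result is what triggers the firmware query. */
	static inline u8 eeh_aware_readb(const volatile void __iomem *addr)
	{
		u8 val = in_8(addr);            /* raw MMIO byte read */

		if (val == 0xff)                /* possible EEH event? */
			return eeh_check_failure(addr, val);  /* ask firmware */
		return val;
	}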

+ 2 - 2
Documentation/powerpc/hvcs.txt

@@ -558,9 +558,9 @@ partitions.
 
 The proper channel for reporting bugs is either through the Linux OS
 distribution company that provided your OS or by posting issues to the
-ppc64 development mailing list at:
+PowerPC development mailing list at:
 
-linuxppc64-dev@lists.linuxppc.org
+linuxppc-dev@ozlabs.org
 
 This request is to provide a documented and searchable public exchange
 of the problems and solutions surrounding this driver for the benefit of

+ 2 - 2
MAINTAINERS

@@ -534,7 +534,7 @@ S:   Supported
 BROADBAND PROCESSOR ARCHITECTURE
 P:	Arnd Bergmann
 M:	arnd@arndb.de
-L:	linuxppc64-dev@ozlabs.org
+L:	linuxppc-dev@ozlabs.org
 W:	http://linuxppc64.org
 S:	Supported
 
@@ -1624,7 +1624,7 @@ P:	Anton Blanchard
 M:	anton@samba.org
 M:	anton@au.ibm.com
 W:	http://linuxppc64.org
-L:	linuxppc64-dev@ozlabs.org
+L:	linuxppc-dev@ozlabs.org
 S:	Supported
 
 LINUX SECURITY MODULE (LSM) FRAMEWORK

+ 4 - 6
arch/cris/kernel/irq.c

@@ -52,9 +52,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -67,9 +66,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, "  %s", action->name);
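
The same conversion recurs in the frv, m32r, and mips interrupt code in this
merge.  For reference, a sketch of what the helper hides -- the 2.6.16-era
definition in include/linux/cpumask.h is essentially this one-liner:

	/* Walk only the online CPUs, replacing the open-coded 0..NR_CPUS
	 * scan and its per-iteration cpu_online() test. */
	#define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)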

+ 4 - 6
arch/frv/kernel/irq.c

@@ -75,9 +75,8 @@ int show_interrupts(struct seq_file *p, void *v)
 	switch (i) {
 	case 0:
 		seq_printf(p, "           ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 
 		seq_putc(p, '\n');
 		break;
@@ -100,9 +99,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
 #endif
 
 		level = group->sources[ix]->level - frv_irq_levels;

+ 14 - 10
arch/i386/Kconfig

@@ -80,6 +80,7 @@ config X86_VOYAGER
 
 config X86_NUMAQ
 	bool "NUMAQ (IBM/Sequent)"
+	select SMP
 	select NUMA
 	help
 	  This option is used for getting Linux to run on a (IBM/Sequent) NUMA
@@ -400,6 +401,7 @@ choice
 
 config NOHIGHMEM
 	bool "off"
+	depends on !X86_NUMAQ
 	---help---
 	  Linux can use up to 64 Gigabytes of physical memory on x86 systems.
 	  However, the address space of 32-bit x86 processors is only 4
@@ -436,6 +438,7 @@ config NOHIGHMEM
 
 config HIGHMEM4G
 	bool "4GB"
+	depends on !X86_NUMAQ
 	help
 	  Select this if you have a 32-bit processor and between 1 and 4
 	  gigabytes of physical RAM.
@@ -503,10 +506,6 @@ config NUMA
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT)
 
-# Need comments to help the hapless user trying to turn on NUMA support
-comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
-	depends on X86_NUMAQ && (!HIGHMEM64G || !SMP)
-
 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 	depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
 
@@ -660,13 +659,18 @@ config BOOT_IOREMAP
 	default y
 
 config REGPARM
-	bool "Use register arguments (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	default n
+	bool "Use register arguments"
+	default y
 	help
-	Compile the kernel with -mregparm=3. This uses a different ABI
-	and passes the first three arguments of a function call in registers.
-	This will probably break binary only modules.
+	Compile the kernel with -mregparm=3. This instructs gcc to use
+	a more efficient function call ABI which passes the first three
+	arguments of a function call via registers, which results in denser
+	and faster code.
+
+	If this option is disabled, then the default ABI of passing
+	arguments via the stack is used.
+
+	If unsure, say Y.
 
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
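
To illustrate what -mregparm=3 changes, a hedged sketch of the two calling
conventions (paraphrased compiler output, not taken from a real build):

	/* With -mregparm=3, gcc passes the first three integer arguments
	 * in %eax, %edx and %ecx instead of pushing them on the stack. */
	int add3(int a, int b, int c);  /* regparm: a=%eax, b=%edx, c=%ecx */

	/* A caller therefore loads registers rather than pushing:
	 *      movl $1, %eax ; movl $2, %edx ; movl $3, %ecx ; call add3
	 * instead of the default stack-based ABI:
	 *      pushl $3 ; pushl $2 ; pushl $1 ; call add3
	 */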

+ 9 - 0
arch/i386/Kconfig.debug

@@ -31,6 +31,15 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
+config STACK_BACKTRACE_COLS
+	int "Stack backtraces per line" if DEBUG_KERNEL
+	range 1 3
+	default 2
+	help
+	  Selects how many stack backtrace entries per line to display.
+
+	  This can save screen space when displaying traces.
+
 comment "Page alloc debug is incompatible with Software Suspend on i386"
 	depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
 

+ 1 - 1
arch/i386/kernel/Makefile

@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds
 obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		quirks.o i8237.o topology.o
+		quirks.o i8237.o topology.o alternative.o
 
 obj-y				+= cpu/
 obj-y				+= timers/

+ 321 - 0
arch/i386/kernel/alternative.c

@@ -0,0 +1,321 @@
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <asm/alternative.h>
+#include <asm/sections.h>
+
+#define DEBUG 0
+#if DEBUG
+# define DPRINTK(fmt, args...) printk(fmt, args)
+#else
+# define DPRINTK(fmt, args...)
+#endif
+
+/* Use inline assembly to define this because the nops are defined
+   as inline assembly strings in the include files and we cannot
+   get them easily into strings. */
+asm("\t.data\nintelnops: "
+	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
+	GENERIC_NOP7 GENERIC_NOP8);
+asm("\t.data\nk8nops: "
+	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+	K8_NOP7 K8_NOP8);
+asm("\t.data\nk7nops: "
+	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+	K7_NOP7 K7_NOP8);
+
+extern unsigned char intelnops[], k8nops[], k7nops[];
+static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
+	NULL,
+	intelnops,
+	intelnops + 1,
+	intelnops + 1 + 2,
+	intelnops + 1 + 2 + 3,
+	intelnops + 1 + 2 + 3 + 4,
+	intelnops + 1 + 2 + 3 + 4 + 5,
+	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
+	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
+	NULL,
+	k8nops,
+	k8nops + 1,
+	k8nops + 1 + 2,
+	k8nops + 1 + 2 + 3,
+	k8nops + 1 + 2 + 3 + 4,
+	k8nops + 1 + 2 + 3 + 4 + 5,
+	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
+	NULL,
+	k7nops,
+	k7nops + 1,
+	k7nops + 1 + 2,
+	k7nops + 1 + 2 + 3,
+	k7nops + 1 + 2 + 3 + 4,
+	k7nops + 1 + 2 + 3 + 4 + 5,
+	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
+	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static struct nop {
+	int cpuid;
+	unsigned char **noptable;
+} noptypes[] = {
+	{ X86_FEATURE_K8, k8_nops },
+	{ X86_FEATURE_K7, k7_nops },
+	{ -1, NULL }
+};
+
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
+
+static unsigned char** find_nop_table(void)
+{
+	unsigned char **noptable = intel_nops;
+	int i;
+
+	for (i = 0; noptypes[i].cpuid >= 0; i++) {
+		if (boot_cpu_has(noptypes[i].cpuid)) {
+			noptable = noptypes[i].noptable;
+			break;
+		}
+	}
+	return noptable;
+}
+
+/* Replace instructions with better alternatives for this CPU type.
+   This runs before SMP is initialized to avoid SMP problems with
+   self modifying code. This implies that asymmetric systems where
+   APs have fewer capabilities than the boot processor are not handled.
+   Tough. Make sure you disable such features by hand. */
+
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+	unsigned char **noptable = find_nop_table();
+	struct alt_instr *a;
+	int diff, i, k;
+
+	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
+	for (a = start; a < end; a++) {
+		BUG_ON(a->replacementlen > a->instrlen);
+		if (!boot_cpu_has(a->cpuid))
+			continue;
+		memcpy(a->instr, a->replacement, a->replacementlen);
+		diff = a->instrlen - a->replacementlen;
+		/* Pad the rest with nops */
+		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
+			k = diff;
+			if (k > ASM_NOP_MAX)
+				k = ASM_NOP_MAX;
+			memcpy(a->instr + i, noptable[k], k);
+		}
+	}
+}
+
+static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
+{
+	struct alt_instr *a;
+
+	DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
+	for (a = start; a < end; a++) {
+		memcpy(a->replacement + a->replacementlen,
+		       a->instr,
+		       a->instrlen);
+	}
+}
+
+static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
+{
+	struct alt_instr *a;
+
+	for (a = start; a < end; a++) {
+		memcpy(a->instr,
+		       a->replacement + a->replacementlen,
+		       a->instrlen);
+	}
+}
+
+static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
+{
+	u8 **ptr;
+
+	for (ptr = start; ptr < end; ptr++) {
+		if (*ptr < text)
+			continue;
+		if (*ptr > text_end)
+			continue;
+		**ptr = 0xf0; /* lock prefix */
+	};
+}
+
+static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
+{
+	unsigned char **noptable = find_nop_table();
+	u8 **ptr;
+
+	for (ptr = start; ptr < end; ptr++) {
+		if (*ptr < text)
+			continue;
+		if (*ptr > text_end)
+			continue;
+		**ptr = noptable[1][0];
+	};
+}
+
+struct smp_alt_module {
+	/* module owning these lock prefixes (NULL for the core kernel) */
+	struct module	*mod;
+	char		*name;
+
+	/* ptrs to lock prefixes */
+	u8		**locks;
+	u8		**locks_end;
+
+	/* .text segment, needed to avoid patching init code ;) */
+	u8		*text;
+	u8		*text_end;
+
+	struct list_head next;
+};
+static LIST_HEAD(smp_alt_modules);
+static DEFINE_SPINLOCK(smp_alt);
+
+static int smp_alt_once = 0;
+static int __init bootonly(char *str)
+{
+	smp_alt_once = 1;
+	return 1;
+}
+__setup("smp-alt-boot", bootonly);
+
+void alternatives_smp_module_add(struct module *mod, char *name,
+				 void *locks, void *locks_end,
+				 void *text,  void *text_end)
+{
+	struct smp_alt_module *smp;
+	unsigned long flags;
+
+	if (smp_alt_once) {
+		if (boot_cpu_has(X86_FEATURE_UP))
+			alternatives_smp_unlock(locks, locks_end,
+						text, text_end);
+		return;
+	}
+
+	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
+	if (NULL == smp)
+		return; /* we'll run the (safe but slow) SMP code then ... */
+
+	smp->mod	= mod;
+	smp->name	= name;
+	smp->locks	= locks;
+	smp->locks_end	= locks_end;
+	smp->text	= text;
+	smp->text_end	= text_end;
+	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
+		__FUNCTION__, smp->locks, smp->locks_end,
+		smp->text, smp->text_end, smp->name);
+
+	spin_lock_irqsave(&smp_alt, flags);
+	list_add_tail(&smp->next, &smp_alt_modules);
+	if (boot_cpu_has(X86_FEATURE_UP))
+		alternatives_smp_unlock(smp->locks, smp->locks_end,
+					smp->text, smp->text_end);
+	spin_unlock_irqrestore(&smp_alt, flags);
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+	struct smp_alt_module *item;
+	unsigned long flags;
+
+	if (smp_alt_once)
+		return;
+
+	spin_lock_irqsave(&smp_alt, flags);
+	list_for_each_entry(item, &smp_alt_modules, next) {
+		if (mod != item->mod)
+			continue;
+		list_del(&item->next);
+		spin_unlock_irqrestore(&smp_alt, flags);
+		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
+		kfree(item);
+		return;
+	}
+	spin_unlock_irqrestore(&smp_alt, flags);
+}
+
+void alternatives_smp_switch(int smp)
+{
+	struct smp_alt_module *mod;
+	unsigned long flags;
+
+	if (smp_alt_once)
+		return;
+	BUG_ON(!smp && (num_online_cpus() > 1));
+
+	spin_lock_irqsave(&smp_alt, flags);
+	if (smp) {
+		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
+		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
+		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		alternatives_smp_apply(__smp_alt_instructions,
+				       __smp_alt_instructions_end);
+		list_for_each_entry(mod, &smp_alt_modules, next)
+			alternatives_smp_lock(mod->locks, mod->locks_end,
+					      mod->text, mod->text_end);
+	} else {
+		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
+		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		apply_alternatives(__smp_alt_instructions,
+				   __smp_alt_instructions_end);
+		list_for_each_entry(mod, &smp_alt_modules, next)
+			alternatives_smp_unlock(mod->locks, mod->locks_end,
+						mod->text, mod->text_end);
+	}
+	spin_unlock_irqrestore(&smp_alt, flags);
+}
+
+void __init alternative_instructions(void)
+{
+	apply_alternatives(__alt_instructions, __alt_instructions_end);
+
+	/* switch to patch-once-at-boottime-only mode and free the
+	 * tables in case we know the number of CPUs will never ever
+	 * change */
+#ifdef CONFIG_HOTPLUG_CPU
+	if (num_possible_cpus() < 2)
+		smp_alt_once = 1;
+#else
+	smp_alt_once = 1;
+#endif
+
+	if (smp_alt_once) {
+		if (1 == num_possible_cpus()) {
+			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
+			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+			apply_alternatives(__smp_alt_instructions,
+					   __smp_alt_instructions_end);
+			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
+						_text, _etext);
+		}
+		free_init_pages("SMP alternatives",
+				(unsigned long)__smp_alt_begin,
+				(unsigned long)__smp_alt_end);
+	} else {
+		alternatives_smp_save(__smp_alt_instructions,
+				      __smp_alt_instructions_end);
+		alternatives_smp_module_add(NULL, "core kernel",
+					    __smp_locks, __smp_locks_end,
+					    _text, _etext);
+		alternatives_smp_switch(0);
+	}
+}

+ 1 - 0
arch/i386/kernel/apic.c

@@ -38,6 +38,7 @@
 #include <asm/i8253.h>
 
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 #include <mach_ipi.h>
 
 #include "io_ports.h"

+ 1 - 0
arch/i386/kernel/cpu/centaur.c

@@ -4,6 +4,7 @@
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/e820.h>
+#include <asm/mtrr.h>
 #include "cpu.h"
 
 #ifdef CONFIG_X86_OOSTORE

+ 30 - 17
arch/i386/kernel/cpu/common.c

@@ -25,9 +25,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
 
-static int cachesize_override __devinitdata = -1;
-static int disable_x86_fxsr __devinitdata = 0;
-static int disable_x86_serial_nr __devinitdata = 1;
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
@@ -59,7 +60,7 @@ static int __init cachesize_setup(char *str)
 }
 __setup("cachesize=", cachesize_setup);
 
-int __devinit get_model_name(struct cpuinfo_x86 *c)
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
 {
 	unsigned int *v;
 	char *p, *q;
@@ -89,7 +90,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c)
 }
 
 
-void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 {
 	unsigned int n, dummy, ecx, edx, l2size;
 
@@ -130,7 +131,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
 /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
 
 /* Look up CPU names by table lookup. */
-static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
 {
 	struct cpu_model_info *info;
 
@@ -151,7 +152,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
 }
 
 
-static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
 {
 	char *v = c->x86_vendor_id;
 	int i;
@@ -187,6 +188,14 @@ static int __init x86_fxsr_setup(char * s)
 __setup("nofxsr", x86_fxsr_setup);
 
 
+static int __init x86_sep_setup(char * s)
+{
+	disable_x86_sep = 1;
+	return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
 /* Standard macro to see if a specific flag is changeable */
 static inline int flag_is_changeable_p(u32 flag)
 {
@@ -210,7 +219,7 @@ static inline int flag_is_changeable_p(u32 flag)
 
 
 /* Probe for the CPUID instruction */
-static int __devinit have_cpuid_p(void)
+static int __cpuinit have_cpuid_p(void)
 {
 	return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -254,7 +263,7 @@ static void __init early_cpu_detect(void)
 	}
 }
 
-void __devinit generic_identify(struct cpuinfo_x86 * c)
+void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 	u32 tfms, xlvl;
 	int junk;
@@ -307,7 +316,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c)
 #endif
 }
 
-static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
 		/* Disable processor serial number */
@@ -335,7 +344,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __devinit identify_cpu(struct cpuinfo_x86 *c)
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
@@ -405,6 +414,10 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 		clear_bit(X86_FEATURE_XMM, c->x86_capability);
 	}
 
+	/* SEP disabled? */
+	if (disable_x86_sep)
+		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
 	if (disable_pse)
 		clear_bit(X86_FEATURE_PSE, c->x86_capability);
 
@@ -417,7 +430,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 		else
 			/* Last resort... */
 			sprintf(c->x86_model_id, "%02x/%02x",
-				c->x86_vendor, c->x86_model);
+				c->x86, c->x86_model);
 	}
 
 	/* Now the feature flags better reflect actual CPU features! */
@@ -453,7 +466,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_X86_HT
-void __devinit detect_ht(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 	u32 	eax, ebx, ecx, edx;
 	int 	index_msb, core_bits;
@@ -500,7 +513,7 @@ void __devinit detect_ht(struct cpuinfo_x86 *c)
 }
 #endif
 
-void __devinit print_cpu_info(struct cpuinfo_x86 *c)
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	char *vendor = NULL;
 
@@ -523,7 +536,7 @@ void __devinit print_cpu_info(struct cpuinfo_x86 *c)
 		printk("\n");
 }
 
-cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
 /* This is hacky. :)
  * We're emulating future behavior.
@@ -570,7 +583,7 @@ void __init early_cpu_init(void)
  * and IDT. We reload them nevertheless, this function acts as a
  * 'CPU state barrier', nothing should get across.
  */
-void __devinit cpu_init(void)
+void __cpuinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
@@ -670,7 +683,7 @@ void __devinit cpu_init(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-void __devinit cpu_uninit(void)
+void __cpuinit cpu_uninit(void)
 {
 	int cpu = raw_smp_processor_id();
 	cpu_clear(cpu, cpu_initialized);

+ 1 - 3
arch/i386/kernel/cpu/cpufreq/powernow-k8.c

@@ -1145,9 +1145,7 @@ static int __cpuinit powernowk8_init(void)
 {
 	unsigned int i, supported_cpus = 0;
 
-	for (i=0; i<NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_cpu(i) {
 		if (check_supported_cpu(i))
 			supported_cpus++;
 	}

+ 6 - 6
arch/i386/kernel/cpu/intel.c

@@ -29,7 +29,7 @@ extern int trap_init_f00f_bug(void);
 struct movsl_mask movsl_mask __read_mostly;
 #endif
 
-void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
+void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor != X86_VENDOR_INTEL)
 		return;
@@ -44,7 +44,7 @@ void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
  *	This is called before we do cpu ident work
  */
 
-int __devinit ppro_with_ram_bug(void)
+int __cpuinit ppro_with_ram_bug(void)
 {
 	/* Uses data from early_cpu_detect now */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -62,7 +62,7 @@ int __devinit ppro_with_ram_bug(void)
  * P4 Xeon errata 037 workaround.
  * Hardware prefetcher may cause stale data to be loaded into the cache.
  */
-static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 {
 	unsigned long lo, hi;
 
@@ -81,7 +81,7 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 /*
  * find out the number of processor cores on the die
  */
-static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
+static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
 {
 	unsigned int eax, ebx, ecx, edx;
 
@@ -96,7 +96,7 @@ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
 		return 1;
 }
 
-static void __devinit init_intel(struct cpuinfo_x86 *c)
+static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 {
 	unsigned int l2 = 0;
 	char *p = NULL;
@@ -205,7 +205,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 	return size;
 }
 
-static struct cpu_dev intel_cpu_dev __devinitdata = {
+static struct cpu_dev intel_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Intel",
 	.c_ident 	= { "GenuineIntel" },
 	.c_models = {

+ 2 - 2
arch/i386/kernel/cpu/intel_cacheinfo.c

@@ -174,7 +174,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 
-	if (c->cpuid_level > 4) {
+	if (c->cpuid_level > 3) {
 		static int is_initialized;
 
 		if (is_initialized == 0) {
@@ -330,7 +330,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 		}
 	}
 }
-static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 {
 	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 	int sibling;
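
The cpuid_level fix above is an off-by-one: the cache sizes come from CPUID leaf 4, which exists whenever the maximum supported standard leaf is at least 4, so the guard should be "> 3" rather than "> 4". A small runnable check of the same condition using GCC/Clang's cpuid.h (the fall-back message is this sketch's wording, not kernel output):

#include <stdio.h>
#include <cpuid.h>	/* x86 only: provides __get_cpuid_max() */

int main(void)
{
	/* highest supported standard CPUID leaf, i.e. c->cpuid_level */
	unsigned int max_leaf = __get_cpuid_max(0, NULL);

	/* leaf 4 (deterministic cache parameters) needs max_leaf >= 4 */
	if (max_leaf > 3)
		printf("cpuid(4) available (max leaf %u)\n", max_leaf);
	else
		printf("no cpuid(4); use leaf 2 instead (max leaf %u)\n",
		       max_leaf);
	return 0;
}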

+ 1 - 1
arch/i386/kernel/cpu/proc.c

@@ -40,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		/* Other (Linux-defined) */
 		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
 		NULL, NULL, NULL, NULL,
-		"constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		"constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 

+ 1 - 1
arch/i386/kernel/crash.c

@@ -105,7 +105,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
 		return 1;
 	local_irq_disable();
 
-	if (!user_mode(regs)) {
+	if (!user_mode_vm(regs)) {
 		crash_fixup_ss_esp(&fixed_regs, regs);
 		regs = &fixed_regs;
 	}

+ 4 - 0
arch/i386/kernel/entry.S

@@ -226,6 +226,10 @@ ENTRY(system_call)
 	pushl %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+	testl $TF_MASK,EFLAGS(%esp)
+	jz no_singlestep
+	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
 					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)

+ 1 - 4
arch/i386/kernel/head.S

@@ -450,7 +450,6 @@ int_msg:
 
 .globl boot_gdt_descr
 .globl idt_descr
-.globl cpu_gdt_descr
 
 	ALIGN
 # early boot GDT descriptor (must use 1:1 address mapping)
@@ -470,8 +469,6 @@ cpu_gdt_descr:
 	.word GDT_ENTRIES*8-1
 	.long cpu_gdt_table
 
-	.fill NR_CPUS-1,8,0		# space for the other GDT descriptors
-
 /*
 * The boot_gdt_table must mirror the equivalent in setup.S and is
 * used only for booting.
@@ -485,7 +482,7 @@ ENTRY(boot_gdt_table)
 /*
 * The Global Descriptor Table contains 28 quadwords, per-CPU.
 */
-	.align PAGE_SIZE_asm
+	.align L1_CACHE_BYTES
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
 	.quad 0x0000000000000000	/* 0x0b reserved */

+ 11 - 14
arch/i386/kernel/io_apic.c

@@ -351,8 +351,8 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
 {
 	int i, j;
 	Dprintk("Rotating IRQs among CPUs.\n");
-	for (i = 0; i < NR_CPUS; i++) {
-		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+	for_each_online_cpu(i) {
+		for (j = 0; j < NR_IRQS; j++) {
 			if (!irq_desc[j].action)
 				continue;
 			/* Is it a significant load ?  */
@@ -381,7 +381,7 @@ static void do_irq_balance(void)
 	unsigned long imbalance = 0;
 	cpumask_t allowed_mask, target_cpu_mask, tmp;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_cpu(i) {
 		int package_index;
 		CPU_IRQ(i) = 0;
 		if (!cpu_online(i))
@@ -422,9 +422,7 @@ static void do_irq_balance(void)
 		}
 	}
 	/* Find the least loaded processor package */
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_online_cpu(i) {
 		if (i != CPU_TO_PACKAGEINDEX(i))
 			continue;
 		if (min_cpu_irq > CPU_IRQ(i)) {
@@ -441,9 +439,7 @@ tryanothercpu:
 	 */
 	tmp_cpu_irq = 0;
 	tmp_loaded = -1;
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_online_cpu(i) {
 		if (i != CPU_TO_PACKAGEINDEX(i))
 			continue;
 		if (max_cpu_irq <= CPU_IRQ(i)) 
@@ -619,9 +615,7 @@ static int __init balanced_irq_init(void)
 	if (smp_num_siblings > 1 && !cpus_empty(tmp))
 		physical_balance = 1;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_online_cpu(i) {
 		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
 		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
 		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
@@ -638,9 +632,11 @@ static int __init balanced_irq_init(void)
 	else 
 		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_cpu(i) {
 		kfree(irq_cpu_data[i].irq_delta);
+		irq_cpu_data[i].irq_delta = NULL;
 		kfree(irq_cpu_data[i].last_irq);
+		irq_cpu_data[i].last_irq = NULL;
 	}
 	return 0;
 }
@@ -1761,7 +1757,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
 	 * no meaning without the serial APIC bus.
 	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
 		return;
 	/*
 	 * This is broken; anything with a real cpu count has to

+ 2 - 2
arch/i386/kernel/kprobes.c

@@ -84,9 +84,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	down(&kprobe_mutex);
+	mutex_lock(&kprobe_mutex);
 	free_insn_slot(p->ainsn.insn);
-	up(&kprobe_mutex);
+	mutex_unlock(&kprobe_mutex);
 }
 
 static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
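
Here a semaphore used purely as a sleeping lock is converted to the then-new struct mutex API; the critical section is unchanged. A userspace analogue with POSIX threads (pthread calls are real; the kprobe bookkeeping is a stand-in):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t kprobe_mutex = PTHREAD_MUTEX_INITIALIZER;
static int insn_slots_freed;

/* analogue of arch_remove_kprobe(): lock/unlock instead of down()/up() */
static void remove_probe(void)
{
	pthread_mutex_lock(&kprobe_mutex);	/* was down(&kprobe_mutex) */
	insn_slots_freed++;			/* was free_insn_slot(...) */
	pthread_mutex_unlock(&kprobe_mutex);	/* was up(&kprobe_mutex) */
}

int main(void)
{
	remove_probe();
	printf("slots freed: %d\n", insn_slots_freed);
	return 0;
}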

+ 22 - 10
arch/i386/kernel/module.c

@@ -104,26 +104,38 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 	return -ENOEXEC;
 }
 
-extern void apply_alternatives(void *start, void *end); 
-
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	const Elf_Shdr *s;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
-	/* look for .altinstructions to patch */ 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 
-		void *seg; 		
-		if (strcmp(".altinstructions", secstrings + s->sh_name))
-			continue;
-		seg = (void *)s->sh_addr; 
-		apply_alternatives(seg, seg + s->sh_size); 
-	} 	
+		if (!strcmp(".text", secstrings + s->sh_name))
+			text = s;
+		if (!strcmp(".altinstructions", secstrings + s->sh_name))
+			alt = s;
+		if (!strcmp(".smp_locks", secstrings + s->sh_name))
+			locks= s;
+	}
+
+	if (alt) {
+		/* patch .altinstructions */
+		void *aseg = (void *)alt->sh_addr;
+		apply_alternatives(aseg, aseg + alt->sh_size);
+	}
+	if (locks && text) {
+		void *lseg = (void *)locks->sh_addr;
+		void *tseg = (void *)text->sh_addr;
+		alternatives_smp_module_add(me, me->name,
+					    lseg, lseg + locks->sh_size,
+					    tseg, tseg + text->sh_size);
+	}
 	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
+	alternatives_smp_module_del(mod);
 }
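
module_finalize() now records up to three sections in a single pass instead of acting inside the loop, because SMP-lock patching needs .smp_locks and .text together. A self-contained sketch of that scan over a toy section table (the struct and the data are made up; only the section names and loop shape mirror the diff):

#include <stdio.h>
#include <string.h>

struct shdr { const char *name; };	/* toy stand-in for Elf_Shdr */

int main(void)
{
	struct shdr sections[] = {
		{ ".text" }, { ".data" }, { ".smp_locks" },
		{ ".altinstructions" },
	};
	const struct shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
	size_t n = sizeof(sections) / sizeof(sections[0]), i;

	/* one pass, remembering every section we care about */
	for (i = 0; i < n; i++) {
		s = &sections[i];
		if (!strcmp(".text", s->name))
			text = s;
		if (!strcmp(".altinstructions", s->name))
			alt = s;
		if (!strcmp(".smp_locks", s->name))
			locks = s;
	}

	/* patching only proceeds when the required sections exist */
	printf("alt patch: %d, smp patch: %d\n", alt != NULL, locks && text);
	return 0;
}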

+ 4 - 3
arch/i386/kernel/mpparse.c

@@ -828,6 +828,8 @@ void __init find_smp_config (void)
 		smp_scan_config(address, 0x400);
 }
 
+int es7000_plat;
+
 /* --------------------------------------------------------------------------
                             ACPI-based MP Configuration
    -------------------------------------------------------------------------- */
@@ -935,7 +937,8 @@ void __init mp_register_ioapic (
 	mp_ioapics[idx].mpc_apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		&& !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
 		tmpid = io_apic_get_unique_id(idx, id);
 	else
 		tmpid = id;
@@ -1011,8 +1014,6 @@ void __init mp_override_legacy_irq (
 	return;
 }
 
-int es7000_plat;
-
 void __init mp_config_acpi_legacy_irqs (void)
 {
 	struct mpc_config_intsrc intsrc;

+ 3 - 3
arch/i386/kernel/nmi.c

@@ -143,7 +143,7 @@ static int __init check_nmi_watchdog(void)
 	local_irq_enable();
 	mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_cpu(cpu) {
 #ifdef CONFIG_SMP
 		/* Check cpu_callin_map here because that is set
 		   after the timer is started. */
@@ -510,7 +510,7 @@ void touch_nmi_watchdog (void)
 	 * Just reset the alert counters, (other CPUs might be
 	 * spinning on locks we hold):
 	 */
-	for (i = 0; i < NR_CPUS; i++)
+	for_each_cpu(i)
 		alert_counter[i] = 0;
 
 	/*
@@ -543,7 +543,7 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 			/*
 			 * die_nmi will return ONLY if NOTIFY_STOP happens..
 			 */
-			die_nmi(regs, "NMI Watchdog detected LOCKUP");
+			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
 	} else {
 		last_irq_sums[cpu] = sum;
 		alert_counter[cpu] = 0;

+ 1 - 1
arch/i386/kernel/process.c

@@ -295,7 +295,7 @@ void show_regs(struct pt_regs * regs)
 	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->eip);
 
-	if (user_mode(regs))
+	if (user_mode_vm(regs))
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
 	printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
 	       regs->eflags, print_tainted(), system_utsname.release,

+ 2 - 2
arch/i386/kernel/ptrace.c

@@ -34,10 +34,10 @@
 
 /*
 * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
+ * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9).
 * Also masks reserved bits (31-22, 15, 5, 3, 1).
 */
-#define FLAG_MASK 0x00054dd5
+#define FLAG_MASK 0x00050dd5
 
 /* set's the trap flag. */
 #define TRAP_FLAG 0x100
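
The mask change clears exactly one more EFLAGS bit from the user-writable set: NT is bit 14, i.e. 0x4000, and 0x00054dd5 & ~0x4000 = 0x00050dd5. A quick arithmetic check:

#include <stdio.h>

int main(void)
{
	unsigned long old_mask = 0x00054dd5;	/* previous FLAG_MASK */
	unsigned long nt_bit   = 1UL << 14;	/* EFLAGS.NT */

	/* removing NT from the writable set yields the new mask */
	printf("old & ~NT = %#010lx\n", old_mask & ~nt_bit);	/* 0x00050dd5 */
	printf("diff bits = %#010lx\n", old_mask ^ 0x00050dd5);	/* 0x00004000 */
	return 0;
}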

+ 4 - 4
arch/i386/kernel/semaphore.c

@@ -110,11 +110,11 @@ asm(
 ".align	4\n"
 ".globl	__write_lock_failed\n"
 "__write_lock_failed:\n\t"
-	LOCK "addl	$" RW_LOCK_BIAS_STR ",(%eax)\n"
+	LOCK_PREFIX "addl	$" RW_LOCK_BIAS_STR ",(%eax)\n"
 "1:	rep; nop\n\t"
 	"cmpl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
 	"jne	1b\n\t"
-	LOCK "subl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+	LOCK_PREFIX "subl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
 	"jnz	__write_lock_failed\n\t"
 	"ret"
 );
@@ -124,11 +124,11 @@ asm(
 ".align	4\n"
 ".globl	__read_lock_failed\n"
 "__read_lock_failed:\n\t"
-	LOCK "incl	(%eax)\n"
+	LOCK_PREFIX "incl	(%eax)\n"
 "1:	rep; nop\n\t"
 	"cmpl	$1,(%eax)\n\t"
 	"js	1b\n\t"
-	LOCK "decl	(%eax)\n\t"
+	LOCK_PREFIX "decl	(%eax)\n\t"
 	"js	__read_lock_failed\n\t"
 	"ret"
 );

+ 10 - 108
arch/i386/kernel/setup.c

@@ -1377,101 +1377,6 @@ static void __init register_memory(void)
 		pci_mem_start, gapstart, gapsize);
 }
 
-/* Use inline assembly to define this because the nops are defined 
-   as inline assembly strings in the include files and we cannot 
-   get them easily into strings. */
-asm("\t.data\nintelnops: " 
-    GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
-    GENERIC_NOP7 GENERIC_NOP8); 
-asm("\t.data\nk8nops: " 
-    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-    K8_NOP7 K8_NOP8); 
-asm("\t.data\nk7nops: " 
-    K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-    K7_NOP7 K7_NOP8); 
-    
-extern unsigned char intelnops[], k8nops[], k7nops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     intelnops,
-     intelnops + 1,
-     intelnops + 1 + 2,
-     intelnops + 1 + 2 + 3,
-     intelnops + 1 + 2 + 3 + 4,
-     intelnops + 1 + 2 + 3 + 4 + 5,
-     intelnops + 1 + 2 + 3 + 4 + 5 + 6,
-     intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     k8nops,
-     k8nops + 1,
-     k8nops + 1 + 2,
-     k8nops + 1 + 2 + 3,
-     k8nops + 1 + 2 + 3 + 4,
-     k8nops + 1 + 2 + 3 + 4 + 5,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     k7nops,
-     k7nops + 1,
-     k7nops + 1 + 2,
-     k7nops + 1 + 2 + 3,
-     k7nops + 1 + 2 + 3 + 4,
-     k7nops + 1 + 2 + 3 + 4 + 5,
-     k7nops + 1 + 2 + 3 + 4 + 5 + 6,
-     k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-static struct nop { 
-     int cpuid; 
-     unsigned char **noptable; 
-} noptypes[] = { 
-     { X86_FEATURE_K8, k8_nops }, 
-     { X86_FEATURE_K7, k7_nops }, 
-     { -1, NULL }
-}; 
-
-/* Replace instructions with better alternatives for this CPU type.
-
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that assymetric systems where
-   APs have less capabilities than the boot processor are not handled. 
-   Tough. Make sure you disable such features by hand. */ 
-void apply_alternatives(void *start, void *end) 
-{ 
-	struct alt_instr *a; 
-	int diff, i, k;
-        unsigned char **noptable = intel_nops; 
-	for (i = 0; noptypes[i].cpuid >= 0; i++) { 
-		if (boot_cpu_has(noptypes[i].cpuid)) { 
-			noptable = noptypes[i].noptable;
-			break;
-		}
-	} 
-	for (a = start; (void *)a < end; a++) { 
-		if (!boot_cpu_has(a->cpuid))
-			continue;
-		BUG_ON(a->replacementlen > a->instrlen); 
-		memcpy(a->instr, a->replacement, a->replacementlen); 
-		diff = a->instrlen - a->replacementlen; 
-		/* Pad the rest with nops */
-		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-			k = diff;
-			if (k > ASM_NOP_MAX)
-				k = ASM_NOP_MAX;
-			memcpy(a->instr + i, noptable[k], k); 
-		} 
-	}
-} 
-
-void __init alternative_instructions(void)
-{
-	extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-	apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
 static char * __init machine_specific_memory_setup(void);
 
 #ifdef CONFIG_MCA
@@ -1554,6 +1459,16 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_cmdline_early(cmdline_p);
 
+#ifdef CONFIG_EARLY_PRINTK
+	{
+		char *s = strstr(*cmdline_p, "earlyprintk=");
+		if (s) {
+			setup_early_printk(strchr(s, '=') + 1);
+			printk("early console enabled\n");
+		}
+	}
+#endif
+
 	max_low_pfn = setup_memory();
 
 	/*
@@ -1578,19 +1493,6 @@ void __init setup_arch(char **cmdline_p)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 
-#ifdef CONFIG_EARLY_PRINTK
-	{
-		char *s = strstr(*cmdline_p, "earlyprintk=");
-		if (s) {
-			extern void setup_early_printk(char *);
-
-			setup_early_printk(strchr(s, '=') + 1);
-			printk("early console enabled\n");
-		}
-	}
-#endif
-
-
 	dmi_scan_machine();
 
 #ifdef CONFIG_X86_GENERICARCH
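
The NOP tables and apply_alternatives() deleted here move into the new arch/i386/kernel/alternative.c added earlier in this diff. The NOP-padding loop they carry is easy to see in isolation; a userspace sketch with a made-up two-entry NOP table (the real tables are per-vendor encodings up to ASM_NOP_MAX = 8):

#include <stdio.h>
#include <string.h>

#define ASM_NOP_MAX 2			/* toy limit; the kernel uses 8 */

static unsigned char nop1[] = { 0x90 };		/* 1-byte nop */
static unsigned char nop2[] = { 0x66, 0x90 };	/* 2-byte nop */
static unsigned char *noptable[ASM_NOP_MAX + 1] = { NULL, nop1, nop2 };

int main(void)
{
	unsigned char insn[8] = { 0 };
	int replacementlen = 3, instrlen = 8;
	int i, k, diff = instrlen - replacementlen;

	/* pad the tail with the largest available nops, exactly as the
	 * loop removed above (now in alternative.c) does */
	for (i = replacementlen; diff > 0; diff -= k, i += k) {
		k = diff;
		if (k > ASM_NOP_MAX)
			k = ASM_NOP_MAX;
		memcpy(insn + i, noptable[k], k);
	}

	for (i = 0; i < instrlen; i++)
		printf("%02x ", insn[i]);
	printf("\n");
	return 0;
}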

+ 2 - 5
arch/i386/kernel/signal.c

@@ -123,7 +123,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 	  err |= __get_user(tmp, &sc->seg);				\
 	  loadsegment(seg,tmp); }
 
-#define	FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
+#define	FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_RF |		 \
+			 X86_EFLAGS_OF | X86_EFLAGS_DF |		 \
 			 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
 			 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
 
@@ -582,9 +583,6 @@ static void fastcall do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (try_to_freeze())
-		goto no_signal;
-
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
 		oldset = &current->saved_sigmask;
 	else
@@ -613,7 +611,6 @@ static void fastcall do_signal(struct pt_regs *regs)
 		return;
 	}
 
-no_signal:
 	/* Did we come from a system call? */
 	if (regs->orig_eax >= 0) {
 		/* Restart the system call - no handlers present */

+ 3 - 0
arch/i386/kernel/smpboot.c

@@ -899,6 +899,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	unsigned short nmi_high = 0, nmi_low = 0;
 
 	++cpucount;
+	alternatives_smp_switch(1);
 
 	/*
 	 * We can't use kernel_thread since we must avoid to
@@ -1368,6 +1369,8 @@ void __cpu_die(unsigned int cpu)
 		/* They ack this in play_dead by setting CPU_DEAD */
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
 			printk ("CPU %d is now offline\n", cpu);
+			if (1 == num_online_cpus())
+				alternatives_smp_switch(0);
 			return;
 		}
 		msleep(100);

+ 9 - 0
arch/i386/kernel/topology.c

@@ -41,6 +41,15 @@ int arch_register_cpu(int num){
 		parent = &node_devices[node].node;
 #endif /* CONFIG_NUMA */
 
+	/*
+	 * CPU0 cannot be offlined due to several
+	 * restrictions and assumptions in kernel. This basically
+	 * doesnt add a control file, one cannot attempt to offline
+	 * BSP.
+	 */
+	if (!num)
+		cpu_devices[num].cpu.no_control = 1;
+
 	return register_cpu(&cpu_devices[num].cpu, num, parent);
 }
 

+ 40 - 17
arch/i386/kernel/traps.c

@@ -99,6 +99,8 @@ int register_die_notifier(struct notifier_block *nb)
 {
 	int err = 0;
 	unsigned long flags;
+
+	vmalloc_sync_all();
 	spin_lock_irqsave(&die_notifier_lock, flags);
 	err = notifier_chain_register(&i386die_chain, nb);
 	spin_unlock_irqrestore(&die_notifier_lock, flags);
@@ -112,12 +114,30 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 		p < (void *)tinfo + THREAD_SIZE - 3;
 }
 
-static void print_addr_and_symbol(unsigned long addr, char *log_lvl)
+/*
+ * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line.
+ */
+static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl,
+					int printed)
 {
-	printk(log_lvl);
+	if (!printed)
+		printk(log_lvl);
+
+#if CONFIG_STACK_BACKTRACE_COLS == 1
 	printk(" [<%08lx>] ", addr);
+#else
+	printk(" <%08lx> ", addr);
+#endif
 	print_symbol("%s", addr);
-	printk("\n");
+
+	printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS;
+
+	if (printed)
+		printk("  ");
+	else
+		printk("\n");
+
+	return printed;
 }
 
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -125,20 +145,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				char *log_lvl)
 {
 	unsigned long addr;
+	int printed = 0; /* nr of entries already printed on current line */
 
 #ifdef	CONFIG_FRAME_POINTER
 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
 		addr = *(unsigned long *)(ebp + 4);
-		print_addr_and_symbol(addr, log_lvl);
+		printed = print_addr_and_symbol(addr, log_lvl, printed);
 		ebp = *(unsigned long *)ebp;
 	}
 #else
 	while (valid_stack_ptr(tinfo, stack)) {
 		addr = *stack++;
 		if (__kernel_text_address(addr))
-			print_addr_and_symbol(addr, log_lvl);
+			printed = print_addr_and_symbol(addr, log_lvl, printed);
 	}
 #endif
+	if (printed)
+		printk("\n");
+
 	return ebp;
 }
 
@@ -166,8 +190,7 @@ static void show_trace_log_lvl(struct task_struct *task,
 		stack = (unsigned long*)context->previous_esp;
 		if (!stack)
 			break;
-		printk(log_lvl);
-		printk(" =======================\n");
+		printk("%s =======================\n", log_lvl);
 	}
 }
 
@@ -194,21 +217,17 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
 	for(i = 0; i < kstack_depth_to_print; i++) {
 		if (kstack_end(stack))
 			break;
-		if (i && ((i % 8) == 0)) {
-			printk("\n");
-			printk(log_lvl);
-			printk("       ");
-		}
+		if (i && ((i % 8) == 0))
+			printk("\n%s       ", log_lvl);
 		printk("%08lx ", *stack++);
 	}
-	printk("\n");
-	printk(log_lvl);
-	printk("Call Trace:\n");
+	printk("\n%sCall Trace:\n", log_lvl);
 	show_trace_log_lvl(task, esp, log_lvl);
 }
 
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
+	printk("       ");
 	show_stack_log_lvl(task, esp, "");
 }
 
@@ -233,7 +252,7 @@ void show_registers(struct pt_regs *regs)
 
 	esp = (unsigned long) (&regs->esp);
 	savesegment(ss, ss);
-	if (user_mode(regs)) {
+	if (user_mode_vm(regs)) {
 		in_kernel = 0;
 		esp = regs->esp;
 		ss = regs->xss & 0xffff;
@@ -333,6 +352,8 @@ void die(const char * str, struct pt_regs * regs, long err)
 	static int die_counter;
 	unsigned long flags;
 
+	oops_enter();
+
 	if (die.lock_owner != raw_smp_processor_id()) {
 		console_verbose();
 		spin_lock_irqsave(&die.lock, flags);
@@ -385,6 +406,7 @@ void die(const char * str, struct pt_regs * regs, long err)
 		ssleep(5);
 		panic("Fatal exception");
 	}
+	oops_exit();
 	do_exit(SIGSEGV);
 }
 
@@ -623,7 +645,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
 	/* If we are in kernel we are probably nested up pretty bad
 	 * and might aswell get out now while we still can.
 	*/
-	if (!user_mode(regs)) {
+	if (!user_mode_vm(regs)) {
 		current->thread.trap_no = 2;
 		crash_kexec(regs);
 	}
@@ -694,6 +716,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
 
 void set_nmi_callback(nmi_callback_t callback)
 {
+	vmalloc_sync_all();
 	rcu_assign_pointer(nmi_callback, callback);
 }
 EXPORT_SYMBOL_GPL(set_nmi_callback);
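
The reworked print_addr_and_symbol() folds several backtrace entries onto one line with a modulo counter. The same control flow in a runnable form (CONFIG_STACK_BACKTRACE_COLS is the real new Kconfig knob; the addresses and symbol names below are fabricated):

#include <stdio.h>

#define STACK_BACKTRACE_COLS 3	/* stands in for CONFIG_STACK_BACKTRACE_COLS */

/* returns the updated per-line counter, as in the patched helper */
static int print_addr(unsigned long addr, int printed)
{
	printf(" <%08lx> sym_%lx", addr, addr & 0xff);
	printed = (printed + 1) % STACK_BACKTRACE_COLS;
	printf(printed ? "  " : "\n");
	return printed;
}

int main(void)
{
	unsigned long stack[] = { 0xc0101234, 0xc0105678, 0xc01099ab,
				  0xc010cdef, 0xc0110011 };
	int i, printed = 0;

	for (i = 0; i < 5; i++)
		printed = print_addr(stack[i], printed);
	if (printed)		/* terminate a partially filled line */
		printf("\n");
	return 0;
}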

+ 20 - 0
arch/i386/kernel/vmlinux.lds.S

@@ -68,6 +68,26 @@ SECTIONS
 	*(.data.init_task)
   }
 
+  /* might get freed after init */
+  . = ALIGN(4096);
+  __smp_alt_begin = .;
+  __smp_alt_instructions = .;
+  .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+	*(.smp_altinstructions)
+  }
+  __smp_alt_instructions_end = .;
+  . = ALIGN(4);
+  __smp_locks = .;
+  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+	*(.smp_locks)
+  }
+  __smp_locks_end = .;
+  .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
+	*(.smp_altinstr_replacement)
+  }
+  . = ALIGN(4096);
+  __smp_alt_end = .;
+
   /* will be freed after init */
   . = ALIGN(4096);		/* Init code and data */
   __init_begin = .;

+ 3 - 0
arch/i386/kernel/vsyscall-sysenter.S

@@ -21,6 +21,9 @@
 * instruction clobbers %esp, the user's %esp won't even survive entry
 * into the kernel. We store %esp in %ebp. Code in entry.S must fetch
 * arg6 from the stack.
+ *
+ * You can not use this vsyscall for the clone() syscall because the
+ * three dwords on the parent stack do not get copied to the child.
 */
 	.text
 	.globl __kernel_vsyscall

+ 4 - 1
arch/i386/mach-es7000/es7000.h

@@ -83,6 +83,7 @@ struct es7000_oem_table {
 	struct psai psai;
 };
 
+#ifdef CONFIG_ACPI
 struct acpi_table_sdt {
 	unsigned long pa;
 	unsigned long count;
@@ -99,6 +100,9 @@ struct oem_table {
 	u32 OEMTableSize;
 };
 
+extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
+#endif
+
 struct mip_reg {
 	unsigned long long off_0;
 	unsigned long long off_8;
@@ -114,7 +118,6 @@ struct mip_reg {
 #define	MIP_FUNC(VALUE) 	(VALUE & 0xff)
 
 extern int parse_unisys_oem (char *oemptr);
-extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
 extern void setup_unisys(void);
 extern int es7000_start_cpu(int cpu, unsigned long eip);
 extern void es7000_sw_apic(void);

+ 2 - 4
arch/i386/mach-es7000/es7000plat.c

@@ -51,8 +51,6 @@ struct mip_reg		*host_reg;
 int 			mip_port;
 unsigned long		mip_addr, host_addr;
 
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI)
-
 /*
 * GSI override for ES7000 platforms.
 */
@@ -76,8 +74,6 @@ es7000_rename_gsi(int ioapic, int gsi)
 	return gsi;
 }
 
-#endif	/* (CONFIG_X86_IO_APIC) && (CONFIG_ACPI) */
-
 void __init
 setup_unisys(void)
 {
@@ -160,6 +156,7 @@ parse_unisys_oem (char *oemptr)
 	return es7000_plat;
 }
 
+#ifdef CONFIG_ACPI
 int __init
 find_unisys_acpi_oem_table(unsigned long *oem_addr)
 {
@@ -212,6 +209,7 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
 	}
 	return -1;
 }
+#endif
 
 static void
 es7000_spin(int n)

+ 138 - 72
arch/i386/mm/fault.c

@@ -214,6 +214,68 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
 
 fastcall void do_invalid_op(struct pt_regs *, unsigned long);
 
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+	unsigned index = pgd_index(address);
+	pgd_t *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd += index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k))
+		return NULL;
+
+	/*
+	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
+	 * and redundant with the set_pmd() on non-PAE. As would
+	 * set_pud.
+	 */
+
+	pud = pud_offset(pgd, address);
+	pud_k = pud_offset(pgd_k, address);
+	if (!pud_present(*pud_k))
+		return NULL;
+
+	pmd = pmd_offset(pud, address);
+	pmd_k = pmd_offset(pud_k, address);
+	if (!pmd_present(*pmd_k))
+		return NULL;
+	if (!pmd_present(*pmd))
+		set_pmd(pmd, *pmd_k);
+	else
+		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+	return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
+ */
+static inline int vmalloc_fault(unsigned long address)
+{
+	unsigned long pgd_paddr;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_paddr = read_cr3();
+	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+	if (!pmd_k)
+		return -1;
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+	return 0;
+}
+
 /*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
@@ -223,6 +285,8 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long);
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
+ *	bit 3 == 1 means use of reserved bit detected
+ *	bit 4 == 1 means fault was an instruction fetch
 */
 fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 				      unsigned long error_code)
@@ -237,13 +301,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	/* get the address */
        address = read_cr2();
 
-	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-					SIGSEGV) == NOTIFY_STOP)
-		return;
-	/* It's safe to allow irq's after cr2 has been saved */
-	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
-		local_irq_enable();
-
 	tsk = current;
 
 	si_code = SEGV_MAPERR;
@@ -259,17 +316,29 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	 *
 	 * This verifies that the fault happens in kernel space
 	 * (error_code & 4) == 0, and that the fault was not a
-	 * protection error (error_code & 1) == 0.
+	 * protection error (error_code & 9) == 0.
 	 */
-	if (unlikely(address >= TASK_SIZE)) { 
-		if (!(error_code & 5))
-			goto vmalloc_fault;
-		/* 
+	if (unlikely(address >= TASK_SIZE)) {
+		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+			return;
+		if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+						SIGSEGV) == NOTIFY_STOP)
+			return;
+		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
 		 */
 		goto bad_area_nosemaphore;
-	} 
+	}
+
+	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+					SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
+	   fault has been handled. */
+	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+		local_irq_enable();
 
 	mm = tsk->mm;
 
@@ -440,24 +509,31 @@ no_context:
 
 	bust_spinlocks(1);
 
-#ifdef CONFIG_X86_PAE
-	if (error_code & 16) {
-		pte_t *pte = lookup_address(address);
+	if (oops_may_print()) {
+	#ifdef CONFIG_X86_PAE
+		if (error_code & 16) {
+			pte_t *pte = lookup_address(address);
 
-		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
-			printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+			if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+				printk(KERN_CRIT "kernel tried to execute "
+					"NX-protected page - exploit attempt? "
+					"(uid: %d)\n", current->uid);
+		}
+	#endif
+		if (address < PAGE_SIZE)
+			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+					"pointer dereference");
+		else
+			printk(KERN_ALERT "BUG: unable to handle kernel paging"
+					" request");
+		printk(" at virtual address %08lx\n",address);
+		printk(KERN_ALERT " printing eip:\n");
+		printk("%08lx\n", regs->eip);
 	}
-#endif
-	if (address < PAGE_SIZE)
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-	else
-		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at virtual address %08lx\n",address);
-	printk(KERN_ALERT " printing eip:\n");
-	printk("%08lx\n", regs->eip);
 	page = read_cr3();
 	page = ((unsigned long *) __va(page))[address >> 22];
-	printk(KERN_ALERT "*pde = %08lx\n", page);
+	if (oops_may_print())
+		printk(KERN_ALERT "*pde = %08lx\n", page);
 	/*
 	 * We must not directly access the pte in the highpte
 	 * case, the page table might be allocated in highmem.
@@ -465,7 +541,7 @@ no_context:
 	 * it's allocated already.
 	 */
 #ifndef CONFIG_HIGHPTE
-	if (page & 1) {
+	if ((page & 1) && oops_may_print()) {
 		page &= PAGE_MASK;
 		address &= 0x003ff000;
 		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
@@ -510,51 +586,41 @@ do_sigbus:
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-	return;
-
-vmalloc_fault:
-	{
-		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Do _not_ use "tsk" here. We might be inside
-		 * an interrupt in the middle of a task switch..
-		 */
-		int index = pgd_index(address);
-		unsigned long pgd_paddr;
-		pgd_t *pgd, *pgd_k;
-		pud_t *pud, *pud_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
-
-		pgd_paddr = read_cr3();
-		pgd = index + (pgd_t *)__va(pgd_paddr);
-		pgd_k = init_mm.pgd + index;
-
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-
-		/*
-		 * set_pgd(pgd, *pgd_k); here would be useless on PAE
-		 * and redundant with the set_pmd() on non-PAE. As would
-		 * set_pud.
-		 */
-
-		pud = pud_offset(pgd, address);
-		pud_k = pud_offset(pgd_k, address);
-		if (!pud_present(*pud_k))
-			goto no_context;
-		
-		pmd = pmd_offset(pud, address);
-		pmd_k = pmd_offset(pud_k, address);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-		set_pmd(pmd, *pmd_k);
-
-		pte_k = pte_offset_kernel(pmd_k, address);
-		if (!pte_present(*pte_k))
-			goto no_context;
-		return;
-	}
+}
+
+#ifndef CONFIG_X86_PAE
+void vmalloc_sync_all(void)
+{
+	/*
+	 * Note that races in the updates of insync and start aren't
+	 * problematic: insync can only get set bits added, and updates to
+	 * start are only improving performance (without affecting correctness
+	 * if undone).
+	 */
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	static unsigned long start = TASK_SIZE;
+	unsigned long address;
+
+	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+		if (!test_bit(pgd_index(address), insync)) {
+			unsigned long flags;
+			struct page *page;
+
+			spin_lock_irqsave(&pgd_lock, flags);
+			for (page = pgd_list; page; page =
+					(struct page *)page->index)
+				if (!vmalloc_sync_one(page_address(page),
+								address)) {
+					BUG_ON(page != pgd_list);
+					break;
+				}
+			spin_unlock_irqrestore(&pgd_lock, flags);
+			if (!page)
+				set_bit(pgd_index(address), insync);
+		}
+		if (address == start && test_bit(pgd_index(address), insync))
+			start = address + PGDIR_SIZE;
	}
 }
+#endif
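
vmalloc_fault()/vmalloc_sync_all() above implement lazy propagation of kernel top-level page-table entries plus a catch-up pass for code (NMI/die notifiers) that cannot tolerate the lazy fault. The control flow, reduced to plain arrays; everything below is a toy model of the PGD sync, not kernel code:

#include <stdio.h>

#define PTRS 4
static int reference_pgd[PTRS] = { 1, 0, 1, 1 };	/* plays init_mm.pgd */
static int process_pgd[PTRS];				/* plays a task's pgd */

/* analogue of vmalloc_sync_one(): copy one missing entry on demand */
static int sync_one(int *pgd, int idx)
{
	if (!reference_pgd[idx])
		return -1;		/* nothing to propagate: real fault */
	if (!pgd[idx])
		pgd[idx] = reference_pgd[idx];
	return 0;
}

int main(void)
{
	int idx;

	/* fault path: sync just the entry covering the faulting address */
	sync_one(process_pgd, 2);

	/* vmalloc_sync_all() analogue: walk every entry and catch up */
	for (idx = 0; idx < PTRS; idx++)
		sync_one(process_pgd, idx);

	for (idx = 0; idx < PTRS; idx++)
		printf("%d", process_pgd[idx]);
	printf("\n");
	return 0;
}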

+ 22 - 23
arch/i386/mm/init.c

@@ -720,21 +720,6 @@ static int noinline do_test_wp_bit(void)
 	return flag;
 }
 
-void free_initmem(void)
-{
-	unsigned long addr;
-
-	addr = (unsigned long)(&__init_begin);
-	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		memset((void *)addr, 0xcc, PAGE_SIZE);
-		free_page(addr);
-		totalram_pages++;
-	}
-	printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
-}
-
 #ifdef CONFIG_DEBUG_RODATA
 
 extern char __start_rodata, __end_rodata;
@@ -758,17 +743,31 @@ void mark_rodata_ro(void)
 }
 #endif
 
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		memset((void *)addr, 0xcc, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+void free_initmem(void)
+{
+	free_init_pages("unused kernel memory",
+			(unsigned long)(&__init_begin),
+			(unsigned long)(&__init_end));
+}
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	if (start < end)
-		printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
-	for (; start < end; start += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(start));
-		init_page_count(virt_to_page(start));
-		free_page(start);
-		totalram_pages++;
-	}
+	free_init_pages("initrd memory", start, end);
 }
 #endif
+

+ 2 - 5
arch/i386/oprofile/nmi_int.c

@@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy)
 static void free_msrs(void)
 {
 	int i;
-	for (i = 0; i < NR_CPUS; ++i) {
+	for_each_cpu(i) {
 		kfree(cpu_msrs[i].counters);
 		cpu_msrs[i].counters = NULL;
 		kfree(cpu_msrs[i].controls);
@@ -138,10 +138,7 @@ static int allocate_msrs(void)
 	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
 
 	int i;
-	for (i = 0; i < NR_CPUS; ++i) {
-		if (!cpu_online(i))
-			continue;
-
+	for_each_online_cpu(i) {
 		cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
 		if (!cpu_msrs[i].counters) {
 			success = 0;

+ 1 - 6
arch/ia64/hp/sim/simserial.c

@@ -46,11 +46,6 @@
 #define KEYBOARD_INTR	3	/* must match with simulator! */
 
 #define NR_PORTS	1	/* only one port for now */
-#define SERIAL_INLINE	1
-
-#ifdef SERIAL_INLINE
-#define _INLINE_ inline
-#endif
 
 #define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
 
@@ -237,7 +232,7 @@ static void rs_put_char(struct tty_struct *tty, unsigned char ch)
 	local_irq_restore(flags);
 }
 
-static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done)
+static void transmit_chars(struct async_struct *info, int *intr_done)
 {
 	int count;
 	unsigned long flags;

+ 4 - 6
arch/m32r/kernel/irq.c

@@ -37,9 +37,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -52,9 +51,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, "  %s", action->name);

+ 1 - 3
arch/m68k/bvme6000/rtc.c

@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/mc146818rtc.h>	/* For struct rtc_time and ioctls, etc */
 #include <linux/smp_lock.h>
+#include <linux/bcd.h>
 #include <asm/bvme6000hw.h>
 
 #include <asm/io.h>
@@ -32,9 +33,6 @@
 *	ioctls.
 */
 
-#define BCD2BIN(val) (((val)&15) + ((val)>>4)*10)
-#define BIN2BCD(val) ((((val)/10)<<4) + (val)%10)
-
 static unsigned char days_in_mo[] =
 {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
 

+ 4 - 6
arch/mips/kernel/irq.c

@@ -68,9 +68,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -83,9 +82,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, "  %s", action->name);

+ 2 - 2
arch/mips/kernel/smp.c

@@ -167,8 +167,8 @@ int smp_call_function (void (*func) (void *info), void *info, int retry,
 	mb();
 
 	/* Send a message to all other CPUs and wait for them to respond */
-	for (i = 0; i < NR_CPUS; i++)
-		if (cpu_online(i) && i != cpu)
+	for_each_online_cpu(i)
+		if (i != cpu)
 			core_send_ipi(i, SMP_CALL_FUNCTION);
 
 	/* Wait for response */

+ 1 - 4
arch/mips/sgi-ip27/ip27-irq.c

@@ -88,12 +88,9 @@ static inline int find_level(cpuid_t *cpunum, int irq)
 {
 	int cpu, i;
 
-	for (cpu = 0; cpu <= NR_CPUS; cpu++) {
+	for_each_online_cpu(cpu) {
 		struct slice_data *si = cpu_data[cpu].data;
 
-		if (!cpu_online(cpu))
-			continue;
-
 		for (i = BASE_PCI_IRQ; i < LEVELS_PER_SLICE; i++)
 			if (si->level_to_irq[i] == irq) {
 				*cpunum = cpu;

+ 10 - 15
arch/parisc/kernel/smp.c

@@ -298,8 +298,8 @@ send_IPI_allbutself(enum ipi_message_type op)
 {
 	int i;
 	
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_online(i) && i != smp_processor_id())
+	for_each_online_cpu(i) {
+		if (i != smp_processor_id())
 			send_IPI_single(i, op);
 	}
 }
@@ -643,14 +643,13 @@ int sys_cpus(int argc, char **argv)
 	if ( argc == 1 ){
 	
 #ifdef DUMP_MORE_STATE
-		for(i=0; i<NR_CPUS; i++) {
+		for_each_online_cpu(i) {
 			int cpus_per_line = 4;
-			if(cpu_online(i)) {
-				if (j++ % cpus_per_line)
-					printk(" %3d",i);
-				else
-					printk("\n %3d",i);
-			}
+
+			if (j++ % cpus_per_line)
+				printk(" %3d",i);
+			else
+				printk("\n %3d",i);
 		}
 		printk("\n"); 
 #else
@@ -659,9 +658,7 @@ int sys_cpus(int argc, char **argv)
 	} else if((argc==2) && !(strcmp(argv[1],"-l"))) {
 		printk("\nCPUSTATE  TASK CPUNUM CPUID HARDCPU(HPA)\n");
 #ifdef DUMP_MORE_STATE
-		for(i=0;i<NR_CPUS;i++) {
-			if (!cpu_online(i))
-				continue;
+		for_each_online_cpu(i) {
 			if (cpu_data[i].cpuid != NO_PROC_ID) {
 				switch(cpu_data[i].state) {
 					case STATE_RENDEZVOUS:
@@ -695,9 +692,7 @@ int sys_cpus(int argc, char **argv)
 	} else if ((argc==2) && !(strcmp(argv[1],"-s"))) { 
 #ifdef DUMP_MORE_STATE
      		printk("\nCPUSTATE   CPUID\n");
-		for (i=0;i<NR_CPUS;i++) {
-			if (!cpu_online(i))
-				continue;
+		for_each_online_cpu(i) {
 			if (cpu_data[i].cpuid != NO_PROC_ID) {
 				switch(cpu_data[i].state) {
 					case STATE_RENDEZVOUS:

+ 32 - 6
arch/powerpc/Kconfig

@@ -127,6 +127,12 @@ config PPC_83xx
 	select 83xx
 	select PPC_FPU
 
+config PPC_85xx
+	bool "Freescale 85xx"
+	select E500
+	select FSL_SOC
+	select 85xx
+
 config 40x
 	bool "AMCC 40x"
 
@@ -139,8 +145,6 @@ config 8xx
 config E200
 	bool "Freescale e200"
 
-config E500
-	bool "Freescale e500"
 endchoice
 
 config POWER4_ONLY
@@ -168,6 +172,13 @@ config 6xx
 config 83xx
 	bool
 
+# this is temp to handle compat with arch=ppc
+config 85xx
+	bool
+
+config E500
+	bool
+
 config PPC_FPU
 	bool
 	default y if PPC64
@@ -217,6 +228,7 @@ config ALTIVEC
 config SPE
 	bool "SPE Support"
 	depends on E200 || E500
+	default y
 	---help---
 	  This option enables kernel support for the Signal Processing
 	  Extensions (SPE) to the PowerPC processor. The kernel currently
@@ -238,6 +250,21 @@ config PPC_STD_MMU_32
 	def_bool y
 	depends on PPC_STD_MMU && PPC32
 
+config VIRT_CPU_ACCOUNTING
+	bool "Deterministic task and CPU time accounting"
+	depends on PPC64
+	default y
+	help
+	  Select this option to enable more accurate task and CPU time
+	  accounting.  This is done by reading a CPU counter on each
+	  kernel entry and exit and on transitions within the kernel
+	  between system, softirq and hardirq state, so there is a
+	  small performance impact.  This also enables accounting of
+	  stolen time on logically-partitioned systems running on
+	  IBM POWER5-based machines.
+
+	  If in doubt, say Y here.
+
 config SMP
 	depends on PPC_STD_MMU
 	bool "Symmetric multi-processing support"
@@ -734,13 +761,12 @@ config GENERIC_ISA_DMA
 
 config PPC_I8259
 	bool
-	default y if 85xx
 	default n
 
 config PPC_INDIRECT_PCI
 	bool
 	depends on PCI
-	default y if 40x || 44x || 85xx
+	default y if 40x || 44x
 	default n
 
 config EISA
@@ -757,8 +783,8 @@ config MCA
 	bool
 
 config PCI
-	bool "PCI support" if 40x || CPM2 || PPC_83xx || 85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES)
-	default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx && !85xx
+	bool "PCI support" if 40x || CPM2 || PPC_83xx || PPC_85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES)
+	default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx && !PPC_85xx
 	default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS
 	default PCI_QSPAN if !4xx && !CPM2 && 8xx
 	help

+ 1 - 1
arch/powerpc/Makefile

@@ -148,7 +148,7 @@ all: $(KBUILD_IMAGE)
 
 CPPFLAGS_vmlinux.lds	:= -Upowerpc
 
-BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm uImage
+BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm uImage vmlinux.bin
 
 .PHONY: $(BOOT_TARGETS)
 

+ 0 - 2
arch/powerpc/boot/install.sh

@@ -1,7 +1,5 @@
 #!/bin/sh
 #
-# arch/ppc64/boot/install.sh
-#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.

+ 2 - 2
arch/powerpc/boot/main.c

@@ -152,7 +152,7 @@ static int is_elf64(void *hdr)
 	elf64ph = (Elf64_Phdr *)((unsigned long)elf64 +
 				 (unsigned long)elf64->e_phoff);
 	for (i = 0; i < (unsigned int)elf64->e_phnum; i++, elf64ph++)
-		if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0)
+		if (elf64ph->p_type == PT_LOAD)
 			break;
 	if (i >= (unsigned int)elf64->e_phnum)
 		return 0;
@@ -193,7 +193,7 @@ static int is_elf32(void *hdr)
 	elf32 = (Elf32_Ehdr *)elfheader;
 	elf32ph = (Elf32_Phdr *) ((unsigned long)elf32 + elf32->e_phoff);
 	for (i = 0; i < elf32->e_phnum; i++, elf32ph++)
-		if (elf32ph->p_type == PT_LOAD && elf32ph->p_offset != 0)
+		if (elf32ph->p_type == PT_LOAD)
 			break;
 	if (i >= elf32->e_phnum)
 		return 0;

+ 721 - 0
arch/powerpc/configs/mpc8540_ads_defconfig

@@ -0,0 +1,721 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 
+# Sat Jan 14 15:57:54 2006
+#
+# CONFIG_PPC64 is not set
+CONFIG_PPC32=y
+CONFIG_PPC_MERGE=y
+CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_PPC_UDBG_16550=y
+# CONFIG_GENERIC_TBSYNC is not set
+
+#
+# Processor support
+#
+# CONFIG_CLASSIC32 is not set
+# CONFIG_PPC_52xx is not set
+# CONFIG_PPC_82xx is not set
+# CONFIG_PPC_83xx is not set
+CONFIG_PPC_85xx=y
+# CONFIG_40x is not set
+# CONFIG_44x is not set
+# CONFIG_8xx is not set
+# CONFIG_E200 is not set
+CONFIG_85xx=y
+CONFIG_E500=y
+CONFIG_BOOKE=y
+CONFIG_FSL_BOOKE=y
+# CONFIG_PHYS_64BIT is not set
+CONFIG_SPE=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_EMBEDDED=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_MPIC=y
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Platform support
+#
+CONFIG_MPC8540_ADS=y
+CONFIG_MPC8540=y
+CONFIG_PPC_INDIRECT_PCI_BE=y
+
+#
+# Kernel options
+#
+# CONFIG_HIGHMEM is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+CONFIG_MATH_EMULATION=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_PROC_DEVICETREE=y
+# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+# CONFIG_SOFTWARE_SUSPEND is not set
+# CONFIG_SECCOMP is not set
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+# CONFIG_PPC_I8259 is not set
+CONFIG_PPC_INDIRECT_PCI=y
+CONFIG_FSL_SOC=y
+# CONFIG_PCI is not set
+# CONFIG_PCI_DOMAINS is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_HIGHMEM_START=0xfe000000
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_BOOT_LOAD=0x00800000
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+
+#
+# I2O device support
+#
+
+#
+# Macintosh device drivers
+#
+# CONFIG_WINDFARM is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# PHY device support
+#
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_GIANFAR=y
+CONFIG_GFAR_NAPI=y
+
+#
+# Ethernet (10000 Mbit)
+#
+
+#
+# Token Ring devices
+#
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_NVRAM is not set
+CONFIG_GEN_RTC=y
+# CONFIG_GEN_RTC_X is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia Capabilities Port drivers
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB_ARCH_HAS_HCD is not set
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+
+#
+# SN Devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V3 is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+
+#
+# Native Language Support
+#
+# CONFIG_NLS is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+
+#
+# Instrumentation Support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_MAGIC_SYSRQ is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_DEBUGGER is not set
+# CONFIG_BDI_SWITCH is not set
+# CONFIG_BOOTX_TEXT is not set
+# CONFIG_PPC_EARLY_DEBUG_LPAR is not set
+# CONFIG_PPC_EARLY_DEBUG_G5 is not set
+# CONFIG_PPC_EARLY_DEBUG_RTAS is not set
+# CONFIG_PPC_EARLY_DEBUG_MAPLE is not set
+# CONFIG_PPC_EARLY_DEBUG_ISERIES is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Hardware crypto devices
+#

+ 3 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -136,6 +136,9 @@ int main(void)
 	DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
 	DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
 	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+	DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
+	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
+	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
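
These DEFINE() entries are what let the assembly in entry_64.S and head_64.S address the new paca fields by name. The mechanism, roughly (a sketch of the usual asm-offsets idiom; the exact macro lives in the kernel source):

	/* asm-offsets.c is compiled but never linked: the build scans the
	 * generated assembly for the "->" markers and emits one #define
	 * per DEFINE() into asm-offsets.h for use from .S files. */
	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))

The three new offsets (PACA_STARTPURR, PACA_USER_TIME, PACA_SYSTEM_TIME) back the ACCOUNT_CPU_USER_ENTRY/EXIT macros added further down in this merge.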

+ 11 - 1
arch/powerpc/kernel/cputable.c

@@ -894,7 +894,7 @@ struct cpu_spec	cpu_specs[] = {
 		.platform		= "ppc405",
 	},
 	{	/* Xilinx Virtex-II Pro  */
-		.pvr_mask		= 0xffff0000,
+		.pvr_mask		= 0xfffff000,
 		.pvr_value		= 0x20010000,
 		.cpu_name		= "Virtex-II Pro",
 		.cpu_features		= CPU_FTRS_40X,
@@ -904,6 +904,16 @@ struct cpu_spec	cpu_specs[] = {
 		.dcache_bsize		= 32,
 		.platform		= "ppc405",
 	},
+	{	/* Xilinx Virtex-4 FX */
+		.pvr_mask		= 0xfffff000,
+		.pvr_value		= 0x20011000,
+		.cpu_name		= "Virtex-4 FX",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 |
+			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+	},
 	{	/* 405EP */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x51210000,
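
Widening pvr_mask from 0xffff0000 to 0xfffff000 is what allows the table to tell the Virtex-II Pro (PVR 0x2001_0xxx) apart from the new Virtex-4 FX entry (PVR 0x2001_1xxx). CPU identification is a first-match walk over cpu_specs[], along these lines (simplified sketch, not the kernel's exact identify_cpu()):

	struct cpu_spec *find_cpu_spec(unsigned int pvr)
	{
		struct cpu_spec *s = cpu_specs;

		/* First entry whose masked PVR matches wins, so entries
		 * with more specific masks must precede catch-all ones. */
		for (; s->pvr_mask != 0; s++)
			if ((pvr & s->pvr_mask) == s->pvr_value)
				return s;
		return NULL;
	}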

+ 6 - 5
arch/powerpc/kernel/entry_64.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc64/kernel/entry.S
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
@@ -63,6 +61,7 @@ system_call_common:
 	std	r12,_MSR(r1)
 	std	r0,GPR0(r1)
 	std	r10,GPR1(r1)
+	ACCOUNT_CPU_USER_ENTRY(r10, r11)
 	std	r2,GPR2(r1)
 	std	r3,GPR3(r1)
 	std	r4,GPR4(r1)
@@ -170,8 +169,9 @@ syscall_error_cont:
 	stdcx.	r0,0,r1			/* to clear the reservation */
 	andi.	r6,r8,MSR_PR
 	ld	r4,_LINK(r1)
-	beq-	1f			/* only restore r13 if */
-	ld	r13,GPR13(r1)		/* returning to usermode */
+	beq-	1f
+	ACCOUNT_CPU_USER_EXIT(r11, r12)
+	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
 1:	ld	r2,GPR2(r1)
 	li	r12,MSR_RI
 	andc	r11,r10,r12
@@ -322,7 +322,7 @@ _GLOBAL(ret_from_fork)
  * the fork code also.
  *
  * The code which creates the new task context is in 'copy_thread'
- * in arch/ppc64/kernel/process.c
+ * in arch/powerpc/kernel/process.c
  */
 	.align	7
 _GLOBAL(_switch)
@@ -486,6 +486,7 @@ restore:
 	 * userspace
 	 */
 	beq	1f
+	ACCOUNT_CPU_USER_EXIT(r3, r4)
 	REST_GPR(13, r1)
 1:
 	ld	r3,_CTR(r1)

+ 0 - 25
arch/powerpc/kernel/firmware.c

@@ -18,28 +18,3 @@
 #include <asm/firmware.h>
 
 unsigned long ppc64_firmware_features;
-
-#ifdef CONFIG_PPC_PSERIES
-firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
-	{FW_FEATURE_PFT,		"hcall-pft"},
-	{FW_FEATURE_TCE,		"hcall-tce"},
-	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
-	{FW_FEATURE_DABR,		"hcall-dabr"},
-	{FW_FEATURE_COPY,		"hcall-copy"},
-	{FW_FEATURE_ASR,		"hcall-asr"},
-	{FW_FEATURE_DEBUG,		"hcall-debug"},
-	{FW_FEATURE_PERF,		"hcall-perf"},
-	{FW_FEATURE_DUMP,		"hcall-dump"},
-	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
-	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
-	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
-	{FW_FEATURE_CRQ,		"hcall-crq"},
-	{FW_FEATURE_VIO,		"hcall-vio"},
-	{FW_FEATURE_RDMA,		"hcall-rdma"},
-	{FW_FEATURE_LLAN,		"hcall-lLAN"},
-	{FW_FEATURE_BULK,		"hcall-bulk"},
-	{FW_FEATURE_XDABR,		"hcall-xdabr"},
-	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
-	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
-};
-#endif

+ 0 - 2
arch/powerpc/kernel/head_44x.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc/kernel/head_44x.S
- *
  * Kernel execution entry point code.
  *
  *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>

+ 9 - 2
arch/powerpc/kernel/head_64.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc64/kernel/head.S
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *
@@ -279,6 +277,7 @@ exception_marker:
 	std	r10,0(r1);		/* make stack chain pointer	*/ \
 	std	r0,GPR0(r1);		/* save r0 in stackframe	*/ \
 	std	r10,GPR1(r1);		/* save r1 in stackframe	*/ \
+	ACCOUNT_CPU_USER_ENTRY(r9, r10);				   \
 	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
 	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
 	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe	*/ \
@@ -846,6 +845,14 @@ fast_exception_return:
 	ld	r11,_NIP(r1)
 	andi.	r3,r12,MSR_RI		/* check if RI is set */
 	beq-	unrecov_fer
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	andi.	r3,r12,MSR_PR
+	beq	2f
+	ACCOUNT_CPU_USER_EXIT(r3, r4)
+2:
+#endif
+
 	ld	r3,_CCR(r1)
 	ld	r4,_LINK(r1)
 	ld	r5,_CTR(r1)

+ 0 - 2
arch/powerpc/kernel/head_8xx.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/except_8xx.S
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP

+ 363 - 0
arch/powerpc/kernel/head_booke.h

@@ -0,0 +1,363 @@
+#ifndef __HEAD_BOOKE_H__
+#define __HEAD_BOOKE_H__
+
+/*
+ * Macros used for common Book-e exception handling
+ */
+
+#define SET_IVOR(vector_number, vector_label)		\
+		li	r26,vector_label@l; 		\
+		mtspr	SPRN_IVOR##vector_number,r26;	\
+		sync
+
+#define NORMAL_EXCEPTION_PROLOG						     \
+	mtspr	SPRN_SPRG0,r10;		/* save two registers to work with */\
+	mtspr	SPRN_SPRG1,r11;						     \
+	mtspr	SPRN_SPRG4W,r1;						     \
+	mfcr	r10;			/* save CR in r10 for now	   */\
+	mfspr	r11,SPRN_SRR1;		/* check whether user or kernel    */\
+	andi.	r11,r11,MSR_PR;						     \
+	beq	1f;							     \
+	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
+	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
+	addi	r1,r1,THREAD_SIZE;					     \
+1:	subi	r1,r1,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
+	mr	r11,r1;							     \
+	stw	r10,_CCR(r11);          /* save various registers	   */\
+	stw	r12,GPR12(r11);						     \
+	stw	r9,GPR9(r11);						     \
+	mfspr	r10,SPRN_SPRG0;						     \
+	stw	r10,GPR10(r11);						     \
+	mfspr	r12,SPRN_SPRG1;						     \
+	stw	r12,GPR11(r11);						     \
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r10,SPRN_SPRG4R;					     \
+	mfspr	r12,SPRN_SRR0;						     \
+	stw	r10,GPR1(r11);						     \
+	mfspr	r9,SPRN_SRR1;						     \
+	stw	r10,0(r11);						     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	stw	r0,GPR0(r11);						     \
+	SAVE_4GPRS(3, r11);						     \
+	SAVE_2GPRS(7, r11)
+
+/* To handle the additional exception priority levels on 40x and Book-E
+ * processors we allocate a 4k stack per additional priority level. The various
+ * head_xxx.S files allocate space (exception_stack_top) for each priority's
+ * stack times the number of CPUs
+ *
+ * On 40x critical is the only additional level
+ * On 44x/e500 we have critical and machine check
+ * On e200 we have critical and debug (machine check occurs via critical)
+ *
+ * Additionally we reserve a SPRG for each priority level so we can free up a
+ * GPR to use as the base for indirect access to the exception stacks.  This
+ * is necessary since the MMU is always on, for Book-E parts, and the stacks
+ * are offset from KERNELBASE.
+ *
+ */
+#define BOOKE_EXCEPTION_STACK_SIZE	(8192)
+
+/* CRIT_SPRG only used in critical exception handling */
+#define CRIT_SPRG	SPRN_SPRG2
+/* MCHECK_SPRG only used in machine check exception handling */
+#define MCHECK_SPRG	SPRN_SPRG6W
+
+#define MCHECK_STACK_TOP	(exception_stack_top - 4096)
+#define CRIT_STACK_TOP		(exception_stack_top)
+
+/* only on e200 for now */
+#define DEBUG_STACK_TOP		(exception_stack_top - 4096)
+#define DEBUG_SPRG		SPRN_SPRG6W
+
+#ifdef CONFIG_SMP
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
+	mfspr	r8,SPRN_PIR;				\
+	mulli	r8,r8,BOOKE_EXCEPTION_STACK_SIZE;	\
+	neg	r8,r8;					\
+	addis	r8,r8,level##_STACK_TOP@ha;		\
+	addi	r8,r8,level##_STACK_TOP@l
+#else
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
+	lis	r8,level##_STACK_TOP@h;			\
+	ori	r8,r8,level##_STACK_TOP@l
+#endif
+
+/*
+ * Exception prolog for critical/machine check exceptions.  This is a
+ * little different from the normal exception prolog above since a
+ * critical/machine check exception can potentially occur at any point
+ * during normal exception processing. Thus we cannot use the same SPRG
+ * registers as the normal prolog above. Instead we use a portion of the
+ * critical/machine check exception stack at low physical addresses.
+ */
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
+	mtspr	exc_level##_SPRG,r8;					     \
+	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
+	stw	r10,GPR10-INT_FRAME_SIZE(r8);				     \
+	stw	r11,GPR11-INT_FRAME_SIZE(r8);				     \
+	mfcr	r10;			/* save CR in r10 for now	   */\
+	mfspr	r11,exc_level_srr1;	/* check whether user or kernel    */\
+	andi.	r11,r11,MSR_PR;						     \
+	mr	r11,r8;							     \
+	mfspr	r8,exc_level##_SPRG;					     \
+	beq	1f;							     \
+	/* COMING FROM USER MODE */					     \
+	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
+	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+	addi	r11,r11,THREAD_SIZE;					     \
+1:	subi	r11,r11,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
+	stw	r10,_CCR(r11);          /* save various registers	   */\
+	stw	r12,GPR12(r11);						     \
+	stw	r9,GPR9(r11);						     \
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
+	stw	r12,_DEAR(r11);		/* since they may have had stuff   */\
+	mfspr	r9,SPRN_ESR;		/* in them at the point where the  */\
+	stw	r9,_ESR(r11);		/* exception was taken		   */\
+	mfspr	r12,exc_level_srr0;					     \
+	stw	r1,GPR1(r11);						     \
+	mfspr	r9,exc_level_srr1;					     \
+	stw	r1,0(r11);						     \
+	mr	r1,r11;							     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	stw	r0,GPR0(r11);						     \
+	SAVE_4GPRS(3, r11);						     \
+	SAVE_2GPRS(7, r11)
+
+#define CRITICAL_EXCEPTION_PROLOG \
+		EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG \
+		EXC_LEVEL_EXCEPTION_PROLOG(DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG \
+		EXC_LEVEL_EXCEPTION_PROLOG(MCHECK, SPRN_MCSRR0, SPRN_MCSRR1)
+
+/*
+ * Exception vectors.
+ */
+#define	START_EXCEPTION(label)						     \
+        .align 5;              						     \
+label:
+
+#define FINISH_EXCEPTION(func)					\
+	bl	transfer_to_handler_full;			\
+	.long	func;						\
+	.long	ret_from_except_full
+
+#define EXCEPTION(n, label, hdlr, xfer)				\
+	START_EXCEPTION(label);					\
+	NORMAL_EXCEPTION_PROLOG;				\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	xfer(n, hdlr)
+
+#define CRITICAL_EXCEPTION(n, label, hdlr)			\
+	START_EXCEPTION(label);					\
+	CRITICAL_EXCEPTION_PROLOG;				\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+			  NOCOPY, crit_transfer_to_handler, \
+			  ret_from_crit_exc)
+
+#define MCHECK_EXCEPTION(n, label, hdlr)			\
+	START_EXCEPTION(label);					\
+	MCHECK_EXCEPTION_PROLOG;				\
+	mfspr	r5,SPRN_ESR;					\
+	stw	r5,_ESR(r11);					\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+			  NOCOPY, mcheck_transfer_to_handler,   \
+			  ret_from_mcheck_exc)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret)	\
+	li	r10,trap;					\
+	stw	r10,_TRAP(r11);					\
+	lis	r10,msr@h;					\
+	ori	r10,r10,msr@l;					\
+	copyee(r10, r9);					\
+	bl	tfer;		 				\
+	.long	hdlr;						\
+	.long	ret
+
+#define COPY_EE(d, s)		rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
+			  ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr)	\
+	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+			  ret_from_except)
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved.  This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (eg ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the MSR via SPRN_CSRR1, which will still have
+ * the MSR_DE bit set.
+ */
+#ifdef CONFIG_E200
+#define DEBUG_EXCEPTION							      \
+	START_EXCEPTION(Debug);						      \
+	DEBUG_EXCEPTION_PROLOG;						      \
+									      \
+	/*								      \
+	 * If there is a single step or branch-taken exception in an	      \
+	 * exception entry sequence, it was probably meant to apply to	      \
+	 * the code where the exception occurred (since exception entry	      \
+	 * doesn't turn off DE automatically).  We simulate the effect	      \
+	 * of turning off DE on entry to an exception handler by turning      \
+	 * off DE in the CSRR1 value and clearing the debug status.	      \
+	 */								      \
+	mfspr	r10,SPRN_DBSR;		/* check single-step/branch taken */  \
+	andis.	r10,r10,DBSR_IC@h;					      \
+	beq+	2f;							      \
+									      \
+	lis	r10,KERNELBASE@h;	/* check if exception in vectors */   \
+	ori	r10,r10,KERNELBASE@l;					      \
+	cmplw	r12,r10;						      \
+	blt+	2f;			/* addr below exception vectors */    \
+									      \
+	lis	r10,Debug@h;						      \
+	ori	r10,r10,Debug@l;					      \
+	cmplw	r12,r10;						      \
+	bgt+	2f;			/* addr above exception vectors */    \
+									      \
+	/* here it looks like we got an inappropriate debug exception. */     \
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the DSRR1 value */     \
+	lis	r10,DBSR_IC@h;		/* clear the IC event */	      \
+	mtspr	SPRN_DBSR,r10;						      \
+	/* restore state and get out */					      \
+	lwz	r10,_CCR(r11);						      \
+	lwz	r0,GPR0(r11);						      \
+	lwz	r1,GPR1(r11);						      \
+	mtcrf	0x80,r10;						      \
+	mtspr	SPRN_DSRR0,r12;						      \
+	mtspr	SPRN_DSRR1,r9;						      \
+	lwz	r9,GPR9(r11);						      \
+	lwz	r12,GPR12(r11);						      \
+	mtspr	DEBUG_SPRG,r8;						      \
+	BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \
+	lwz	r10,GPR10-INT_FRAME_SIZE(r8);				      \
+	lwz	r11,GPR11-INT_FRAME_SIZE(r8);				      \
+	mfspr	r8,DEBUG_SPRG;						      \
+									      \
+	RFDI;								      \
+	b	.;							      \
+									      \
+	/* continue normal handling for a critical exception... */	      \
+2:	mfspr	r4,SPRN_DBSR;						      \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+#else
+#define DEBUG_EXCEPTION							      \
+	START_EXCEPTION(Debug);						      \
+	CRITICAL_EXCEPTION_PROLOG;					      \
+									      \
+	/*								      \
+	 * If there is a single step or branch-taken exception in an	      \
+	 * exception entry sequence, it was probably meant to apply to	      \
+	 * the code where the exception occurred (since exception entry	      \
+	 * doesn't turn off DE automatically).  We simulate the effect	      \
+	 * of turning off DE on entry to an exception handler by turning      \
+	 * off DE in the CSRR1 value and clearing the debug status.	      \
+	 */								      \
+	mfspr	r10,SPRN_DBSR;		/* check single-step/branch taken */  \
+	andis.	r10,r10,DBSR_IC@h;					      \
+	beq+	2f;							      \
+									      \
+	lis	r10,KERNELBASE@h;	/* check if exception in vectors */   \
+	ori	r10,r10,KERNELBASE@l;					      \
+	cmplw	r12,r10;						      \
+	blt+	2f;			/* addr below exception vectors */    \
+									      \
+	lis	r10,Debug@h;						      \
+	ori	r10,r10,Debug@l;					      \
+	cmplw	r12,r10;						      \
+	bgt+	2f;			/* addr above exception vectors */    \
+									      \
+	/* here it looks like we got an inappropriate debug exception. */     \
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the CSRR1 value */     \
+	lis	r10,DBSR_IC@h;		/* clear the IC event */	      \
+	mtspr	SPRN_DBSR,r10;						      \
+	/* restore state and get out */					      \
+	lwz	r10,_CCR(r11);						      \
+	lwz	r0,GPR0(r11);						      \
+	lwz	r1,GPR1(r11);						      \
+	mtcrf	0x80,r10;						      \
+	mtspr	SPRN_CSRR0,r12;						      \
+	mtspr	SPRN_CSRR1,r9;						      \
+	lwz	r9,GPR9(r11);						      \
+	lwz	r12,GPR12(r11);						      \
+	mtspr	CRIT_SPRG,r8;						      \
+	BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */  \
+	lwz	r10,GPR10-INT_FRAME_SIZE(r8);				      \
+	lwz	r11,GPR11-INT_FRAME_SIZE(r8);				      \
+	mfspr	r8,CRIT_SPRG;						      \
+									      \
+	rfci;								      \
+	b	.;							      \
+									      \
+	/* continue normal handling for a critical exception... */	      \
+2:	mfspr	r4,SPRN_DBSR;						      \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+#endif
+
+#define INSTRUCTION_STORAGE_EXCEPTION					      \
+	START_EXCEPTION(InstructionStorage)				      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	mfspr	r5,SPRN_ESR;		/* Grab the ESR and save it */	      \
+	stw	r5,_ESR(r11);						      \
+	mr      r4,r12;                 /* Pass SRR0 as arg2 */		      \
+	li      r5,0;                   /* Pass zero as arg3 */		      \
+	EXC_XFER_EE_LITE(0x0400, handle_page_fault)
+
+#define ALIGNMENT_EXCEPTION						      \
+	START_EXCEPTION(Alignment)					      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	mfspr   r4,SPRN_DEAR;           /* Grab the DEAR and save it */	      \
+	stw     r4,_DEAR(r11);						      \
+	addi    r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_EE(0x0600, alignment_exception)
+
+#define PROGRAM_EXCEPTION						      \
+	START_EXCEPTION(Program)					      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	mfspr	r4,SPRN_ESR;		/* Grab the ESR and save it */	      \
+	stw	r4,_ESR(r11);						      \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_STD(0x0700, program_check_exception)
+
+#define DECREMENTER_EXCEPTION						      \
+	START_EXCEPTION(Decrementer)					      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	lis     r0,TSR_DIS@h;           /* Setup the DEC interrupt mask */    \
+	mtspr   SPRN_TSR,r0;		/* Clear the DEC interrupt */	      \
+	addi    r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_LITE(0x0900, timer_interrupt)
+
+#define FP_UNAVAILABLE_EXCEPTION					      \
+	START_EXCEPTION(FloatingPointUnavailable)			      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	bne	load_up_fpu;		/* if from user, just load it up */   \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+
+#endif /* __HEAD_BOOKE_H__ */
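
The per-level exception stacks set up by this header are flat arrays carved out below exception_stack_top, one BOOKE_EXCEPTION_STACK_SIZE slot per CPU. What BOOKE_LOAD_EXC_LEVEL_STACK computes in r8, expressed as C (illustrative only; the real code must do it in assembly because no usable stack exists at that point):

	#define BOOKE_EXCEPTION_STACK_SIZE	8192

	/* SMP case: step down from the level's stack top by the CPU's
	 * Processor ID Register (PIR) value, giving each CPU its own slot. */
	static unsigned long exc_level_stack(unsigned long level_stack_top,
					     unsigned int pir)
	{
		return level_stack_top - pir * BOOKE_EXCEPTION_STACK_SIZE;
	}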

+ 4 - 2
arch/powerpc/kernel/head_fsl_booke.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc/kernel/head_fsl_booke.S
- *
  * Kernel execution entry point code.
  *
  *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
@@ -316,6 +314,7 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	 */
 	lis	r2,DBCR0_IDM@h
 	mtspr	SPRN_DBCR0,r2
+	isync
 	/* clear any residual debug events */
 	li	r2,-1
 	mtspr	SPRN_DBSR,r2
@@ -1002,12 +1001,15 @@ _GLOBAL(giveup_fpu)
 _GLOBAL(abort)
 	li	r13,0
         mtspr   SPRN_DBCR0,r13		/* disable all debug events */
+	isync
 	mfmsr	r13
 	ori	r13,r13,MSR_DE@l	/* Enable Debug Events */
 	mtmsr	r13
+	isync
         mfspr   r13,SPRN_DBCR0
         lis	r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h
         mtspr   SPRN_DBCR0,r13
+	isync
 
 _GLOBAL(set_context)
 

+ 0 - 2
arch/powerpc/kernel/iomap.c

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/kernel/iomap.c
- *
  * ppc64 "iomap" interface implementation.
  *
  * (C) Copyright 2004 Linus Torvalds

+ 0 - 1
arch/powerpc/kernel/iommu.c

@@ -1,5 +1,4 @@
 /*
- * arch/ppc64/kernel/iommu.c
  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  *
  * Rewrite, cleanup, new allocation schemes, virtual merging:

+ 24 - 13
arch/powerpc/kernel/irq.c

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/irq.c
- *
  *  Derived from arch/i386/kernel/irq.c
  *    Copyright (C) 1992 Linus Torvalds
  *  Adapted from arch/i386 by Gary Thomas
@@ -137,9 +135,8 @@ skip:
 #ifdef CONFIG_TAU_INT
 		if (tau_initialized){
 			seq_puts(p, "TAU: ");
-			for (j = 0; j < NR_CPUS; j++)
-				if (cpu_online(j))
-					seq_printf(p, "%10u ", tau_interrupts(j));
+			for_each_online_cpu(j)
+				seq_printf(p, "%10u ", tau_interrupts(j));
 			seq_puts(p, "  PowerPC             Thermal Assist (cpu temp)\n");
 		}
 #endif
@@ -371,6 +368,7 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
 	return NO_IRQ;
 
 }
+#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_IRQSTACKS
 struct thread_info *softirq_ctx[NR_CPUS];
@@ -394,10 +392,24 @@ void irq_ctx_init(void)
 	}
 }
 
+static inline void do_softirq_onstack(void)
+{
+	struct thread_info *curtp, *irqtp;
+
+	curtp = current_thread_info();
+	irqtp = softirq_ctx[smp_processor_id()];
+	irqtp->task = curtp->task;
+	call_do_softirq(irqtp);
+	irqtp->task = NULL;
+}
+
+#else
+#define do_softirq_onstack()	__do_softirq()
+#endif /* CONFIG_IRQSTACKS */
+
 void do_softirq(void)
 {
 	unsigned long flags;
-	struct thread_info *curtp, *irqtp;
 
 	if (in_interrupt())
 		return;
@@ -405,19 +417,18 @@ void do_softirq(void)
 	local_irq_save(flags);
 
 	if (local_softirq_pending()) {
-		curtp = current_thread_info();
-		irqtp = softirq_ctx[smp_processor_id()];
-		irqtp->task = curtp->task;
-		call_do_softirq(irqtp);
-		irqtp->task = NULL;
+		account_system_vtime(current);
+		local_bh_disable();
+		do_softirq_onstack();
+		account_system_vtime(current);
+		__local_bh_enable();
 	}
 
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(do_softirq);
 
-#endif /* CONFIG_IRQSTACKS */
-
+#ifdef CONFIG_PPC64
 static int __init setup_noirqdistrib(char *str)
 {
 	distribute_irqs = 0;

+ 2 - 3
arch/powerpc/kernel/kprobes.c

@@ -1,6 +1,5 @@
 /*
  *  Kernel Probes (KProbes)
- *  arch/ppc64/kernel/kprobes.c
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -82,9 +81,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	down(&kprobe_mutex);
+	mutex_lock(&kprobe_mutex);
 	free_insn_slot(p->ainsn.insn);
-	up(&kprobe_mutex);
+	mutex_unlock(&kprobe_mutex);
 }
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
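
The kprobe_mutex change above is part of the tree-wide conversion from semaphores used as mutexes to the real mutex API that landed in this kernel cycle. The pattern is mechanical (a sketch of the idiom, not the full kprobes file):

	#include <linux/mutex.h>

	static DEFINE_MUTEX(example_mutex);	/* was: DECLARE_MUTEX(example_mutex) */

	void example_critical_section(void)
	{
		mutex_lock(&example_mutex);	/* was: down(&example_mutex) */
		/* ... work that must be serialized ... */
		mutex_unlock(&example_mutex);	/* was: up(&example_mutex) */
	}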

+ 1 - 4
arch/powerpc/kernel/of_device.c

@@ -147,15 +147,12 @@ postcore_initcall(of_bus_driver_init);
 
 int of_register_driver(struct of_platform_driver *drv)
 {
-	int count = 0;
-
 	/* initialize common driver fields */
 	drv->driver.name = drv->name;
 	drv->driver.bus = &of_platform_bus_type;
 
 	/* register with core */
-	count = driver_register(&drv->driver);
-	return count ? count : 1;
+	return driver_register(&drv->driver);
 }
 
 void of_unregister_driver(struct of_platform_driver *drv)

+ 0 - 1
arch/powerpc/kernel/pci_iommu.c

@@ -1,5 +1,4 @@
 /*
- * arch/ppc64/kernel/pci_iommu.c
  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  *
  * Rewrite, cleanup, new allocation schemes:

+ 0 - 1
arch/powerpc/kernel/ppc_ksyms.c

@@ -57,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs);
 extern void alignment_exception(struct pt_regs *regs);
 extern void program_check_exception(struct pt_regs *regs);
 extern void single_step_exception(struct pt_regs *regs);
-extern int pmac_newworld;
 extern int sys_sigreturn(struct pt_regs *regs);
 
 EXPORT_SYMBOL(clear_pages);

+ 6 - 3
arch/powerpc/kernel/process.c

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/process.c
- *
  *  Derived from "arch/i386/kernel/process.c"
  *    Copyright (C) 1995  Linus Torvalds
  *
@@ -47,9 +45,9 @@
 #include <asm/mmu.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
+#include <asm/time.h>
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
-#include <asm/time.h>
 #endif
 
 extern unsigned long _get_SP(void);
@@ -330,6 +328,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
 #endif
 
 	local_irq_save(flags);
+
+	account_system_vtime(current);
+	account_process_vtime(current);
+	calculate_steal_time();
+
 	last = _switch(old_thread, new_thread);
 
 	local_irq_restore(flags);

+ 0 - 4
arch/powerpc/kernel/prom.c

@@ -829,10 +829,6 @@ void __init unflatten_device_tree(void)
 
 	/* Allocate memory for the expanded device tree */
 	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
-	if (!mem) {
-		DBG("Couldn't allocate memory with lmb_alloc()!\n");
-		panic("Couldn't allocate memory with lmb_alloc()!\n");
-	}
 	mem = (unsigned long) __va(mem);
 
 	((u32 *)mem)[size / 4] = 0xdeadbeef;

+ 0 - 2
arch/powerpc/kernel/ptrace-common.h

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/kernel/ptrace-common.h
- *
  *    Copyright (c) 2002 Stephen Rothwell, IBM Coproration
  *    Extracted from ptrace.c and ptrace32.c
  *

+ 0 - 1
arch/powerpc/kernel/rtas-proc.c

@@ -1,5 +1,4 @@
 /*
- *   arch/ppc64/kernel/rtas-proc.c
  *   Copyright (C) 2000 Tilmann Bitterberg
  *   (tilmann@bitterberg.de)
  *

+ 0 - 2
arch/powerpc/kernel/rtas_pci.c

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/kernel/rtas_pci.c
- *
  * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
  * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
  *

+ 2 - 3
arch/powerpc/kernel/setup-common.c

@@ -162,9 +162,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
 		unsigned long bogosum = 0;
 		int i;
-		for (i = 0; i < NR_CPUS; ++i)
-			if (cpu_online(i))
-				bogosum += loops_per_jiffy;
+		for_each_online_cpu(i)
+			bogosum += loops_per_jiffy;
 		seq_printf(m, "total bogomips\t: %lu.%02lu\n",
 			   bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
 #endif /* CONFIG_SMP && CONFIG_PPC32 */

+ 2 - 3
arch/powerpc/kernel/setup_32.c

@@ -272,9 +272,8 @@ int __init ppc_init(void)
 	if ( ppc_md.progress ) ppc_md.progress("             ", 0xffff);
 
 	/* register CPU devices */
-	for (i = 0; i < NR_CPUS; i++)
-		if (cpu_possible(i))
-			register_cpu(&cpu_devices[i], i, NULL);
+	for_each_cpu(i)
+		register_cpu(&cpu_devices[i], i, NULL);
 
 	/* call platform init */
 	if (ppc_md.init != NULL) {

+ 0 - 2
arch/powerpc/kernel/setup_64.c

@@ -497,8 +497,6 @@ void __init setup_system(void)
 #endif
 	printk("-----------------------------------------------------\n");
 
-	mm_init_ppc64();
-
 	DBG(" <- setup_system()\n");
 }
 

+ 0 - 2
arch/powerpc/kernel/signal_64.c

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/kernel/signal.c
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *

+ 3 - 1
arch/powerpc/kernel/smp.c

@@ -541,7 +541,7 @@ int __devinit start_secondary(void *unused)
 		smp_ops->take_timebase();
 
 	if (system_state > SYSTEM_BOOTING)
-		per_cpu(last_jiffy, cpu) = get_tb();
+		snapshot_timebase();
 
 	spin_lock(&call_lock);
 	cpu_set(cpu, cpu_online_map);
@@ -573,6 +573,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	set_cpus_allowed(current, old_mask);
 
+	snapshot_timebases();
+
 	dump_numa_cpu_topology();
 }
 

+ 239 - 2
arch/powerpc/kernel/time.c

@@ -51,6 +51,7 @@
 #include <linux/percpu.h>
 #include <linux/rtc.h>
 #include <linux/jiffies.h>
+#include <linux/posix-timers.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -98,6 +99,7 @@ unsigned long tb_ticks_per_jiffy;
 unsigned long tb_ticks_per_usec = 100; /* sane default */
 EXPORT_SYMBOL(tb_ticks_per_usec);
 unsigned long tb_ticks_per_sec;
+EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */
 u64 tb_to_xs;
 unsigned tb_to_us;
 
@@ -135,6 +137,224 @@ unsigned long tb_last_stamp;
  */
 DEFINE_PER_CPU(unsigned long, last_jiffy);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Factors for converting from cputime_t (timebase ticks) to
+ * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
+ * These are all stored as 0.64 fixed-point binary fractions.
+ */
+u64 __cputime_jiffies_factor;
+EXPORT_SYMBOL(__cputime_jiffies_factor);
+u64 __cputime_msec_factor;
+EXPORT_SYMBOL(__cputime_msec_factor);
+u64 __cputime_sec_factor;
+EXPORT_SYMBOL(__cputime_sec_factor);
+u64 __cputime_clockt_factor;
+EXPORT_SYMBOL(__cputime_clockt_factor);
+
+static void calc_cputime_factors(void)
+{
+	struct div_result res;
+
+	div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
+	__cputime_jiffies_factor = res.result_low;
+	div128_by_32(1000, 0, tb_ticks_per_sec, &res);
+	__cputime_msec_factor = res.result_low;
+	div128_by_32(1, 0, tb_ticks_per_sec, &res);
+	__cputime_sec_factor = res.result_low;
+	div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
+	__cputime_clockt_factor = res.result_low;
+}
+
+/*
+ * Read the PURR on systems that have it, otherwise the timebase.
+ */
+static u64 read_purr(void)
+{
+	if (cpu_has_feature(CPU_FTR_PURR))
+		return mfspr(SPRN_PURR);
+	return mftb();
+}
+
+/*
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+	u64 now, delta;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	now = read_purr();
+	delta = now - get_paca()->startpurr;
+	get_paca()->startpurr = now;
+	if (!in_interrupt()) {
+		delta += get_paca()->system_time;
+		get_paca()->system_time = 0;
+	}
+	account_system_time(tsk, 0, delta);
+	local_irq_restore(flags);
+}
+
+/*
+ * Transfer the user and system times accumulated in the paca
+ * by the exception entry and exit code to the generic process
+ * user and system time records.
+ * Must be called with interrupts disabled.
+ */
+void account_process_vtime(struct task_struct *tsk)
+{
+	cputime_t utime;
+
+	utime = get_paca()->user_time;
+	get_paca()->user_time = 0;
+	account_user_time(tsk, utime);
+}
+
+static void account_process_time(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	account_process_vtime(current);
+	run_local_timers();
+	if (rcu_pending(cpu))
+		rcu_check_callbacks(cpu, user_mode(regs));
+	scheduler_tick();
+ 	run_posix_cpu_timers(current);
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+/*
+ * Stuff for accounting stolen time.
+ */
+struct cpu_purr_data {
+	int	initialized;			/* thread is running */
+	u64	tb0;			/* timebase at origin time */
+	u64	purr0;			/* PURR at origin time */
+	u64	tb;			/* last TB value read */
+	u64	purr;			/* last PURR value read */
+	u64	stolen;			/* stolen time so far */
+	spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
+
+static void snapshot_tb_and_purr(void *data)
+{
+	struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
+
+	p->tb0 = mftb();
+	p->purr0 = mfspr(SPRN_PURR);
+	p->tb = p->tb0;
+	p->purr = 0;
+	wmb();
+	p->initialized = 1;
+}
+
+/*
+ * Called during boot when all cpus have come up.
+ */
+void snapshot_timebases(void)
+{
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PURR))
+		return;
+	for_each_cpu(cpu)
+		spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock);
+	on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1);
+}
+
+void calculate_steal_time(void)
+{
+	u64 tb, purr, t0;
+	s64 stolen;
+	struct cpu_purr_data *p0, *pme, *phim;
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PURR))
+		return;
+	cpu = smp_processor_id();
+	pme = &per_cpu(cpu_purr_data, cpu);
+	if (!pme->initialized)
+		return;		/* this can happen in early boot */
+	p0 = &per_cpu(cpu_purr_data, cpu & ~1);
+	phim = &per_cpu(cpu_purr_data, cpu ^ 1);
+	spin_lock(&p0->lock);
+	tb = mftb();
+	purr = mfspr(SPRN_PURR) - pme->purr0;
+	if (!phim->initialized || !cpu_online(cpu ^ 1)) {
+		stolen = (tb - pme->tb) - (purr - pme->purr);
+	} else {
+		t0 = pme->tb0;
+		if (phim->tb0 < t0)
+			t0 = phim->tb0;
+		stolen = phim->tb - t0 - phim->purr - purr - p0->stolen;
+	}
+	if (stolen > 0) {
+		account_steal_time(current, stolen);
+		p0->stolen += stolen;
+	}
+	pme->tb = tb;
+	pme->purr = purr;
+	spin_unlock(&p0->lock);
+}
+
+/*
+ * Must be called before the cpu is added to the online map when
+ * a cpu is being brought up at runtime.
+ */
+static void snapshot_purr(void)
+{
+	int cpu;
+	u64 purr;
+	struct cpu_purr_data *p0, *pme, *phim;
+	unsigned long flags;
+
+	if (!cpu_has_feature(CPU_FTR_PURR))
+		return;
+	cpu = smp_processor_id();
+	pme = &per_cpu(cpu_purr_data, cpu);
+	p0 = &per_cpu(cpu_purr_data, cpu & ~1);
+	phim = &per_cpu(cpu_purr_data, cpu ^ 1);
+	spin_lock_irqsave(&p0->lock, flags);
+	pme->tb = pme->tb0 = mftb();
+	purr = mfspr(SPRN_PURR);
+	if (!phim->initialized) {
+		pme->purr = 0;
+		pme->purr0 = purr;
+	} else {
+		/* set p->purr and p->purr0 for no change in p0->stolen */
+		pme->purr = phim->tb - phim->tb0 - phim->purr - p0->stolen;
+		pme->purr0 = purr - pme->purr;
+	}
+	pme->initialized = 1;
+	spin_unlock_irqrestore(&p0->lock, flags);
+}
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#define calc_cputime_factors()
+#define account_process_time(regs)	update_process_times(user_mode(regs))
+#define calculate_steal_time()		do { } while (0)
+#endif
+
+#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
+#define snapshot_purr()			do { } while (0)
+#endif
+
+/*
+ * Called when a cpu comes up after the system has finished booting,
+ * i.e. as a result of a hotplug cpu action.
+ */
+void snapshot_timebase(void)
+{
+	__get_cpu_var(last_jiffy) = get_tb();
+	snapshot_purr();
+}
+
 void __delay(unsigned long loops)
 {
 	unsigned long start;
@@ -392,6 +612,7 @@ static void iSeries_tb_recal(void)
 						new_tb_ticks_per_jiffy, sign, tick_diff );
 				tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
 				tb_ticks_per_sec   = new_tb_ticks_per_sec;
+				calc_cputime_factors();
 				div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
 				do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
 				tb_to_xs = divres.result_low;
@@ -440,6 +661,7 @@ void timer_interrupt(struct pt_regs * regs)
 	irq_enter();
 
 	profile_tick(CPU_PROFILING, regs);
+	calculate_steal_time();
 
 #ifdef CONFIG_PPC_ISERIES
 	get_lppaca()->int_dword.fields.decr_int = 0;
@@ -461,7 +683,7 @@ void timer_interrupt(struct pt_regs * regs)
 		 * is the case.
 		 */
 		if (!cpu_is_offline(cpu))
-			update_process_times(user_mode(regs));
+			account_process_time(regs);
 
 		/*
 		 * No need to check whether cpu is offline here; boot_cpuid
@@ -518,13 +740,27 @@ void wakeup_decrementer(void)
 void __init smp_space_timers(unsigned int max_cpus)
 {
 	int i;
+	unsigned long half = tb_ticks_per_jiffy / 2;
 	unsigned long offset = tb_ticks_per_jiffy / max_cpus;
 	unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
 
 	/* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
 	previous_tb -= tb_ticks_per_jiffy;
+	/*
+	 * The stolen time calculation for POWER5 shared-processor LPAR
+	 * systems works better if the two threads' timebase interrupts
+	 * are staggered by half a jiffy with respect to each other.
+	 */
 	for_each_cpu(i) {
-		if (i != boot_cpuid) {
+		if (i == boot_cpuid)
+			continue;
+		if (i == (boot_cpuid ^ 1))
+			per_cpu(last_jiffy, i) =
+				per_cpu(last_jiffy, boot_cpuid) - half;
+		else if (i & 1)
+			per_cpu(last_jiffy, i) =
+				per_cpu(last_jiffy, i ^ 1) + half;
+		else {
 			previous_tb += offset;
 			per_cpu(last_jiffy, i) = previous_tb;
 		}
@@ -720,6 +956,7 @@ void __init time_init(void)
 	tb_ticks_per_sec = ppc_tb_freq;
 	tb_ticks_per_usec = ppc_tb_freq / 1000000;
 	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
+	calc_cputime_factors();
 
 	/*
 	 * Calculate the length of each tick in ns.  It will not be
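
The cputime factors introduced in this file are 0.64 fixed-point fractions: calc_cputime_factors() uses div128_by_32() to store factor = 2^64 * HZ / tb_ticks_per_sec (and likewise for msec, sec and USER_HZ), so converting a tick count needs only the high half of one 64x64 multiply, which the PowerPC mulhdu instruction provides. The arithmetic, sketched in portable C (assuming a compiler with unsigned __int128):

	#include <stdint.h>

	/* factor = 2^64 * HZ / tb_ticks_per_sec, as div128_by_32() computes it */
	static uint64_t cputime_factor(uint64_t hz, uint64_t tb_ticks_per_sec)
	{
		return (uint64_t)(((unsigned __int128)hz << 64) / tb_ticks_per_sec);
	}

	/* ticks-to-jiffies is then the top 64 bits of ticks * factor,
	 * i.e. what a single mulhdu yields on ppc64 */
	static uint64_t ticks_to_jiffies(uint64_t ticks, uint64_t factor)
	{
		return (uint64_t)(((unsigned __int128)ticks * factor) >> 64);
	}

With tb_ticks_per_sec = 512000000 and HZ = 250, for instance, one jiffy is 2048000 timebase ticks and the stored factor is floor(2^64 / 2048000).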

+ 0 - 2
arch/powerpc/kernel/vdso.c

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/kernel/vdso.c
- *
  *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
  *			 <benh@kernel.crashing.org>
  *

+ 0 - 2
arch/powerpc/lib/copypage_64.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/lib/copypage.S
- *
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or

+ 0 - 2
arch/powerpc/lib/copyuser_64.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/lib/copyuser.S
- *
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or

+ 11 - 3
arch/powerpc/lib/e2a.c

@@ -1,9 +1,7 @@
 /*
- *  arch/ppc64/lib/e2a.c
- *
  *  EBCDIC to ASCII conversion
  *
- * This function moved here from arch/ppc64/kernel/viopath.c
+ * This function moved here from arch/powerpc/platforms/iseries/viopath.c
  *
  * (C) Copyright 2000-2004 IBM Corporation
  *
@@ -105,4 +103,14 @@ unsigned char e2a(unsigned char x)
 }
 EXPORT_SYMBOL(e2a);

+unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n)
+{
+	int i;
+
+	n = strnlen(src, n);
+
+	for (i = 0; i < n; i++)
+		dest[i] = e2a(src[i]);
+
+	return dest;
+}
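The new strne2a() converts at most n bytes of a NUL-terminated EBCDIC string through the existing e2a() table. Note it does not NUL-terminate the result itself, so the caller must. A hypothetical caller, assuming the usual kernel string helpers; 'ebcdic_tag' and TAG_LEN are made up for the illustration:

/* Hypothetical example: render an EBCDIC tag from iSeries firmware
 * as a printable ASCII string.
 */
#define TAG_LEN 8

static void print_tag(const unsigned char *ebcdic_tag)
{
	unsigned char ascii[TAG_LEN + 1];

	strne2a(ascii, ebcdic_tag, TAG_LEN);
	/* strne2a() copies only the converted bytes; terminate here */
	ascii[strnlen((const char *)ebcdic_tag, TAG_LEN)] = '\0';
	printk("tag: %s\n", ascii);
}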

+ 0 - 2
arch/powerpc/lib/memcpy_64.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/lib/memcpy.S
- *
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or

+ 0 - 2
arch/powerpc/lib/rheap.c

@@ -1,6 +1,4 @@
 /*
- * arch/ppc/syslib/rheap.c
- *
  * A Remote Heap.  Remote means that we don't touch the memory that the
  * heap points to. Normal heap implementations use the memory they manage
  * to place their list. We cannot do that because the memory we manage may

+ 0 - 2
arch/powerpc/mm/fault.c

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/mm/fault.c
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *

+ 0 - 2
arch/powerpc/mm/hash_low_32.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/hashtable.S
- *
  *  $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $
  *
  *  PowerPC version

+ 16 - 16
arch/powerpc/mm/hash_utils_64.c

@@ -169,7 +169,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 #ifdef CONFIG_PPC_ISERIES
 		if (_machine == PLATFORM_ISERIES_LPAR)
 			ret = iSeries_hpte_insert(hpteg, va,
-						  __pa(vaddr),
+						  paddr,
 						  tmp_mode,
 						  HPTE_V_BOLTED,
 						  psize);
@@ -178,7 +178,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 #ifdef CONFIG_PPC_PSERIES
 		if (_machine & PLATFORM_LPAR)
 			ret = pSeries_lpar_hpte_insert(hpteg, va,
-						       virt_to_abs(paddr),
+						       paddr,
 						       tmp_mode,
 						       HPTE_V_BOLTED,
 						       psize);
@@ -186,7 +186,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 #endif
 #ifdef CONFIG_PPC_MULTIPLATFORM
 			ret = native_hpte_insert(hpteg, va,
-						 virt_to_abs(paddr),
+						 paddr,
 						 tmp_mode, HPTE_V_BOLTED,
 						 psize);
 #endif
@@ -392,7 +392,7 @@ static unsigned long __init htab_get_table_size(void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 void create_section_mapping(unsigned long start, unsigned long end)
 {
-		BUG_ON(htab_bolt_mapping(start, end, start,
+		BUG_ON(htab_bolt_mapping(start, end, __pa(start),
 			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX,
 			mmu_linear_psize));
 }
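Across these hunks, htab_bolt_mapping() now takes the backing physical address directly rather than deriving it internally via __pa()/virt_to_abs(), so every caller converts explicitly. A sketch of the resulting calling convention, reusing the file's own symbols:

/* Illustrative only: bolt a linear-mapping virtual range [vstart, vend)
 * and supply the backing physical address ourselves.
 */
static void bolt_linear_range(unsigned long vstart, unsigned long vend)
{
	/* __pa() is valid here because vstart lies in the linear mapping */
	BUG_ON(htab_bolt_mapping(vstart, vend, __pa(vstart),
				 mode_rw, mmu_linear_psize));
}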
@@ -422,7 +422,7 @@ void __init htab_initialize(void)

 	htab_hash_mask = pteg_count - 1;

-	if (platform_is_lpar()) {
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		/* Using a hypervisor which owns the htab */
 		htab_address = NULL;
 		_SDR1 = 0;
@@ -431,7 +431,6 @@ void __init htab_initialize(void)
 		 * the absolute address space.
 		 */
 		table = lmb_alloc(htab_size_bytes, htab_size_bytes);
-		BUG_ON(table == 0);

 		DBG("Hash table allocated at %lx, size: %lx\n", table,
 		    htab_size_bytes);
@@ -474,21 +473,22 @@ void __init htab_initialize(void)

 		if (dart_tablebase != 0 && dart_tablebase >= base
 		    && dart_tablebase < (base + size)) {
+			unsigned long dart_table_end = dart_tablebase + 16 * MB;
 			if (base != dart_tablebase)
 				BUG_ON(htab_bolt_mapping(base, dart_tablebase,
-							 base, mode_rw,
-							 mmu_linear_psize));
-			if ((base + size) > (dart_tablebase + 16*MB))
+							__pa(base), mode_rw,
+							mmu_linear_psize));
+			if ((base + size) > dart_table_end)
 				BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
-							 base + size,
-							 dart_tablebase+16*MB,
+							base + size,
+							__pa(dart_table_end),
 							 mode_rw,
 							 mmu_linear_psize));
 			continue;
 		}
 #endif /* CONFIG_U3_DART */
-		BUG_ON(htab_bolt_mapping(base, base + size, base,
-					 mode_rw, mmu_linear_psize));
+		BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
+					mode_rw, mmu_linear_psize));
        }

 	/*
@@ -505,8 +505,8 @@ void __init htab_initialize(void)
 		if (base + size >= tce_alloc_start)
 			tce_alloc_start = base + size + 1;

- 		BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
- 					 tce_alloc_start, mode_rw,
+		BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+					 __pa(tce_alloc_start), mode_rw,
 					 mmu_linear_psize));
 	}

@@ -517,7 +517,7 @@ void __init htab_initialize(void)

 void htab_initialize_secondary(void)
 {
-	if (!platform_is_lpar())
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		mtspr(SPRN_SDR1, _SDR1);
 }


+ 0 - 48
arch/powerpc/mm/init_64.c

@@ -84,54 +84,6 @@
 /* max amount of RAM to use */
 unsigned long __max_memory;

-/* info on what we think the IO hole is */
-unsigned long 	io_hole_start;
-unsigned long	io_hole_size;
-
-/*
- * Do very early mm setup.
- */
-void __init mm_init_ppc64(void)
-{
-#ifndef CONFIG_PPC_ISERIES
-	unsigned long i;
-#endif
-
-	ppc64_boot_msg(0x100, "MM Init");
-
-	/* This is the story of the IO hole... please, keep seated,
-	 * unfortunately, we are out of oxygen masks at the moment.
-	 * So we need some rough way to tell where your big IO hole
-	 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
-	 * that area as well, on POWER4 we don't have one, etc...
-	 * We need that as a "hint" when sizing the TCE table on POWER3
-	 * So far, the simplest way that seem work well enough for us it
-	 * to just assume that the first discontinuity in our physical
-	 * RAM layout is the IO hole. That may not be correct in the future
-	 * (and isn't on iSeries but then we don't care ;)
-	 */
-
-#ifndef CONFIG_PPC_ISERIES
-	for (i = 1; i < lmb.memory.cnt; i++) {
-		unsigned long base, prevbase, prevsize;
-
-		prevbase = lmb.memory.region[i-1].base;
-		prevsize = lmb.memory.region[i-1].size;
-		base = lmb.memory.region[i].base;
-		if (base > (prevbase + prevsize)) {
-			io_hole_start = prevbase + prevsize;
-			io_hole_size = base  - (prevbase + prevsize);
-			break;
-		}
-	}
-#endif /* CONFIG_PPC_ISERIES */
-	if (io_hole_start)
-		printk("IO Hole assumed to be %lx -> %lx\n",
-		       io_hole_start, io_hole_start + io_hole_size - 1);
-
-	ppc64_boot_msg(0x100, "MM Init Done");
-}
-
 void free_initmem(void)
 {
 	unsigned long addr;

+ 16 - 0
arch/powerpc/mm/lmb.c

@@ -31,6 +31,8 @@
 #define DBG(fmt...)
 #endif

+#define LMB_ALLOC_ANYWHERE	0
+
 struct lmb lmb;

 void lmb_dump_all(void)
@@ -225,6 +227,20 @@ unsigned long __init lmb_alloc(unsigned long size, unsigned long align)

 unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align,
 				    unsigned long max_addr)
+{
+	unsigned long alloc;
+
+	alloc = __lmb_alloc_base(size, align, max_addr);
+
+	if (alloc == 0)
+		panic("ERROR: Failed to allocate 0x%lx bytes below 0x%lx.\n",
+				size, max_addr);
+
+	return alloc;
+}
+
+unsigned long __init __lmb_alloc_base(unsigned long size, unsigned long align,
+				    unsigned long max_addr)
 {
 	long i, j;
 	unsigned long base = 0;
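The split gives callers a choice: lmb_alloc_base() panics on failure, which lets the per-call-site BUG_ON()/panic() checks elsewhere in this merge go away, while __lmb_alloc_base() returns 0 so the caller can retry, as careful_allocation() in numa.c does further down. The pattern in miniature, with hypothetical names:

/* Sketch of a must-succeed wrapper over a fallible allocator;
 * try_alloc() stands in for the underlying allocation routine.
 */
unsigned long alloc_or_die(unsigned long size, unsigned long align,
			   unsigned long max_addr)
{
	unsigned long alloc = try_alloc(size, align, max_addr); /* may be 0 */

	if (alloc == 0)
		panic("ERROR: Failed to allocate 0x%lx bytes below 0x%lx.\n",
		      size, max_addr);
	return alloc;
}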

+ 1 - 2
arch/powerpc/mm/mem.c

@@ -125,7 +125,7 @@ int __devinit add_memory(u64 start, u64 size)
 	nid = hot_add_scn_to_nid(start);
 	pgdata = NODE_DATA(nid);

-	start = __va(start);
+	start = (unsigned long)__va(start);
 	create_section_mapping(start, start + size);

 	/* this should work for most non-highmem platforms */
@@ -249,7 +249,6 @@ void __init do_init_bootmem(void)
 	bootmap_pages = bootmem_bootmap_pages(total_pages);

 	start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
-	BUG_ON(!start);

 	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);


+ 0 - 2
arch/powerpc/mm/mmap.c

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/mm/mmap.c
- *
  *  flexible mmap layout support
  *
  * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.

+ 74 - 86
arch/powerpc/mm/numa.c

@@ -129,10 +129,12 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn,
 		*start_pfn = 0;
 }

-static inline void map_cpu_to_node(int cpu, int node)
+static void __cpuinit map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;

+	dbg("adding cpu %d to node %d\n", cpu, node);
+
 	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
 		cpu_set(cpu, numa_cpumask_lookup_table[node]);
 }
@@ -153,7 +155,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */

-static struct device_node *find_cpu_node(unsigned int cpu)
+static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
 {
 	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
 	struct device_node *cpu_node = NULL;
@@ -189,23 +191,29 @@ static int *of_get_associativity(struct device_node *dev)
 	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
 }

-static int of_node_numa_domain(struct device_node *device)
+/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+ * info is found.
+ */
+static int of_node_to_nid(struct device_node *device)
 {
-	int numa_domain;
+	int nid = -1;
 	unsigned int *tmp;

 	if (min_common_depth == -1)
-		return 0;
+		goto out;

 	tmp = of_get_associativity(device);
-	if (tmp && (tmp[0] >= min_common_depth)) {
-		numa_domain = tmp[min_common_depth];
-	} else {
-		dbg("WARNING: no NUMA information for %s\n",
-		    device->full_name);
-		numa_domain = 0;
-	}
-	return numa_domain;
+	if (!tmp)
+		goto out;
+
+	if (tmp[0] >= min_common_depth)
+		nid = tmp[min_common_depth];
+
+	/* POWER4 LPAR uses 0xffff as invalid node */
+	if (nid == 0xffff || nid >= MAX_NUMNODES)
+		nid = -1;
+out:
+	return nid;
 }

 /*
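Returning -1 instead of silently mapping unknown nodes to 0 lets each caller pick its own fallback; the pattern repeated through the rest of this file looks like the fragment below, assuming an arbitrary online node is an acceptable default:

	/* Caller-side fallback for of_node_to_nid()'s -1 sentinel */
	int nid = of_node_to_nid(device);

	if (nid < 0 || !node_online(nid))
		nid = any_online_node(NODE_MASK_ALL);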
@@ -246,8 +254,7 @@ static int __init find_min_common_depth(void)
 	if ((len >= 1) && ref_points) {
 		depth = ref_points[1];
 	} else {
-		dbg("WARNING: could not find NUMA "
-		    "associativity reference point\n");
+		dbg("NUMA: ibm,associativity-reference-points not found.\n");
 		depth = -1;
 	}
 	of_node_put(rtas_root);
@@ -283,9 +290,9 @@ static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
  * Figure out to which domain a cpu belongs and stick it there.
  * Return the id of the domain used.
  */
-static int numa_setup_cpu(unsigned long lcpu)
+static int __cpuinit numa_setup_cpu(unsigned long lcpu)
 {
-	int numa_domain = 0;
+	int nid = 0;
 	struct device_node *cpu = find_cpu_node(lcpu);

 	if (!cpu) {
@@ -293,27 +300,16 @@ static int numa_setup_cpu(unsigned long lcpu)
 		goto out;
 	}

-	numa_domain = of_node_numa_domain(cpu);
+	nid = of_node_to_nid(cpu);

-	if (numa_domain >= num_online_nodes()) {
-		/*
-		 * POWER4 LPAR uses 0xffff as invalid node,
-		 * dont warn in this case.
-		 */
-		if (numa_domain != 0xffff)
-			printk(KERN_ERR "WARNING: cpu %ld "
-			       "maps to invalid NUMA node %d\n",
-			       lcpu, numa_domain);
-		numa_domain = 0;
-	}
+	if (nid < 0 || !node_online(nid))
+		nid = any_online_node(NODE_MASK_ALL);
 out:
-	node_set_online(numa_domain);
-
-	map_cpu_to_node(lcpu, numa_domain);
+	map_cpu_to_node(lcpu, nid);

 	of_node_put(cpu);

-	return numa_domain;
+	return nid;
 }

 static int cpu_numa_callback(struct notifier_block *nfb,
@@ -325,10 +321,7 @@ static int cpu_numa_callback(struct notifier_block *nfb,

 	switch (action) {
 	case CPU_UP_PREPARE:
-		if (min_common_depth == -1 || !numa_enabled)
-			map_cpu_to_node(lcpu, 0);
-		else
-			numa_setup_cpu(lcpu);
+		numa_setup_cpu(lcpu);
 		ret = NOTIFY_OK;
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -375,7 +368,7 @@ static int __init parse_numa_properties(void)
 {
 	struct device_node *cpu = NULL;
 	struct device_node *memory = NULL;
-	int max_domain;
+	int default_nid = 0;
 	unsigned long i;

 	if (numa_enabled == 0) {
@@ -385,32 +378,32 @@ static int __init parse_numa_properties(void)

 	min_common_depth = find_min_common_depth();

-	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
 	if (min_common_depth < 0)
 		return min_common_depth;

-	max_domain = numa_setup_cpu(boot_cpuid);
+	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

 	/*
-	 * Even though we connect cpus to numa domains later in SMP init,
-	 * we need to know the maximum node id now. This is because each
-	 * node id must have NODE_DATA etc backing it.
-	 * As a result of hotplug we could still have cpus appear later on
-	 * with larger node ids. In that case we force the cpu into node 0.
+	 * Even though we connect cpus to numa domains later in SMP
+	 * init, we need to know the node ids now. This is because
+	 * each node to be onlined must have NODE_DATA etc backing it.
 	 */
-	for_each_cpu(i) {
-		int numa_domain;
+	for_each_present_cpu(i) {
+		int nid;

 		cpu = find_cpu_node(i);
+		BUG_ON(!cpu);
+		nid = of_node_to_nid(cpu);
+		of_node_put(cpu);

-		if (cpu) {
-			numa_domain = of_node_numa_domain(cpu);
-			of_node_put(cpu);
-
-			if (numa_domain < MAX_NUMNODES &&
-			    max_domain < numa_domain)
-				max_domain = numa_domain;
-		}
+		/*
+		 * Don't fall back to default_nid yet -- we will plug
+		 * cpus into nodes once the memory scan has discovered
+		 * the topology.
+		 */
+		if (nid < 0)
+			continue;
+		node_set_online(nid);
 	}

 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -418,7 +411,7 @@ static int __init parse_numa_properties(void)
 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
 		unsigned long start;
 		unsigned long size;
-		int numa_domain;
+		int nid;
 		int ranges;
 		unsigned int *memcell_buf;
 		unsigned int len;
@@ -439,18 +432,15 @@ new_range:
 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
 		size = read_n_cells(n_mem_size_cells, &memcell_buf);

-		numa_domain = of_node_numa_domain(memory);
-
-		if (numa_domain >= MAX_NUMNODES) {
-			if (numa_domain != 0xffff)
-				printk(KERN_ERR "WARNING: memory at %lx maps "
-				       "to invalid NUMA node %d\n", start,
-				       numa_domain);
-			numa_domain = 0;
-		}
-
-		if (max_domain < numa_domain)
-			max_domain = numa_domain;
+		/*
+		 * Assumption: either all memory nodes or none will
+		 * have associativity properties.  If none, then
+		 * everything goes to default_nid.
+		 */
+		nid = of_node_to_nid(memory);
+		if (nid < 0)
+			nid = default_nid;
+		node_set_online(nid);

 		if (!(size = numa_enforce_memory_limit(start, size))) {
 			if (--ranges)
@@ -459,16 +449,13 @@ new_range:
 				continue;
 		}

-		add_region(numa_domain, start >> PAGE_SHIFT,
+		add_region(nid, start >> PAGE_SHIFT,
 			   size >> PAGE_SHIFT);

 		if (--ranges)
 			goto new_range;
 	}

-	for (i = 0; i <= max_domain; i++)
-		node_set_online(i);
-
 	return 0;
 }

@@ -483,7 +470,6 @@ static void __init setup_nonnuma(void)
 	printk(KERN_INFO "Memory hole size: %ldMB\n",
 	       (top_of_ram - total_ram) >> 20);

-	map_cpu_to_node(boot_cpuid, 0);
 	for (i = 0; i < lmb.memory.cnt; ++i)
 		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
 			   lmb_size_pages(&lmb.memory, i));
@@ -570,11 +556,11 @@ static void __init *careful_allocation(int nid, unsigned long size,
 				       unsigned long end_pfn)
 {
 	int new_nid;
-	unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

 	/* retry over all memory */
 	if (!ret)
-		ret = lmb_alloc_base(size, align, lmb_end_of_DRAM());
+		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

 	if (!ret)
 		panic("numa.c: cannot allocate %lu bytes on node %d",
@@ -620,6 +606,8 @@ void __init do_init_bootmem(void)
 		dump_numa_memory_topology();

 	register_cpu_notifier(&ppc64_numa_nb);
+	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
+			  (void *)(unsigned long)boot_cpuid);

 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn, pages_present;
@@ -767,10 +755,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 {
 	struct device_node *memory = NULL;
 	nodemask_t nodes;
-	int numa_domain = 0;
+	int default_nid = any_online_node(NODE_MASK_ALL);

 	if (!numa_enabled || (min_common_depth < 0))
-		return numa_domain;
+		return default_nid;

 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
 		unsigned long start, size;
@@ -787,15 +775,15 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 ha_new_range:
 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
-		numa_domain = of_node_numa_domain(memory);
+		nid = of_node_to_nid(memory);

 		/* Domains not present at boot default to 0 */
-		if (!node_online(numa_domain))
-			numa_domain = any_online_node(NODE_MASK_ALL);
+		if (nid < 0 || !node_online(nid))
+			nid = default_nid;

 		if ((scn_addr >= start) && (scn_addr < (start + size))) {
 			of_node_put(memory);
-			goto got_numa_domain;
+			goto got_nid;
 		}

 		if (--ranges)		/* process all ranges in cell */
@@ -804,12 +792,12 @@ ha_new_range:
 	BUG();	/* section address should be found above */

 	/* Temporary code to ensure that returned node is not empty */
-got_numa_domain:
+got_nid:
 	nodes_setall(nodes);
-	while (NODE_DATA(numa_domain)->node_spanned_pages == 0) {
-		node_clear(numa_domain, nodes);
-		numa_domain = any_online_node(nodes);
+	while (NODE_DATA(nid)->node_spanned_pages == 0) {
+		node_clear(nid, nodes);
+		nid = any_online_node(nodes);
 	}
-	return numa_domain;
+	return nid;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
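hot_add_scn_to_nid() resolves a hot-added memory section's address to a node by walking the device tree's "memory" ranges, falling back to any online node. The core lookup in miniature, with a made-up in-memory range table standing in for the device-tree walk:

/* Sketch: resolve a section address against known address ranges,
 * mirroring hot_add_scn_to_nid() above.  struct mem_range and the
 * r[]/n parameters are invented for the illustration.
 */
struct mem_range { unsigned long start, size; int nid; };

static int scn_to_nid(unsigned long scn_addr, const struct mem_range *r,
		      int n, int default_nid)
{
	int i;

	for (i = 0; i < n; i++)
		if (scn_addr >= r[i].start &&
		    scn_addr < r[i].start + r[i].size)
			/* -1 means the range had no associativity info */
			return r[i].nid >= 0 ? r[i].nid : default_nid;
	return default_nid;
}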

+ 0 - 2
arch/powerpc/mm/slb_low.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/mm/slb_low.S
- *
  * Low-level SLB routines
  *
  * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM

+ 0 - 4
arch/powerpc/mm/stab.c

@@ -247,10 +247,6 @@ void stabs_alloc(void)

 		newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
 					 1<<SID_SHIFT);
-		if (! newstab)
-			panic("Unable to allocate segment table for CPU %d.\n",
-			      cpu);
-
 		newstab = (unsigned long)__va(newstab);

 		memset((void *)newstab, 0, HW_PAGE_SIZE);

+ 1 - 1
arch/powerpc/mm/tlb_64.c

@@ -36,7 +36,7 @@
 DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);

 /* This is declared as we are using the more or less generic
- * include/asm-ppc64/tlb.h file -- tgall
+ * include/asm-powerpc/tlb.h file -- tgall
  */
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);

Some files were not shown because too many files changed in this diff