Merge branch 'master'

Jeff Garzik, 19 years ago
Commit 88e3c1da8b
100 files changed, 2598 insertions(+), 711 deletions(-)
  1. Documentation/kernel-parameters.txt (+10 -1)
  2. Documentation/power/swsusp.txt (+44 -7)
  3. Documentation/power/userland-swsusp.txt (+149 -0)
  4. Documentation/power/video.txt (+33 -41)
  5. Documentation/powerpc/booting-without-of.txt (+72 -0)
  6. Documentation/powerpc/eeh-pci-error-recovery.txt (+8 -7)
  7. Documentation/powerpc/hvcs.txt (+2 -2)
  8. MAINTAINERS (+2 -2)
  9. arch/cris/kernel/irq.c (+4 -6)
  10. arch/frv/kernel/irq.c (+4 -6)
  11. arch/i386/Kconfig (+14 -10)
  12. arch/i386/Kconfig.debug (+9 -0)
  13. arch/i386/kernel/Makefile (+1 -1)
  14. arch/i386/kernel/alternative.c (+321 -0)
  15. arch/i386/kernel/apic.c (+1 -0)
  16. arch/i386/kernel/cpu/centaur.c (+1 -0)
  17. arch/i386/kernel/cpu/common.c (+30 -17)
  18. arch/i386/kernel/cpu/cpufreq/powernow-k8.c (+1 -3)
  19. arch/i386/kernel/cpu/intel.c (+6 -6)
  20. arch/i386/kernel/cpu/intel_cacheinfo.c (+2 -2)
  21. arch/i386/kernel/cpu/proc.c (+1 -1)
  22. arch/i386/kernel/crash.c (+1 -1)
  23. arch/i386/kernel/entry.S (+4 -0)
  24. arch/i386/kernel/head.S (+1 -4)
  25. arch/i386/kernel/io_apic.c (+11 -14)
  26. arch/i386/kernel/kprobes.c (+2 -2)
  27. arch/i386/kernel/module.c (+22 -10)
  28. arch/i386/kernel/mpparse.c (+4 -3)
  29. arch/i386/kernel/nmi.c (+3 -3)
  30. arch/i386/kernel/process.c (+1 -1)
  31. arch/i386/kernel/ptrace.c (+2 -2)
  32. arch/i386/kernel/semaphore.c (+4 -4)
  33. arch/i386/kernel/setup.c (+10 -108)
  34. arch/i386/kernel/signal.c (+2 -5)
  35. arch/i386/kernel/smpboot.c (+3 -0)
  36. arch/i386/kernel/topology.c (+9 -0)
  37. arch/i386/kernel/traps.c (+40 -17)
  38. arch/i386/kernel/vmlinux.lds.S (+20 -0)
  39. arch/i386/kernel/vsyscall-sysenter.S (+3 -0)
  40. arch/i386/mach-es7000/es7000.h (+4 -1)
  41. arch/i386/mach-es7000/es7000plat.c (+2 -4)
  42. arch/i386/mm/fault.c (+138 -72)
  43. arch/i386/mm/init.c (+22 -23)
  44. arch/i386/oprofile/nmi_int.c (+2 -5)
  45. arch/ia64/hp/sim/simserial.c (+1 -6)
  46. arch/m32r/kernel/irq.c (+4 -6)
  47. arch/m68k/bvme6000/rtc.c (+1 -3)
  48. arch/mips/kernel/irq.c (+4 -6)
  49. arch/mips/kernel/smp.c (+2 -2)
  50. arch/mips/sgi-ip27/ip27-irq.c (+1 -4)
  51. arch/parisc/kernel/smp.c (+10 -15)
  52. arch/powerpc/Kconfig (+32 -6)
  53. arch/powerpc/Makefile (+1 -1)
  54. arch/powerpc/boot/install.sh (+0 -2)
  55. arch/powerpc/boot/main.c (+2 -2)
  56. arch/powerpc/configs/mpc8540_ads_defconfig (+721 -0)
  57. arch/powerpc/kernel/asm-offsets.c (+3 -0)
  58. arch/powerpc/kernel/cputable.c (+11 -1)
  59. arch/powerpc/kernel/entry_64.S (+6 -5)
  60. arch/powerpc/kernel/firmware.c (+0 -25)
  61. arch/powerpc/kernel/head_44x.S (+0 -2)
  62. arch/powerpc/kernel/head_64.S (+9 -2)
  63. arch/powerpc/kernel/head_8xx.S (+0 -2)
  64. arch/powerpc/kernel/head_booke.h (+363 -0)
  65. arch/powerpc/kernel/head_fsl_booke.S (+4 -2)
  66. arch/powerpc/kernel/iomap.c (+0 -2)
  67. arch/powerpc/kernel/iommu.c (+0 -1)
  68. arch/powerpc/kernel/irq.c (+24 -13)
  69. arch/powerpc/kernel/kprobes.c (+2 -3)
  70. arch/powerpc/kernel/of_device.c (+1 -4)
  71. arch/powerpc/kernel/pci_iommu.c (+0 -1)
  72. arch/powerpc/kernel/ppc_ksyms.c (+0 -1)
  73. arch/powerpc/kernel/process.c (+6 -3)
  74. arch/powerpc/kernel/prom.c (+0 -4)
  75. arch/powerpc/kernel/ptrace-common.h (+0 -2)
  76. arch/powerpc/kernel/rtas-proc.c (+0 -1)
  77. arch/powerpc/kernel/rtas_pci.c (+0 -2)
  78. arch/powerpc/kernel/setup-common.c (+2 -3)
  79. arch/powerpc/kernel/setup_32.c (+2 -3)
  80. arch/powerpc/kernel/setup_64.c (+0 -2)
  81. arch/powerpc/kernel/signal_64.c (+0 -2)
  82. arch/powerpc/kernel/smp.c (+3 -1)
  83. arch/powerpc/kernel/time.c (+239 -2)
  84. arch/powerpc/kernel/vdso.c (+0 -2)
  85. arch/powerpc/lib/copypage_64.S (+0 -2)
  86. arch/powerpc/lib/copyuser_64.S (+0 -2)
  87. arch/powerpc/lib/e2a.c (+11 -3)
  88. arch/powerpc/lib/memcpy_64.S (+0 -2)
  89. arch/powerpc/lib/rheap.c (+0 -2)
  90. arch/powerpc/mm/fault.c (+0 -2)
  91. arch/powerpc/mm/hash_low_32.S (+0 -2)
  92. arch/powerpc/mm/hash_utils_64.c (+16 -16)
  93. arch/powerpc/mm/init_64.c (+0 -48)
  94. arch/powerpc/mm/lmb.c (+16 -0)
  95. arch/powerpc/mm/mem.c (+1 -2)
  96. arch/powerpc/mm/mmap.c (+0 -2)
  97. arch/powerpc/mm/numa.c (+74 -86)
  98. arch/powerpc/mm/slb_low.S (+0 -2)
  99. arch/powerpc/mm/stab.c (+0 -4)
  100. arch/powerpc/mm/tlb_64.c (+1 -1)

+ 10 - 1
Documentation/kernel-parameters.txt

@@ -1008,7 +1008,9 @@ running once the system is up.
 			noexec=on: enable non-executable mappings (default)
 			noexec=off: disable non-executable mappings
 
-	nofxsr		[BUGS=IA-32]
+	nofxsr		[BUGS=IA-32] Disables x86 floating point extended
+			register save and restore. The kernel will only save
+			legacy floating-point registers on task switch.
 
 	nohlt		[BUGS=ARM]
 
@@ -1053,6 +1055,8 @@ running once the system is up.
 
 	nosbagart	[IA-64]
 
+	nosep		[BUGS=IA-32] Disables x86 SYSENTER/SYSEXIT support.
+
 	nosmp		[SMP] Tells an SMP kernel to act as a UP kernel.
 
 	nosync		[HW,M68K] Disables sync negotiation for all devices.
@@ -1122,6 +1126,11 @@ running once the system is up.
 	pas16=		[HW,SCSI]
 			See header of drivers/scsi/pas16.c.
 
+	pause_on_oops=
+			Halt all CPUs after the first oops has been printed for
+			the specified number of seconds.  This is to be used if
+			your oopses keep scrolling off the screen.
+
 	pcbit=		[HW,ISDN]
 
 	pcd.		[PARIDE]

+ 44 - 7
Documentation/power/swsusp.txt

@@ -17,6 +17,11 @@ Some warnings, first.
  * but it will probably only crash.
  *
  * (*) suspend/resume support is needed to make it safe.
+ *
+ * If you have any filesystems on USB devices mounted before suspend,
+ * they won't be accessible after resume and you may lose data, as though
+ * you had unplugged the USB devices with mounted filesystems on them
+ * (see the FAQ below for details).
 
 You need to append resume=/dev/your_swap_partition to kernel command
 line. Then you suspend by
@@ -27,19 +32,18 @@ echo shutdown > /sys/power/disk; echo disk > /sys/power/state
 
 echo platform > /sys/power/disk; echo disk > /sys/power/state
 
+. If you have SATA disks, you'll need recent kernels with SATA suspend
+support. For suspend and resume to work, make sure your disk drivers
+are built into the kernel -- not as modules. [There is a way to make
+suspend/resume work with modular disk drivers, see the FAQ, but you
+probably should not do that.]
+
 If you want to limit the suspend image size to N bytes, do
 
 echo N > /sys/power/image_size
 
 before suspend (it is limited to 500 MB by default).
 
-Encrypted suspend image:
-------------------------
-If you want to store your suspend image encrypted with a temporary
-key to prevent data gathering after resume you must compile
-crypto and the aes algorithm into the kernel - modules won't work
-as they cannot be loaded at resume time.
-
 
 Article about goals and implementation of Software Suspend for Linux
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -333,4 +337,37 @@ init=/bin/bash, then swapon and starting suspend sequence manually
 usually does the trick. Then it is a good idea to try with the latest
 vanilla kernel.
 
+Q: How can distributions ship a swsusp-supporting kernel with modular
+disk drivers (especially SATA)?
+
+A: Well, it can be done: load the drivers, then do an echo into the
+/sys/power/disk/resume file from initrd. Be sure not to mount
+anything, not even read-only mount, or you are going to lose your
+data.
+
+Q: How do I make suspend more verbose?
+
+A: If you want to see any non-error kernel messages on the virtual
+terminal the kernel switches to during suspend, you have to set the
+kernel console loglevel to at least 5, for example by doing
+
+	echo 5 > /proc/sys/kernel/printk
+
+Q: Is it true that if I have a mounted filesystem on a USB device and
+I suspend to disk, I can lose data unless the filesystem has been mounted
+with "sync"?
+
+A: That's right.  It depends on your hardware, and it could be true even for
+suspend-to-RAM.  In fact, even with "-o sync" you can lose data if your
+programs have information in buffers they haven't written out to disk.
+
+If you're lucky, your hardware will support low-power modes for USB
+controllers while the system is asleep.  Lots of hardware doesn't,
+however.  Shutting off the power to a USB controller is equivalent to
+unplugging all the attached devices.
+
+Remember that it's always a bad idea to unplug a disk drive containing a
+mounted filesystem.  With USB that's true even when your system is asleep!
+The safest thing is to unmount all USB-based filesystems before suspending
+and remount them after resuming.
 

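As a side note, the echo sequence documented above can equally be driven
from a small C helper. A minimal sketch, assuming /sys is mounted and the
kernel was booted with resume=/dev/your_swap_partition (this is not part
of the patch, only an illustration):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* write a short string into a sysfs file, returning nonzero on failure */
static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* pick the method: "shutdown" or "platform", as described above */
	if (write_str("/sys/power/disk", "shutdown"))
		perror("/sys/power/disk");
	/* this write returns only after resume (or on failure) */
	if (write_str("/sys/power/state", "disk"))
		perror("/sys/power/state");
	return 0;
}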
+ 149 - 0
Documentation/power/userland-swsusp.txt

@@ -0,0 +1,149 @@
+Documentation for userland software suspend interface
+	(C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+
+First, the warnings at the beginning of swsusp.txt still apply.
+
+Second, you should read the FAQ in swsusp.txt _now_ if you have not
+done it already.
+
+Now, to use the userland interface for software suspend you need special
+utilities that will read/write the system memory snapshot from/to the
+kernel.  Such utilities are available, for example, from
+<http://www.sisk.pl/kernel/utilities/suspend>.  You may want to have
+a look at them if you are going to develop your own suspend/resume
+utilities.
+
+The interface consists of a character device providing the open(),
+release(), read(), and write() operations as well as several ioctl()
+commands defined in kernel/power/power.h.  The major and minor
+numbers of the device are, respectively, 10 and 231, and they can
+be read from /sys/class/misc/snapshot/dev.
+
+The device can be opened either for reading or for writing.  If opened
+for reading, it is considered to be in the suspend mode.  Otherwise it is
+assumed to be in the resume mode.  The device cannot be opened for both
+reading and writing.  It is also impossible to have the device open more
+than once at a time.
+
+The ioctl() commands recognized by the device are:
+
+SNAPSHOT_FREEZE - freeze user space processes (the current process is
+	not frozen); this is required for SNAPSHOT_ATOMIC_SNAPSHOT
+	and SNAPSHOT_ATOMIC_RESTORE to succeed
+
+SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE
+
+SNAPSHOT_ATOMIC_SNAPSHOT - create a snapshot of the system memory; the
+	last argument of ioctl() should be a pointer to an int variable,
+	the value of which will indicate whether the call returned after
+	creating the snapshot (1) or after restoring the system memory state
+	from it (0) (after resume the system finds itself finishing the
+	SNAPSHOT_ATOMIC_SNAPSHOT ioctl() again); after the snapshot
+	has been created the read() operation can be used to transfer
+	it out of the kernel
+
+SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the
+	uploaded snapshot image; before calling it you should transfer
+	the system memory snapshot back to the kernel using the write()
+	operation; this call will not succeed if the snapshot
+	image is not available to the kernel
+
+SNAPSHOT_FREE - free memory allocated for the snapshot image
+
+SNAPSHOT_SET_IMAGE_SIZE - set the preferred maximum size of the image
+	(the kernel will do its best to ensure the image size will not exceed
+	this number, but if it turns out to be impossible, the kernel will
+	create the smallest image possible)
+
+SNAPSHOT_AVAIL_SWAP - return the amount of available swap in bytes (the last
+	argument should be a pointer to an unsigned int variable that will
+	contain the result if the call is successful).
+
+SNAPSHOT_GET_SWAP_PAGE - allocate a swap page from the resume partition
+	(the last argument should be a pointer to a loff_t variable that
+	will contain the swap page offset if the call is successful)
+
+SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated with
+	SNAPSHOT_GET_SWAP_PAGE
+
+SNAPSHOT_SET_SWAP_FILE - set the resume partition (the last ioctl() argument
+	should specify the device's major and minor numbers in the old
+	two-byte format, as returned by the stat() function in the .st_rdev
+	member of the stat structure); it is recommended to always use this
+	call, because the code to set the resume partition could be removed from
+	future kernels
+
+The device's read() operation can be used to transfer the snapshot image from
+the kernel.  It has the following limitations:
+- you cannot read() more than one virtual memory page at a time
+- read()s across page boundaries are impossible (i.e. if you read() 1/2 of
+	a page in the previous call, you will only be able to read()
+	_at_ _most_ 1/2 of the page in the next call)
+
+The device's write() operation is used for uploading the system memory snapshot
+into the kernel.  It has the same limitations as the read() operation.
+
+The release() operation frees all memory allocated for the snapshot image
+and all swap pages allocated with SNAPSHOT_GET_SWAP_PAGE (if any).
+Thus it is not necessary to use either SNAPSHOT_FREE or
+SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also
+unfreeze user space processes frozen by SNAPSHOT_FREEZE if they are
+still frozen when the device is being closed).
+
+Currently it is assumed that the userland utilities reading/writing the
+snapshot image from/to the kernel will use a swap partition, called the resume
+partition, as storage space.  However, this is not really required, as they
+can use, for example, a special (blank) suspend partition or a file on a partition
+that is unmounted before SNAPSHOT_ATOMIC_SNAPSHOT and mounted afterwards.
+
+These utilities SHOULD NOT make any assumptions regarding the ordering of
+data within the snapshot image, except for the image header that MAY be
+assumed to start with a swsusp_info structure, as specified in
+kernel/power/power.h.  This structure MAY be used by the userland utilities
+to obtain some information about the snapshot image, such as the size
+of the snapshot image, including the metadata and the header itself,
+contained in the .size member of swsusp_info.
+
+The snapshot image MUST be written to the kernel unaltered (i.e. all of the image
+data, metadata and header MUST be written in _exactly_ the same amount, form
+and order in which they have been read).  Otherwise, the behavior of the
+resumed system may be totally unpredictable.
+
+While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the
+structure of the snapshot image is consistent with the information stored
+in the image header.  If any inconsistencies are detected,
+SNAPSHOT_ATOMIC_RESTORE will not succeed.  Still, this is not a fool-proof
+mechanism and the userland utilities using the interface SHOULD use additional
+means, such as checksums, to ensure the integrity of the snapshot image.
+
+The suspending and resuming utilities MUST lock themselves in memory,
+preferably using mlockall(), before calling SNAPSHOT_FREEZE.
+
+The suspending utility MUST check the value stored by SNAPSHOT_ATOMIC_SNAPSHOT
+in the memory location pointed to by the last argument of ioctl() and proceed
+in accordance with it:
+1. 	If the value is 1 (i.e. the system memory snapshot has just been
+	created and the system is ready for saving it):
+	(a)	The suspending utility MUST NOT close the snapshot device
+		_unless_ the whole suspend procedure is to be cancelled, in
+		which case, if the snapshot image has already been saved, the
+		suspending utility SHOULD destroy it, preferably by zapping
+		its header.  If the suspend is not to be cancelled, the
+		system MUST be powered off or rebooted after the snapshot
+		image has been saved.
+	(b)	The suspending utility SHOULD NOT attempt to perform any
+		file system operations (including reads) on the file systems
+		that were mounted before SNAPSHOT_ATOMIC_SNAPSHOT was
+		called.  However, it MAY mount a file system that was not
+		mounted at that time and perform some operations on it (e.g.
+		use it for saving the image).
+2.	If the value is 0 (i.e. the system state has just been restored from
+	the snapshot image), the suspending utility MUST close the snapshot
+	device.  Afterwards it will be treated as a regular userland process,
+	so it need not exit.
+
+The resuming utility SHOULD NOT attempt to mount any file systems that could
+be mounted before suspend and SHOULD NOT attempt to perform any operations
+involving such file systems.
+
+For details, please refer to the source code.

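To make the call sequence above concrete, here is a rough sketch of the
suspend side in C. It is not one of the referenced utilities: the
SNAPSHOT_* ioctl numbers must be copied from kernel/power/power.h (they
are not exported in a public header), save_page() is a hypothetical
helper, and actually saving the image to the resume partition is omitted:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "power.h"	/* local copy of the SNAPSHOT_* definitions */

#define PAGE_SIZE 4096

/* hypothetical helper that stores one chunk of the image somewhere safe */
extern void save_page(const void *buf, ssize_t len);

int main(void)
{
	char buf[PAGE_SIZE];
	int in_suspend = 0;
	ssize_t n;

	/* opening for reading puts the device in suspend mode */
	int fd = open("/dev/snapshot", O_RDONLY);
	if (fd < 0) {
		perror("/dev/snapshot");
		return 1;
	}
	/* the utility MUST be locked in memory before SNAPSHOT_FREEZE */
	mlockall(MCL_CURRENT | MCL_FUTURE);

	if (ioctl(fd, SNAPSHOT_FREEZE, 0))
		goto out;
	if (ioctl(fd, SNAPSHOT_ATOMIC_SNAPSHOT, &in_suspend))
		goto thaw;

	if (in_suspend) {
		/* snapshot created: read it out, at most one page per call */
		while ((n = read(fd, buf, PAGE_SIZE)) > 0)
			save_page(buf, n);
		/* a real utility would now power off WITHOUT closing the
		 * device; falling through here cancels the suspend instead */
	} else {
		/* we just resumed from the image: close the device and
		 * carry on as an ordinary process */
	}
thaw:
	ioctl(fd, SNAPSHOT_UNFREEZE, 0);
out:
	close(fd);
	return 0;
}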
+ 33 - 41
Documentation/power/video.txt

@@ -1,7 +1,7 @@
 
 		Video issues with S3 resume
 		~~~~~~~~~~~~~~~~~~~~~~~~~~~
-		  2003-2005, Pavel Machek
+		  2003-2006, Pavel Machek
 
 During S3 resume, hardware needs to be reinitialized. For most
 devices, this is easy, and kernel driver knows how to do
@@ -15,6 +15,27 @@ run normally so video card is normally initialized. It should not be
 problem for S1 standby, because hardware should retain its state over
 that.
 
+We either have to run the video BIOS during early resume, interpret it
+using vbetool later, or maybe do nothing at all on a particular
+system because the video state is preserved. Unfortunately, different
+methods work on different systems, and no known method suits all of
+them.
+
+A userland application called s2ram has been developed; it contains a
+long whitelist of systems and automatically selects a working method
+for a given system. It can be downloaded from CVS at
+www.sf.net/projects/suspend . If you have a system that is not in the
+whitelist, please try to find a working solution and submit a whitelist
+entry so that the work does not need to be repeated.
+
+Currently, the VBE_SAVE method (6 below) works on most
+systems. Unfortunately, vbetool only runs after userland is resumed,
+so it makes debugging of early resume problems
+hard/impossible. Methods that do not rely on userland are preferable.
+
+Details
+~~~~~~~
+
 There are a few types of systems where video works after S3 resume:
 
 (1) systems where video state is preserved over S3.
@@ -104,6 +125,7 @@ HP NX7000			??? (*)
 HP Pavilion ZD7000		vbetool post needed, need open-source nv driver for X
 HP Omnibook XE3	athlon version	none (1)
 HP Omnibook XE3GC		none (1), video is S3 Savage/IX-MV
+HP Omnibook 5150		none (1), (S1 also works OK)
 IBM TP T20, model 2647-44G	none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work.
 IBM TP A31 / Type 2652-M5G      s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(]
 IBM TP R32 / Type 2658-MMG      none (1)
@@ -120,18 +142,24 @@ IBM ThinkPad T42p (2373-GTG)	s3_bios (2)
 IBM TP X20			??? (*)
 IBM TP X30			s3_bios (2)
 IBM TP X31 / Type 2672-XXH      none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
-IBM TP X32			none (1), but backlight is on and video is trashed after long suspend
+IBM TP X32			none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results?
 IBM Thinkpad X40 Type 2371-7JG  s3_bios,s3_mode (4)
+IBM TP 600e			none(1), but a switch to console and back to X is needed
 Medion MD4220			??? (*)
 Samsung P35			vbetool needed (6)
-Sharp PC-AR10 (ATI rage)	none (1)
+Sharp PC-AR10 (ATI rage)	none (1), backlight does not switch off
 Sony Vaio PCG-C1VRX/K		s3_bios (2)
 Sony Vaio PCG-F403		??? (*)
+Sony Vaio PCG-GRT995MP		none (1), works with 'nv' X driver
+Sony Vaio PCG-GR7/K		none (1), but needs radeonfb, use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
 Sony Vaio PCG-N505SN		??? (*)
 Sony Vaio vgn-s260		X or boot-radeon can init it (5)
+Sony Vaio vgn-S580BH		vga=normal, but suspend from X. Console will be blank unless you return to X.
+Sony Vaio vgn-FS115B		s3_bios (2),s3_mode (4)
 Toshiba Libretto L5		none (1)
-Toshiba Satellite 4030CDT	s3_mode (3)
-Toshiba Satellite 4080XCDT      s3_mode (3)
+Toshiba Portege 3020CT		s3_mode (3)
+Toshiba Satellite 4030CDT	s3_mode (3) (S1 also works OK)
+Toshiba Satellite 4080XCDT      s3_mode (3) (S1 also works OK)
 Toshiba Satellite 4090XCDT      ??? (*)
 Toshiba Satellite P10-554       s3_bios,s3_mode (4)(****)
 Toshiba M30                     (2) xor X with nvidia driver using internal AGP
@@ -151,39 +179,3 @@ Asus A7V8X	    nVidia RIVA TNT2 model 64	  s3_bios,s3_mode (4)
 (***) To be tested with a newer kernel.
 
 (****) Not with SMP kernel, UP only.
-
-VBEtool details
-~~~~~~~~~~~~~~~
-(with thanks to Carl-Daniel Hailfinger)
-
-First, boot into X and run the following script ONCE:
-#!/bin/bash
-statedir=/root/s3/state
-mkdir -p $statedir
-chvt 2
-sleep 1
-vbetool vbestate save >$statedir/vbe
-
-
-To suspend and resume properly, call the following script as root:
-#!/bin/bash
-statedir=/root/s3/state
-curcons=`fgconsole`
-fuser /dev/tty$curcons 2>/dev/null|xargs ps -o comm= -p|grep -q X && chvt 2
-cat /dev/vcsa >$statedir/vcsa
-sync
-echo 3 >/proc/acpi/sleep
-sync
-vbetool post
-vbetool vbestate restore <$statedir/vbe
-cat $statedir/vcsa >/dev/vcsa
-rckbd restart
-chvt $[curcons%6+1]
-chvt $curcons
-
-
-Unless you change your graphics card or other hardware configuration,
-the state once saved will be OK for every resume afterwards.
-NOTE: The "rckbd restart" command may be different for your
-distribution. Simply replace it with the command you would use to
-set the fonts on screen.

+ 72 - 0
Documentation/powerpc/booting-without-of.txt

@@ -1365,6 +1365,78 @@ platforms are moved over to use the flattened-device-tree model.
 	};
 
 
+   g) Freescale SOC SEC Security Engines
+
+   Required properties:
+
+    - device_type : Should be "crypto"
+    - model : Model of the device.  Should be "SEC1" or "SEC2"
+    - compatible : Should be "talitos"
+    - reg : Offset and length of the register set for the device
+    - interrupts : <a b> where a is the interrupt number and b is a
+      field that represents an encoding of the sense and level
+      information for the interrupt.  This should be encoded based on
+      the information in section 2) depending on the type of interrupt
+      controller you have.
+    - interrupt-parent : the phandle for the interrupt controller that
+      services interrupts for this device.
+    - num-channels : An integer representing the number of channels
+      available.
+    - channel-fifo-len : An integer representing the number of
+      descriptor pointers each channel fetch fifo can hold.
+    - exec-units-mask : The bitmask representing what execution units
+      (EUs) are available. It's a single 32 bit cell. EU information
+      should be encoded following the SEC's Descriptor Header Dword
+      EU_SEL0 field documentation, i.e. as follows:
+
+        bit 0 = reserved - should be 0
+        bit 1 = set if SEC has the ARC4 EU (AFEU)
+        bit 2 = set if SEC has the DES/3DES EU (DEU)
+        bit 3 = set if SEC has the message digest EU (MDEU)
+        bit 4 = set if SEC has the random number generator EU (RNG)
+        bit 5 = set if SEC has the public key EU (PKEU)
+        bit 6 = set if SEC has the AES EU (AESU)
+        bit 7 = set if SEC has the Kasumi EU (KEU)
+
+      bits 8 through 31 are reserved for future SEC EUs.
+
+    - descriptor-types-mask : The bitmask representing what descriptors
+      are available. It's a single 32 bit cell. Descriptor type
+      information should be encoded following the SEC's Descriptor
+      Header Dword DESC_TYPE field documentation, i.e. as follows:
+
+        bit 0  = set if SEC supports the aesu_ctr_nonsnoop desc. type
+        bit 1  = set if SEC supports the ipsec_esp descriptor type
+        bit 2  = set if SEC supports the common_nonsnoop desc. type
+        bit 3  = set if SEC supports the 802.11i AES ccmp desc. type
+        bit 4  = set if SEC supports the hmac_snoop_no_afeu desc. type
+        bit 5  = set if SEC supports the srtp descriptor type
+        bit 6  = set if SEC supports the non_hmac_snoop_no_afeu desc.type
+        bit 7  = set if SEC supports the pkeu_assemble descriptor type
+        bit 8  = set if SEC supports the aesu_key_expand_output desc.type
+        bit 9  = set if SEC supports the pkeu_ptmul descriptor type
+        bit 10 = set if SEC supports the common_nonsnoop_afeu desc. type
+        bit 11 = set if SEC supports the pkeu_ptadd_dbl descriptor type
+
+      ..and so on and so forth.
+
+   Example:
+
+       /* MPC8548E */
+       crypto@30000 {
+               device_type = "crypto";
+               model = "SEC2";
+               compatible = "talitos";
+               reg = <30000 10000>;
+               interrupts = <1d 3>;
+               interrupt-parent = <40000>;
+               num-channels = <4>;
+               channel-fifo-len = <24>;
+               exec-units-mask = <000000fe>;
+               descriptor-types-mask = <073f1127>;
+       };
+
+
    More devices will be defined as this spec matures.
 
 

+ 8 - 7
Documentation/powerpc/eeh-pci-error-recovery.txt

@@ -121,7 +121,7 @@ accomplished.
 
 EEH must be enabled in the PHB's very early during the boot process,
 and if a PCI slot is hot-plugged. The former is performed by
-eeh_init() in arch/ppc64/kernel/eeh.c, and the later by
+eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the latter by
 drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
 EEH must be enabled before a PCI scan of the device can proceed.
 Current Power5 hardware will not work unless EEH is enabled;
@@ -133,7 +133,7 @@ error.  Given an arbitrary address, the routine
 pci_get_device_by_addr() will find the pci device associated
 with that address (if any).
 
-The default include/asm-ppc64/io.h macros readb(), inb(), insb(),
+The default include/asm-powerpc/io.h macros readb(), inb(), insb(),
 etc. include a check to see if the i/o read returned all-0xff's.
 If so, these make a call to eeh_dn_check_failure(), which in turn
 asks the firmware if the all-ff's value is the sign of a true EEH
@@ -143,11 +143,12 @@ seen in /proc/ppc64/eeh (subject to change).  Normally, almost
 all of these occur during boot, when the PCI bus is scanned, where
 a large number of 0xff reads are part of the bus scan procedure.
 
-If a frozen slot is detected, code in arch/ppc64/kernel/eeh.c will
-print a stack trace to syslog (/var/log/messages).  This stack trace
-has proven to be very useful to device-driver authors for finding
-out at what point the EEH error was detected, as the error itself
-usually occurs slightly beforehand.
+If a frozen slot is detected, code in 
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to 
+syslog (/var/log/messages).  This stack trace has proven to be very 
+useful to device-driver authors for finding out at what point the EEH 
+error was detected, as the error itself usually occurs slightly 
+beforehand.
 
 Next, it uses the Linux kernel notifier chain/work queue mechanism to
 allow any interested parties to find out about the failure.  Device

+ 2 - 2
Documentation/powerpc/hvcs.txt

@@ -558,9 +558,9 @@ partitions.
 
 The proper channel for reporting bugs is either through the Linux OS
 distribution company that provided your OS or by posting issues to the
-ppc64 development mailing list at:
+PowerPC development mailing list at:
 
-linuxppc64-dev@lists.linuxppc.org
+linuxppc-dev@ozlabs.org
 
 This request is to provide a documented and searchable public exchange
 of the problems and solutions surrounding this driver for the benefit of

+ 2 - 2
MAINTAINERS

@@ -534,7 +534,7 @@ S:   Supported
 BROADBAND PROCESSOR ARCHITECTURE
 P:	Arnd Bergmann
 M:	arnd@arndb.de
-L:	linuxppc64-dev@ozlabs.org
+L:	linuxppc-dev@ozlabs.org
 W:	http://linuxppc64.org
 S:	Supported
 
@@ -1624,7 +1624,7 @@ P:	Anton Blanchard
 M:	anton@samba.org
 M:	anton@au.ibm.com
 W:	http://linuxppc64.org
-L:	linuxppc64-dev@ozlabs.org
+L:	linuxppc-dev@ozlabs.org
 S:	Supported
 
 LINUX SECURITY MODULE (LSM) FRAMEWORK

+ 4 - 6
arch/cris/kernel/irq.c

@@ -52,9 +52,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -67,9 +66,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, "  %s", action->name);

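Many hunks in this merge are the same mechanical conversion: open-coded
loops over 0..NR_CPUS-1 guarded by cpu_online() become
for_each_online_cpu(). For reference, the iterators come from
include/linux/cpumask.h and look roughly like this (paraphrased from
memory, so check the tree for the exact definitions):

/* Approximate: iterate over the set bits of a cpumask instead of
 * scanning all NR_CPUS slots and testing each one by hand. */
#define for_each_cpu_mask(cpu, mask)		\
	for ((cpu) = first_cpu(mask);		\
	     (cpu) < NR_CPUS;			\
	     (cpu) = next_cpu((cpu), (mask)))

#define for_each_cpu(cpu)	  for_each_cpu_mask((cpu), cpu_possible_map)
#define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)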
+ 4 - 6
arch/frv/kernel/irq.c

@@ -75,9 +75,8 @@ int show_interrupts(struct seq_file *p, void *v)
 	switch (i) {
 	case 0:
 		seq_printf(p, "           ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 
 		seq_putc(p, '\n');
 		break;
@@ -100,9 +99,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
 #endif
 
 		level = group->sources[ix]->level - frv_irq_levels;

+ 14 - 10
arch/i386/Kconfig

@@ -80,6 +80,7 @@ config X86_VOYAGER
 
 config X86_NUMAQ
 	bool "NUMAQ (IBM/Sequent)"
+	select SMP
 	select NUMA
 	help
 	  This option is used for getting Linux to run on a (IBM/Sequent) NUMA
@@ -400,6 +401,7 @@ choice
 
 config NOHIGHMEM
 	bool "off"
+	depends on !X86_NUMAQ
 	---help---
 	  Linux can use up to 64 Gigabytes of physical memory on x86 systems.
 	  However, the address space of 32-bit x86 processors is only 4
@@ -436,6 +438,7 @@ config NOHIGHMEM
 
 config HIGHMEM4G
 	bool "4GB"
+	depends on !X86_NUMAQ
 	help
 	  Select this if you have a 32-bit processor and between 1 and 4
 	  gigabytes of physical RAM.
@@ -503,10 +506,6 @@ config NUMA
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT)
 
-# Need comments to help the hapless user trying to turn on NUMA support
-comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
-	depends on X86_NUMAQ && (!HIGHMEM64G || !SMP)
-
 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 	depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
 
@@ -660,13 +659,18 @@ config BOOT_IOREMAP
 	default y
 
 config REGPARM
-	bool "Use register arguments (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	default n
+	bool "Use register arguments"
+	default y
 	help
-	Compile the kernel with -mregparm=3. This uses a different ABI
-	and passes the first three arguments of a function call in registers.
-	This will probably break binary only modules.
+	Compile the kernel with -mregparm=3. This instructs gcc to use
+	a more efficient function call ABI which passes the first three
+	arguments of a function call via registers, which results in denser
+	and faster code.
+
+	If this option is disabled, then the default ABI of passing
+	arguments via the stack is used.
+
+	If unsure, say Y.
 
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"

+ 9 - 0
arch/i386/Kconfig.debug

@@ -31,6 +31,15 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
+config STACK_BACKTRACE_COLS
+	int "Stack backtraces per line" if DEBUG_KERNEL
+	range 1 3
+	default 2
+	help
+	  Selects how many stack backtrace entries per line to display.
+
+	  This can save screen space when displaying traces.
+
 comment "Page alloc debug is incompatible with Software Suspend on i386"
 	depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
 

+ 1 - 1
arch/i386/kernel/Makefile

@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds
 obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		quirks.o i8237.o topology.o
+		quirks.o i8237.o topology.o alternative.o
 
 obj-y				+= cpu/
 obj-y				+= timers/

+ 321 - 0
arch/i386/kernel/alternative.c

@@ -0,0 +1,321 @@
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <asm/alternative.h>
+#include <asm/sections.h>
+
+#define DEBUG 0
+#if DEBUG
+# define DPRINTK(fmt, args...) printk(fmt, args)
+#else
+# define DPRINTK(fmt, args...)
+#endif
+
+/* Use inline assembly to define this because the nops are defined
+   as inline assembly strings in the include files and we cannot
+   get them easily into strings. */
+asm("\t.data\nintelnops: "
+	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
+	GENERIC_NOP7 GENERIC_NOP8);
+asm("\t.data\nk8nops: "
+	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+	K8_NOP7 K8_NOP8);
+asm("\t.data\nk7nops: "
+	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+	K7_NOP7 K7_NOP8);
+
+extern unsigned char intelnops[], k8nops[], k7nops[];
+static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
+	NULL,
+	intelnops,
+	intelnops + 1,
+	intelnops + 1 + 2,
+	intelnops + 1 + 2 + 3,
+	intelnops + 1 + 2 + 3 + 4,
+	intelnops + 1 + 2 + 3 + 4 + 5,
+	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
+	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
+	NULL,
+	k8nops,
+	k8nops + 1,
+	k8nops + 1 + 2,
+	k8nops + 1 + 2 + 3,
+	k8nops + 1 + 2 + 3 + 4,
+	k8nops + 1 + 2 + 3 + 4 + 5,
+	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
+	NULL,
+	k7nops,
+	k7nops + 1,
+	k7nops + 1 + 2,
+	k7nops + 1 + 2 + 3,
+	k7nops + 1 + 2 + 3 + 4,
+	k7nops + 1 + 2 + 3 + 4 + 5,
+	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
+	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static struct nop {
+	int cpuid;
+	unsigned char **noptable;
+} noptypes[] = {
+	{ X86_FEATURE_K8, k8_nops },
+	{ X86_FEATURE_K7, k7_nops },
+	{ -1, NULL }
+};
+
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
+
+static unsigned char** find_nop_table(void)
+{
+	unsigned char **noptable = intel_nops;
+	int i;
+
+	for (i = 0; noptypes[i].cpuid >= 0; i++) {
+		if (boot_cpu_has(noptypes[i].cpuid)) {
+			noptable = noptypes[i].noptable;
+			break;
+		}
+	}
+	return noptable;
+}
+
+/* Replace instructions with better alternatives for this CPU type.
+   This runs before SMP is initialized to avoid SMP problems with
+   self-modifying code. This implies that asymmetric systems where
+   APs have fewer capabilities than the boot processor are not handled.
+   Tough. Make sure you disable such features by hand. */
+
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+	unsigned char **noptable = find_nop_table();
+	struct alt_instr *a;
+	int diff, i, k;
+
+	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
+	for (a = start; a < end; a++) {
+		BUG_ON(a->replacementlen > a->instrlen);
+		if (!boot_cpu_has(a->cpuid))
+			continue;
+		memcpy(a->instr, a->replacement, a->replacementlen);
+		diff = a->instrlen - a->replacementlen;
+		/* Pad the rest with nops */
+		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
+			k = diff;
+			if (k > ASM_NOP_MAX)
+				k = ASM_NOP_MAX;
+			memcpy(a->instr + i, noptable[k], k);
+		}
+	}
+}
+
+static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
+{
+	struct alt_instr *a;
+
+	DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
+	for (a = start; a < end; a++) {
+		memcpy(a->replacement + a->replacementlen,
+		       a->instr,
+		       a->instrlen);
+	}
+}
+
+static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
+{
+	struct alt_instr *a;
+
+	for (a = start; a < end; a++) {
+		memcpy(a->instr,
+		       a->replacement + a->replacementlen,
+		       a->instrlen);
+	}
+}
+
+static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
+{
+	u8 **ptr;
+
+	for (ptr = start; ptr < end; ptr++) {
+		if (*ptr < text)
+			continue;
+		if (*ptr > text_end)
+			continue;
+		**ptr = 0xf0; /* lock prefix */
+	};
+}
+
+static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
+{
+	unsigned char **noptable = find_nop_table();
+	u8 **ptr;
+
+	for (ptr = start; ptr < end; ptr++) {
+		if (*ptr < text)
+			continue;
+		if (*ptr > text_end)
+			continue;
+		**ptr = noptable[1][0];
+	};
+}
+
+struct smp_alt_module {
+	/* what is this ??? */
+	struct module	*mod;
+	char		*name;
+
+	/* ptrs to lock prefixes */
+	u8		**locks;
+	u8		**locks_end;
+
+	/* .text segment, needed to avoid patching init code ;) */
+	u8		*text;
+	u8		*text_end;
+
+	struct list_head next;
+};
+static LIST_HEAD(smp_alt_modules);
+static DEFINE_SPINLOCK(smp_alt);
+
+static int smp_alt_once = 0;
+static int __init bootonly(char *str)
+{
+	smp_alt_once = 1;
+	return 1;
+}
+__setup("smp-alt-boot", bootonly);
+
+void alternatives_smp_module_add(struct module *mod, char *name,
+				 void *locks, void *locks_end,
+				 void *text,  void *text_end)
+{
+	struct smp_alt_module *smp;
+	unsigned long flags;
+
+	if (smp_alt_once) {
+		if (boot_cpu_has(X86_FEATURE_UP))
+			alternatives_smp_unlock(locks, locks_end,
+						text, text_end);
+		return;
+	}
+
+	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
+	if (NULL == smp)
+		return; /* we'll run the (safe but slow) SMP code then ... */
+
+	smp->mod	= mod;
+	smp->name	= name;
+	smp->locks	= locks;
+	smp->locks_end	= locks_end;
+	smp->text	= text;
+	smp->text_end	= text_end;
+	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
+		__FUNCTION__, smp->locks, smp->locks_end,
+		smp->text, smp->text_end, smp->name);
+
+	spin_lock_irqsave(&smp_alt, flags);
+	list_add_tail(&smp->next, &smp_alt_modules);
+	if (boot_cpu_has(X86_FEATURE_UP))
+		alternatives_smp_unlock(smp->locks, smp->locks_end,
+					smp->text, smp->text_end);
+	spin_unlock_irqrestore(&smp_alt, flags);
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+	struct smp_alt_module *item;
+	unsigned long flags;
+
+	if (smp_alt_once)
+		return;
+
+	spin_lock_irqsave(&smp_alt, flags);
+	list_for_each_entry(item, &smp_alt_modules, next) {
+		if (mod != item->mod)
+			continue;
+		list_del(&item->next);
+		spin_unlock_irqrestore(&smp_alt, flags);
+		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
+		kfree(item);
+		return;
+	}
+	spin_unlock_irqrestore(&smp_alt, flags);
+}
+
+void alternatives_smp_switch(int smp)
+{
+	struct smp_alt_module *mod;
+	unsigned long flags;
+
+	if (smp_alt_once)
+		return;
+	BUG_ON(!smp && (num_online_cpus() > 1));
+
+	spin_lock_irqsave(&smp_alt, flags);
+	if (smp) {
+		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
+		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
+		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		alternatives_smp_apply(__smp_alt_instructions,
+				       __smp_alt_instructions_end);
+		list_for_each_entry(mod, &smp_alt_modules, next)
+			alternatives_smp_lock(mod->locks, mod->locks_end,
+					      mod->text, mod->text_end);
+	} else {
+		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
+		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		apply_alternatives(__smp_alt_instructions,
+				   __smp_alt_instructions_end);
+		list_for_each_entry(mod, &smp_alt_modules, next)
+			alternatives_smp_unlock(mod->locks, mod->locks_end,
+						mod->text, mod->text_end);
+	}
+	spin_unlock_irqrestore(&smp_alt, flags);
+}
+
+void __init alternative_instructions(void)
+{
+	apply_alternatives(__alt_instructions, __alt_instructions_end);
+
+	/* switch to patch-once-at-boottime-only mode and free the
+	 * tables in case we know the number of CPUs will never ever
+	 * change */
+#ifdef CONFIG_HOTPLUG_CPU
+	if (num_possible_cpus() < 2)
+		smp_alt_once = 1;
+#else
+	smp_alt_once = 1;
+#endif
+
+	if (smp_alt_once) {
+		if (1 == num_possible_cpus()) {
+			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
+			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+			apply_alternatives(__smp_alt_instructions,
+					   __smp_alt_instructions_end);
+			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
+						_text, _etext);
+		}
+		free_init_pages("SMP alternatives",
+				(unsigned long)__smp_alt_begin,
+				(unsigned long)__smp_alt_end);
+	} else {
+		alternatives_smp_save(__smp_alt_instructions,
+				      __smp_alt_instructions_end);
+		alternatives_smp_module_add(NULL, "core kernel",
+					    __smp_locks, __smp_locks_end,
+					    _text, _etext);
+		alternatives_smp_switch(0);
+	}
+}

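apply_alternatives() above walks a table of struct alt_instr records
collected in the .altinstructions section. The layout below is
reconstructed from how the fields are used in this file; the
authoritative definition lives in include/asm-i386/alternative.h:

/* Reconstructed from usage in alternative.c, for illustration only. */
struct alt_instr {
	u8 *instr;		/* original instruction to patch */
	u8 *replacement;	/* replacement bytes */
	u8  cpuid;		/* X86_FEATURE_* bit that enables it */
	u8  instrlen;		/* length of the original instruction */
	u8  replacementlen;	/* length of replacement, <= instrlen */
	u8  pad;		/* padding to keep entries aligned */
};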
+ 1 - 0
arch/i386/kernel/apic.c

@@ -38,6 +38,7 @@
 #include <asm/i8253.h>
 
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 #include <mach_ipi.h>
 
 #include "io_ports.h"

+ 1 - 0
arch/i386/kernel/cpu/centaur.c

@@ -4,6 +4,7 @@
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/e820.h>
+#include <asm/mtrr.h>
 #include "cpu.h"
 
 #ifdef CONFIG_X86_OOSTORE

+ 30 - 17
arch/i386/kernel/cpu/common.c

@@ -25,9 +25,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
 
-static int cachesize_override __devinitdata = -1;
-static int disable_x86_fxsr __devinitdata = 0;
-static int disable_x86_serial_nr __devinitdata = 1;
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
@@ -59,7 +60,7 @@ static int __init cachesize_setup(char *str)
 }
 __setup("cachesize=", cachesize_setup);
 
-int __devinit get_model_name(struct cpuinfo_x86 *c)
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
 {
 	unsigned int *v;
 	char *p, *q;
@@ -89,7 +90,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c)
 }
 
 
-void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 {
 	unsigned int n, dummy, ecx, edx, l2size;
 
@@ -130,7 +131,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
 /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
 
 /* Look up CPU names by table lookup. */
-static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
 {
 	struct cpu_model_info *info;
 
@@ -151,7 +152,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
 }
 
 
-static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
 {
 	char *v = c->x86_vendor_id;
 	int i;
@@ -187,6 +188,14 @@ static int __init x86_fxsr_setup(char * s)
 __setup("nofxsr", x86_fxsr_setup);
 
 
+static int __init x86_sep_setup(char * s)
+{
+	disable_x86_sep = 1;
+	return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
 /* Standard macro to see if a specific flag is changeable */
 static inline int flag_is_changeable_p(u32 flag)
 {
@@ -210,7 +219,7 @@ static inline int flag_is_changeable_p(u32 flag)
 
 
 /* Probe for the CPUID instruction */
-static int __devinit have_cpuid_p(void)
+static int __cpuinit have_cpuid_p(void)
 {
 	return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -254,7 +263,7 @@ static void __init early_cpu_detect(void)
 	}
 }
 
-void __devinit generic_identify(struct cpuinfo_x86 * c)
+void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 	u32 tfms, xlvl;
 	int junk;
@@ -307,7 +316,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c)
 #endif
 }
 
-static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
 		/* Disable processor serial number */
@@ -335,7 +344,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __devinit identify_cpu(struct cpuinfo_x86 *c)
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
@@ -405,6 +414,10 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 		clear_bit(X86_FEATURE_XMM, c->x86_capability);
 	}
 
+	/* SEP disabled? */
+	if (disable_x86_sep)
+		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
 	if (disable_pse)
 		clear_bit(X86_FEATURE_PSE, c->x86_capability);
 
@@ -417,7 +430,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 		else
 			/* Last resort... */
 			sprintf(c->x86_model_id, "%02x/%02x",
-				c->x86_vendor, c->x86_model);
+				c->x86, c->x86_model);
 	}
 
 	/* Now the feature flags better reflect actual CPU features! */
@@ -453,7 +466,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_X86_HT
-void __devinit detect_ht(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 	u32 	eax, ebx, ecx, edx;
 	int 	index_msb, core_bits;
@@ -500,7 +513,7 @@ void __devinit detect_ht(struct cpuinfo_x86 *c)
 }
 #endif
 
-void __devinit print_cpu_info(struct cpuinfo_x86 *c)
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	char *vendor = NULL;
 
@@ -523,7 +536,7 @@ void __devinit print_cpu_info(struct cpuinfo_x86 *c)
 		printk("\n");
 }
 
-cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
 /* This is hacky. :)
  * We're emulating future behavior.
@@ -570,7 +583,7 @@ void __init early_cpu_init(void)
  * and IDT. We reload them nevertheless, this function acts as a
  * 'CPU state barrier', nothing should get across.
  */
-void __devinit cpu_init(void)
+void __cpuinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
@@ -670,7 +683,7 @@ void __devinit cpu_init(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-void __devinit cpu_uninit(void)
+void __cpuinit cpu_uninit(void)
 {
 	int cpu = raw_smp_processor_id();
 	cpu_clear(cpu, cpu_initialized);

+ 1 - 3
arch/i386/kernel/cpu/cpufreq/powernow-k8.c

@@ -1145,9 +1145,7 @@ static int __cpuinit powernowk8_init(void)
 {
 	unsigned int i, supported_cpus = 0;
 
-	for (i=0; i<NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_cpu(i) {
 		if (check_supported_cpu(i))
 			supported_cpus++;
 	}

+ 6 - 6
arch/i386/kernel/cpu/intel.c

@@ -29,7 +29,7 @@ extern int trap_init_f00f_bug(void);
 struct movsl_mask movsl_mask __read_mostly;
 #endif
 
-void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
+void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor != X86_VENDOR_INTEL)
 		return;
@@ -44,7 +44,7 @@ void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
  *	This is called before we do cpu ident work
  */
  
-int __devinit ppro_with_ram_bug(void)
+int __cpuinit ppro_with_ram_bug(void)
 {
 	/* Uses data from early_cpu_detect now */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -62,7 +62,7 @@ int __devinit ppro_with_ram_bug(void)
  * P4 Xeon errata 037 workaround.
  * Hardware prefetcher may cause stale data to be loaded into the cache.
  */
-static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 {
 	unsigned long lo, hi;
 
@@ -81,7 +81,7 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 /*
  * find out the number of processor cores on the die
  */
-static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
+static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
 {
 	unsigned int eax, ebx, ecx, edx;
 
@@ -96,7 +96,7 @@ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
 		return 1;
 }
 
-static void __devinit init_intel(struct cpuinfo_x86 *c)
+static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 {
 	unsigned int l2 = 0;
 	char *p = NULL;
@@ -205,7 +205,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 	return size;
 }
 
-static struct cpu_dev intel_cpu_dev __devinitdata = {
+static struct cpu_dev intel_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Intel",
 	.c_ident 	= { "GenuineIntel" },
 	.c_models = {

+ 2 - 2
arch/i386/kernel/cpu/intel_cacheinfo.c

@@ -174,7 +174,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 
-	if (c->cpuid_level > 4) {
+	if (c->cpuid_level > 3) {
 		static int is_initialized;
 
 		if (is_initialized == 0) {
@@ -330,7 +330,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 		}
 	}
 }
-static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 {
 	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 	int sibling;

+ 1 - 1
arch/i386/kernel/cpu/proc.c

@@ -40,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		/* Other (Linux-defined) */
 		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
 		NULL, NULL, NULL, NULL,
-		"constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		"constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 

+ 1 - 1
arch/i386/kernel/crash.c

@@ -105,7 +105,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
 		return 1;
 	local_irq_disable();
 
-	if (!user_mode(regs)) {
+	if (!user_mode_vm(regs)) {
 		crash_fixup_ss_esp(&fixed_regs, regs);
 		regs = &fixed_regs;
 	}

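The user_mode() to user_mode_vm() switch here (and in process.c below)
matters because plain user_mode() only inspects the privilege level in
the saved CS and therefore misclassifies VM86 mode. The two macros read
roughly as follows (per include/asm-i386/ptrace.h of this era, quoted
from memory, so verify against the tree):

#define user_mode(regs)		(3 & (regs)->xcs)
#define user_mode_vm(regs)	((VM_MASK & (regs)->eflags) || user_mode(regs))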
+ 4 - 0
arch/i386/kernel/entry.S

@@ -226,6 +226,10 @@ ENTRY(system_call)
 	pushl %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+	testl $TF_MASK,EFLAGS(%esp)
+	jz no_singlestep
+	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
 					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)

+ 1 - 4
arch/i386/kernel/head.S

@@ -450,7 +450,6 @@ int_msg:
 
 .globl boot_gdt_descr
 .globl idt_descr
-.globl cpu_gdt_descr
 
 	ALIGN
 # early boot GDT descriptor (must use 1:1 address mapping)
@@ -470,8 +469,6 @@ cpu_gdt_descr:
 	.word GDT_ENTRIES*8-1
 	.long cpu_gdt_table
 
-	.fill NR_CPUS-1,8,0		# space for the other GDT descriptors
-
 /*
  * The boot_gdt_table must mirror the equivalent in setup.S and is
  * used only for booting.
@@ -485,7 +482,7 @@ ENTRY(boot_gdt_table)
 /*
  * The Global Descriptor Table contains 28 quadwords, per-CPU.
  */
-	.align PAGE_SIZE_asm
+	.align L1_CACHE_BYTES
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
 	.quad 0x0000000000000000	/* 0x0b reserved */

+ 11 - 14
arch/i386/kernel/io_apic.c

@@ -351,8 +351,8 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
 {
 	int i, j;
 	Dprintk("Rotating IRQs among CPUs.\n");
-	for (i = 0; i < NR_CPUS; i++) {
-		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+	for_each_online_cpu(i) {
+		for (j = 0; j < NR_IRQS; j++) {
 			if (!irq_desc[j].action)
 				continue;
 			/* Is it a significant load ?  */
@@ -381,7 +381,7 @@ static void do_irq_balance(void)
 	unsigned long imbalance = 0;
 	cpumask_t allowed_mask, target_cpu_mask, tmp;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_cpu(i) {
 		int package_index;
 		CPU_IRQ(i) = 0;
 		if (!cpu_online(i))
@@ -422,9 +422,7 @@ static void do_irq_balance(void)
 		}
 	}
 	/* Find the least loaded processor package */
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_online_cpu(i) {
 		if (i != CPU_TO_PACKAGEINDEX(i))
 			continue;
 		if (min_cpu_irq > CPU_IRQ(i)) {
@@ -441,9 +439,7 @@ tryanothercpu:
 	 */
 	tmp_cpu_irq = 0;
 	tmp_loaded = -1;
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_online_cpu(i) {
 		if (i != CPU_TO_PACKAGEINDEX(i))
 			continue;
 		if (max_cpu_irq <= CPU_IRQ(i)) 
@@ -619,9 +615,7 @@ static int __init balanced_irq_init(void)
 	if (smp_num_siblings > 1 && !cpus_empty(tmp))
 		physical_balance = 1;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_online(i))
-			continue;
+	for_each_online_cpu(i) {
 		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
 		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
 		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
@@ -638,9 +632,11 @@ static int __init balanced_irq_init(void)
 	else 
 		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_cpu(i) {
 		kfree(irq_cpu_data[i].irq_delta);
+		irq_cpu_data[i].irq_delta = NULL;
 		kfree(irq_cpu_data[i].last_irq);
+		irq_cpu_data[i].last_irq = NULL;
 	}
 	return 0;
 }
@@ -1761,7 +1757,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
 	 * no meaning without the serial APIC bus.
 	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
 		return;
 	/*
 	 * This is broken; anything with a real cpu count has to

+ 2 - 2
arch/i386/kernel/kprobes.c

@@ -84,9 +84,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	down(&kprobe_mutex);
+	mutex_lock(&kprobe_mutex);
 	free_insn_slot(p->ainsn.insn);
-	up(&kprobe_mutex);
+	mutex_unlock(&kprobe_mutex);
 }
 
 static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)

+ 22 - 10
arch/i386/kernel/module.c

@@ -104,26 +104,38 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 	return -ENOEXEC;
 }
 
-extern void apply_alternatives(void *start, void *end); 
-
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	const Elf_Shdr *s;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
-	/* look for .altinstructions to patch */ 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 
-		void *seg; 		
-		if (strcmp(".altinstructions", secstrings + s->sh_name))
-			continue;
-		seg = (void *)s->sh_addr; 
-		apply_alternatives(seg, seg + s->sh_size); 
-	} 	
+		if (!strcmp(".text", secstrings + s->sh_name))
+			text = s;
+		if (!strcmp(".altinstructions", secstrings + s->sh_name))
+			alt = s;
+		if (!strcmp(".smp_locks", secstrings + s->sh_name))
+			locks= s;
+	}
+
+	if (alt) {
+		/* patch .altinstructions */
+		void *aseg = (void *)alt->sh_addr;
+		apply_alternatives(aseg, aseg + alt->sh_size);
+	}
+	if (locks && text) {
+		void *lseg = (void *)locks->sh_addr;
+		void *tseg = (void *)text->sh_addr;
+		alternatives_smp_module_add(me, me->name,
+					    lseg, lseg + locks->sh_size,
+					    tseg, tseg + text->sh_size);
+	}
 	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
+	alternatives_smp_module_del(mod);
 }

+ 4 - 3
arch/i386/kernel/mpparse.c

@@ -828,6 +828,8 @@ void __init find_smp_config (void)
 		smp_scan_config(address, 0x400);
 }
 
+int es7000_plat;
+
 /* --------------------------------------------------------------------------
                             ACPI-based MP Configuration
    -------------------------------------------------------------------------- */
@@ -935,7 +937,8 @@ void __init mp_register_ioapic (
 	mp_ioapics[idx].mpc_apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		&& !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
 		tmpid = io_apic_get_unique_id(idx, id);
 	else
 		tmpid = id;
@@ -1011,8 +1014,6 @@ void __init mp_override_legacy_irq (
 	return;
 }
 
-int es7000_plat;
-
 void __init mp_config_acpi_legacy_irqs (void)
 {
 	struct mpc_config_intsrc intsrc;

+ 3 - 3
arch/i386/kernel/nmi.c

@@ -143,7 +143,7 @@ static int __init check_nmi_watchdog(void)
 	local_irq_enable();
 	mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_cpu(cpu) {
 #ifdef CONFIG_SMP
 		/* Check cpu_callin_map here because that is set
 		   after the timer is started. */
@@ -510,7 +510,7 @@ void touch_nmi_watchdog (void)
 	 * Just reset the alert counters, (other CPUs might be
 	 * spinning on locks we hold):
 	 */
-	for (i = 0; i < NR_CPUS; i++)
+	for_each_cpu(i)
 		alert_counter[i] = 0;
 
 	/*
@@ -543,7 +543,7 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 			/*
 			 * die_nmi will return ONLY if NOTIFY_STOP happens..
 			 */
-			die_nmi(regs, "NMI Watchdog detected LOCKUP");
+			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
 	} else {
 		last_irq_sums[cpu] = sum;
 		alert_counter[cpu] = 0;
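
The NR_CPUS loops become cpumask iterators: in this kernel for_each_cpu() walks every possible CPU and for_each_online_cpu() only the ones currently up, so sparse or hotplugged configurations no longer touch per-CPU state of CPUs that never booted. The same mechanical conversion recurs in the oprofile, m32r, mips, ip27 and parisc hunks below. A sketch of the distinction (hypothetical alert[] array):

	static int alert[NR_CPUS];

	static void reset_alerts(void)
	{
		int cpu;

		for_each_cpu(cpu)		/* every possible CPU */
			alert[cpu] = 0;

		for_each_online_cpu(cpu)	/* only CPUs currently online */
			alert[cpu]++;
	}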

+ 1 - 1
arch/i386/kernel/process.c

@@ -295,7 +295,7 @@ void show_regs(struct pt_regs * regs)
 	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->eip);
 
-	if (user_mode(regs))
+	if (user_mode_vm(regs))
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
 	printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
 	       regs->eflags, print_tainted(), system_utsname.release,

+ 2 - 2
arch/i386/kernel/ptrace.c

@@ -34,10 +34,10 @@
 
 /*
  * Determines which flags the user has access to [1 = access, 0 = no access].
- * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9).
+ * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9).
  * Also masks reserved bits (31-22, 15, 5, 3, 1).
  */
-#define FLAG_MASK 0x00054dd5
+#define FLAG_MASK 0x00050dd5
 
 /* sets the trap flag. */
 #define TRAP_FLAG 0x100
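
The new mask can be verified by summing the flags the comment still allows: CF(0), PF(2), AF(4), ZF(6), SF(7), TF(8), DF(10) and OF(11) give 0x0dd5, and RF(16) plus AC(18) give 0x50000, for a writable set of 0x00050dd5; clearing NT(14) is exactly the difference from the old 0x00054dd5. An equivalent spelled-out form (illustrative; the header keeps the literal):

	#define FLAG_MASK (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \
			   X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | \
			   X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_RF | \
			   X86_EFLAGS_AC)	/* == 0x00050dd5 */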

+ 4 - 4
arch/i386/kernel/semaphore.c

@@ -110,11 +110,11 @@ asm(
 ".align	4\n"
 ".globl	__write_lock_failed\n"
 "__write_lock_failed:\n\t"
-	LOCK "addl	$" RW_LOCK_BIAS_STR ",(%eax)\n"
+	LOCK_PREFIX "addl	$" RW_LOCK_BIAS_STR ",(%eax)\n"
 "1:	rep; nop\n\t"
 	"cmpl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
 	"jne	1b\n\t"
-	LOCK "subl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+	LOCK_PREFIX "subl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
 	"jnz	__write_lock_failed\n\t"
 	"ret"
 );
@@ -124,11 +124,11 @@ asm(
 ".align	4\n"
 ".globl	__read_lock_failed\n"
 "__read_lock_failed:\n\t"
-	LOCK "incl	(%eax)\n"
+	LOCK_PREFIX "incl	(%eax)\n"
 "1:	rep; nop\n\t"
 	"cmpl	$1,(%eax)\n\t"
 	"js	1b\n\t"
-	LOCK "decl	(%eax)\n\t"
+	LOCK_PREFIX "decl	(%eax)\n\t"
 	"js	__read_lock_failed\n\t"
 	"ret"
 );
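
LOCK_PREFIX replaces the bare LOCK macro here and in the atomic/bitops headers: on SMP builds it still emits the lock byte, but it additionally records the instruction's address in the new .smp_locks section (reserved by the vmlinux.lds.S hunk below) so alternatives_smp_switch() can rewrite the prefix to a NOP when only one CPU is online, and back again when a second comes up. Roughly, per the alternative.h added by this merge (quoted from memory, so treat as a sketch):

	#ifdef CONFIG_SMP
	#define LOCK_PREFIX \
			".section .smp_locks,\"a\"\n"	\
			"  .align 4\n"			\
			"  .long 661f\n" /* address of the lock prefix */ \
			".previous\n"			\
			"661:\n\tlock; "
	#else
	#define LOCK_PREFIX ""
	#endif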

+ 10 - 108
arch/i386/kernel/setup.c

@@ -1377,101 +1377,6 @@ static void __init register_memory(void)
 		pci_mem_start, gapstart, gapsize);
 }
 
-/* Use inline assembly to define this because the nops are defined 
-   as inline assembly strings in the include files and we cannot 
-   get them easily into strings. */
-asm("\t.data\nintelnops: " 
-    GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
-    GENERIC_NOP7 GENERIC_NOP8); 
-asm("\t.data\nk8nops: " 
-    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-    K8_NOP7 K8_NOP8); 
-asm("\t.data\nk7nops: " 
-    K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-    K7_NOP7 K7_NOP8); 
-    
-extern unsigned char intelnops[], k8nops[], k7nops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     intelnops,
-     intelnops + 1,
-     intelnops + 1 + 2,
-     intelnops + 1 + 2 + 3,
-     intelnops + 1 + 2 + 3 + 4,
-     intelnops + 1 + 2 + 3 + 4 + 5,
-     intelnops + 1 + 2 + 3 + 4 + 5 + 6,
-     intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     k8nops,
-     k8nops + 1,
-     k8nops + 1 + 2,
-     k8nops + 1 + 2 + 3,
-     k8nops + 1 + 2 + 3 + 4,
-     k8nops + 1 + 2 + 3 + 4 + 5,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     k7nops,
-     k7nops + 1,
-     k7nops + 1 + 2,
-     k7nops + 1 + 2 + 3,
-     k7nops + 1 + 2 + 3 + 4,
-     k7nops + 1 + 2 + 3 + 4 + 5,
-     k7nops + 1 + 2 + 3 + 4 + 5 + 6,
-     k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-static struct nop { 
-     int cpuid; 
-     unsigned char **noptable; 
-} noptypes[] = { 
-     { X86_FEATURE_K8, k8_nops }, 
-     { X86_FEATURE_K7, k7_nops }, 
-     { -1, NULL }
-}; 
-
-/* Replace instructions with better alternatives for this CPU type.
-
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that assymetric systems where
-   APs have less capabilities than the boot processor are not handled. 
-   Tough. Make sure you disable such features by hand. */ 
-void apply_alternatives(void *start, void *end) 
-{ 
-	struct alt_instr *a; 
-	int diff, i, k;
-        unsigned char **noptable = intel_nops; 
-	for (i = 0; noptypes[i].cpuid >= 0; i++) { 
-		if (boot_cpu_has(noptypes[i].cpuid)) { 
-			noptable = noptypes[i].noptable;
-			break;
-		}
-	} 
-	for (a = start; (void *)a < end; a++) { 
-		if (!boot_cpu_has(a->cpuid))
-			continue;
-		BUG_ON(a->replacementlen > a->instrlen); 
-		memcpy(a->instr, a->replacement, a->replacementlen); 
-		diff = a->instrlen - a->replacementlen; 
-		/* Pad the rest with nops */
-		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-			k = diff;
-			if (k > ASM_NOP_MAX)
-				k = ASM_NOP_MAX;
-			memcpy(a->instr + i, noptable[k], k); 
-		} 
-	}
-} 
-
-void __init alternative_instructions(void)
-{
-	extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-	apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
 static char * __init machine_specific_memory_setup(void);
 
 #ifdef CONFIG_MCA
@@ -1554,6 +1459,16 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_cmdline_early(cmdline_p);
 
+#ifdef CONFIG_EARLY_PRINTK
+	{
+		char *s = strstr(*cmdline_p, "earlyprintk=");
+		if (s) {
+			setup_early_printk(strchr(s, '=') + 1);
+			printk("early console enabled\n");
+		}
+	}
+#endif
+
 	max_low_pfn = setup_memory();
 
 	/*
@@ -1578,19 +1493,6 @@ void __init setup_arch(char **cmdline_p)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 
-#ifdef CONFIG_EARLY_PRINTK
-	{
-		char *s = strstr(*cmdline_p, "earlyprintk=");
-		if (s) {
-			extern void setup_early_printk(char *);
-
-			setup_early_printk(strchr(s, '=') + 1);
-			printk("early console enabled\n");
-		}
-	}
-#endif
-
-
 	dmi_scan_machine();
 
 #ifdef CONFIG_X86_GENERICARCH

+ 2 - 5
arch/i386/kernel/signal.c

@@ -123,7 +123,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
 	  err |= __get_user(tmp, &sc->seg);				\
 	  loadsegment(seg,tmp); }
 
-#define	FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
+#define	FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_RF |		 \
+			 X86_EFLAGS_OF | X86_EFLAGS_DF |		 \
 			 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
 			 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
 
@@ -582,9 +583,6 @@ static void fastcall do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (try_to_freeze())
-		goto no_signal;
-
 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
 		oldset = &current->saved_sigmask;
 	else
@@ -613,7 +611,6 @@ static void fastcall do_signal(struct pt_regs *regs)
 		return;
 	}
 
-no_signal:
 	/* Did we come from a system call? */
 	if (regs->orig_eax >= 0) {
 		/* Restart the system call - no handlers present */

+ 3 - 0
arch/i386/kernel/smpboot.c

@@ -899,6 +899,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	unsigned short nmi_high = 0, nmi_low = 0;
 
 	++cpucount;
+	alternatives_smp_switch(1);
 
 	/*
 	 * We can't use kernel_thread since we must avoid to
@@ -1368,6 +1369,8 @@ void __cpu_die(unsigned int cpu)
 		/* They ack this in play_dead by setting CPU_DEAD */
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
 			printk ("CPU %d is now offline\n", cpu);
+			if (1 == num_online_cpus())
+				alternatives_smp_switch(0);
 			return;
 		}
 		msleep(100);

+ 9 - 0
arch/i386/kernel/topology.c

@@ -41,6 +41,15 @@ int arch_register_cpu(int num){
 		parent = &node_devices[node].node;
 #endif /* CONFIG_NUMA */
 
+	/*
+	 * CPU0 cannot be offlined due to several restrictions and
+	 * assumptions in the kernel. Not registering a control file
+	 * here means one cannot even attempt to offline the BSP.
+	 */
+	if (!num)
+		cpu_devices[num].cpu.no_control = 1;
+
 	return register_cpu(&cpu_devices[num].cpu, num, parent);
 }
 

+ 40 - 17
arch/i386/kernel/traps.c

@@ -99,6 +99,8 @@ int register_die_notifier(struct notifier_block *nb)
 {
 	int err = 0;
 	unsigned long flags;
+
+	vmalloc_sync_all();
 	spin_lock_irqsave(&die_notifier_lock, flags);
 	err = notifier_chain_register(&i386die_chain, nb);
 	spin_unlock_irqrestore(&die_notifier_lock, flags);
@@ -112,12 +114,30 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 		p < (void *)tinfo + THREAD_SIZE - 3;
 }
 
-static void print_addr_and_symbol(unsigned long addr, char *log_lvl)
+/*
+ * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line.
+ */
+static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl,
+					int printed)
 {
-	printk(log_lvl);
+	if (!printed)
+		printk(log_lvl);
+
+#if CONFIG_STACK_BACKTRACE_COLS == 1
 	printk(" [<%08lx>] ", addr);
+#else
+	printk(" <%08lx> ", addr);
+#endif
 	print_symbol("%s", addr);
-	printk("\n");
+
+	printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS;
+
+	if (printed)
+		printk("  ");
+	else
+		printk("\n");
+
+	return printed;
 }
 
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -125,20 +145,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				char *log_lvl)
 {
 	unsigned long addr;
+	int printed = 0; /* nr of entries already printed on current line */
 
 #ifdef	CONFIG_FRAME_POINTER
 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
 		addr = *(unsigned long *)(ebp + 4);
-		print_addr_and_symbol(addr, log_lvl);
+		printed = print_addr_and_symbol(addr, log_lvl, printed);
 		ebp = *(unsigned long *)ebp;
 	}
 #else
 	while (valid_stack_ptr(tinfo, stack)) {
 		addr = *stack++;
 		if (__kernel_text_address(addr))
-			print_addr_and_symbol(addr, log_lvl);
+			printed = print_addr_and_symbol(addr, log_lvl, printed);
 	}
 #endif
+	if (printed)
+		printk("\n");
+
 	return ebp;
 }
 
@@ -166,8 +190,7 @@ static void show_trace_log_lvl(struct task_struct *task,
 		stack = (unsigned long*)context->previous_esp;
 		if (!stack)
 			break;
-		printk(log_lvl);
-		printk(" =======================\n");
+		printk("%s =======================\n", log_lvl);
 	}
 }
 
@@ -194,21 +217,17 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
 	for(i = 0; i < kstack_depth_to_print; i++) {
 		if (kstack_end(stack))
 			break;
-		if (i && ((i % 8) == 0)) {
-			printk("\n");
-			printk(log_lvl);
-			printk("       ");
-		}
+		if (i && ((i % 8) == 0))
+			printk("\n%s       ", log_lvl);
 		printk("%08lx ", *stack++);
 	}
-	printk("\n");
-	printk(log_lvl);
-	printk("Call Trace:\n");
+	printk("\n%sCall Trace:\n", log_lvl);
 	show_trace_log_lvl(task, esp, log_lvl);
 }
 
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
+	printk("       ");
 	show_stack_log_lvl(task, esp, "");
 }
 
@@ -233,7 +252,7 @@ void show_registers(struct pt_regs *regs)
 
 	esp = (unsigned long) (&regs->esp);
 	savesegment(ss, ss);
-	if (user_mode(regs)) {
+	if (user_mode_vm(regs)) {
 		in_kernel = 0;
 		esp = regs->esp;
 		ss = regs->xss & 0xffff;
@@ -333,6 +352,8 @@ void die(const char * str, struct pt_regs * regs, long err)
 	static int die_counter;
 	unsigned long flags;
 
+	oops_enter();
+
 	if (die.lock_owner != raw_smp_processor_id()) {
 		console_verbose();
 		spin_lock_irqsave(&die.lock, flags);
@@ -385,6 +406,7 @@ void die(const char * str, struct pt_regs * regs, long err)
 		ssleep(5);
 		panic("Fatal exception");
 	}
+	oops_exit();
 	do_exit(SIGSEGV);
 }
 
@@ -623,7 +645,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
 	/* If we are in kernel mode we are probably nested up pretty bad
 	 * and might as well get out now while we still can.
 	 */
-	if (!user_mode(regs)) {
+	if (!user_mode_vm(regs)) {
 		current->thread.trap_no = 2;
 		crash_kexec(regs);
 	}
@@ -694,6 +716,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
 
 void set_nmi_callback(nmi_callback_t callback)
 {
+	vmalloc_sync_all();
 	rcu_assign_pointer(nmi_callback, callback);
 }
 EXPORT_SYMBOL_GPL(set_nmi_callback);
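
Both registration paths gain a vmalloc_sync_all() call because the handler being registered typically lives in module space, which is mapped lazily per page table by vmalloc_fault(); syncing up front guarantees the handler text is present in every pgd before it can run from NMI or die context, where a recursive vmalloc fault would be fatal. A minimal caller (hypothetical module, names illustrative):

	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <asm/kdebug.h>

	static int my_die_handler(struct notifier_block *nb,
				  unsigned long val, void *data)
	{
		return NOTIFY_DONE;
	}

	static struct notifier_block my_die_nb = {
		.notifier_call = my_die_handler,
	};

	static int __init my_init(void)
	{
		/* register_die_notifier() now syncs the vmalloc mappings
		   first, so my_die_handler is reachable from any context */
		return register_die_notifier(&my_die_nb);
	}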

+ 20 - 0
arch/i386/kernel/vmlinux.lds.S

@@ -68,6 +68,26 @@ SECTIONS
 	*(.data.init_task)
   }
 
+  /* might get freed after init */
+  . = ALIGN(4096);
+  __smp_alt_begin = .;
+  __smp_alt_instructions = .;
+  .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+	*(.smp_altinstructions)
+  }
+  __smp_alt_instructions_end = .;
+  . = ALIGN(4);
+  __smp_locks = .;
+  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+	*(.smp_locks)
+  }
+  __smp_locks_end = .;
+  .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
+	*(.smp_altinstr_replacement)
+  }
+  . = ALIGN(4096);
+  __smp_alt_end = .;
+
   /* will be freed after init */
   . = ALIGN(4096);		/* Init code and data */
   __init_begin = .;

+ 3 - 0
arch/i386/kernel/vsyscall-sysenter.S

@@ -21,6 +21,9 @@
  * instruction clobbers %esp, the user's %esp won't even survive entry
  * into the kernel. We store %esp in %ebp. Code in entry.S must fetch
  * arg6 from the stack.
+ *
+ * You cannot use this vsyscall for the clone() syscall because the
+ * three dwords on the parent stack do not get copied to the child.
  */
 	.text
 	.globl __kernel_vsyscall

+ 4 - 1
arch/i386/mach-es7000/es7000.h

@@ -83,6 +83,7 @@ struct es7000_oem_table {
 	struct psai psai;
 };
 
+#ifdef CONFIG_ACPI
 struct acpi_table_sdt {
 	unsigned long pa;
 	unsigned long count;
@@ -99,6 +100,9 @@ struct oem_table {
 	u32 OEMTableSize;
 };
 
+extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
+#endif
+
 struct mip_reg {
 	unsigned long long off_0;
 	unsigned long long off_8;
@@ -114,7 +118,6 @@ struct mip_reg {
 #define	MIP_FUNC(VALUE) 	(VALUE & 0xff)
 
 extern int parse_unisys_oem (char *oemptr);
-extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
 extern void setup_unisys(void);
 extern int es7000_start_cpu(int cpu, unsigned long eip);
 extern void es7000_sw_apic(void);

+ 2 - 4
arch/i386/mach-es7000/es7000plat.c

@@ -51,8 +51,6 @@ struct mip_reg		*host_reg;
 int 			mip_port;
 unsigned long		mip_addr, host_addr;
 
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI)
-
 /*
  * GSI override for ES7000 platforms.
  */
@@ -76,8 +74,6 @@ es7000_rename_gsi(int ioapic, int gsi)
 	return gsi;
 }
 
-#endif	/* (CONFIG_X86_IO_APIC) && (CONFIG_ACPI) */
-
 void __init
 setup_unisys(void)
 {
@@ -160,6 +156,7 @@ parse_unisys_oem (char *oemptr)
 	return es7000_plat;
 }
 
+#ifdef CONFIG_ACPI
 int __init
 find_unisys_acpi_oem_table(unsigned long *oem_addr)
 {
@@ -212,6 +209,7 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
 	}
 	return -1;
 }
+#endif
 
 static void
 es7000_spin(int n)

+ 138 - 72
arch/i386/mm/fault.c

@@ -214,6 +214,68 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
 
 fastcall void do_invalid_op(struct pt_regs *, unsigned long);
 
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+	unsigned index = pgd_index(address);
+	pgd_t *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd += index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k))
+		return NULL;
+
+	/*
+	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
+	 * and redundant with the set_pmd() on non-PAE. As would
+	 * set_pud.
+	 */
+
+	pud = pud_offset(pgd, address);
+	pud_k = pud_offset(pgd_k, address);
+	if (!pud_present(*pud_k))
+		return NULL;
+
+	pmd = pmd_offset(pud, address);
+	pmd_k = pmd_offset(pud_k, address);
+	if (!pmd_present(*pmd_k))
+		return NULL;
+	if (!pmd_present(*pmd))
+		set_pmd(pmd, *pmd_k);
+	else
+		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+	return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
+ */
+static inline int vmalloc_fault(unsigned long address)
+{
+	unsigned long pgd_paddr;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_paddr = read_cr3();
+	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+	if (!pmd_k)
+		return -1;
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+	return 0;
+}
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -223,6 +285,8 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long);
  *	bit 0 == 0 means no page found, 1 means protection fault
  *	bit 1 == 0 means read, 1 means write
  *	bit 2 == 0 means kernel, 1 means user-mode
+ *	bit 3 == 1 means use of reserved bit detected
+ *	bit 4 == 1 means fault was an instruction fetch
  */
 fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 				      unsigned long error_code)
@@ -237,13 +301,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	/* get the address */
         address = read_cr2();
 
-	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-					SIGSEGV) == NOTIFY_STOP)
-		return;
-	/* It's safe to allow irq's after cr2 has been saved */
-	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
-		local_irq_enable();
-
 	tsk = current;
 
 	si_code = SEGV_MAPERR;
@@ -259,17 +316,29 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	 *
 	 * This verifies that the fault happens in kernel space
 	 * (error_code & 4) == 0, and that the fault was not a
-	 * protection error (error_code & 1) == 0.
+	 * protection error (error_code & 9) == 0.
 	 */
-	if (unlikely(address >= TASK_SIZE)) { 
-		if (!(error_code & 5))
-			goto vmalloc_fault;
-		/* 
+	if (unlikely(address >= TASK_SIZE)) {
+		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+			return;
+		if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+						SIGSEGV) == NOTIFY_STOP)
+			return;
+		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
 		 */
 		goto bad_area_nosemaphore;
-	} 
+	}
+
+	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+					SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	/* It's safe to allow IRQs after cr2 has been saved and the vmalloc
+	   fault has been handled. */
+	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+		local_irq_enable();
 
 	mm = tsk->mm;
 
@@ -440,24 +509,31 @@ no_context:
 
 	bust_spinlocks(1);
 
-#ifdef CONFIG_X86_PAE
-	if (error_code & 16) {
-		pte_t *pte = lookup_address(address);
+	if (oops_may_print()) {
+	#ifdef CONFIG_X86_PAE
+		if (error_code & 16) {
+			pte_t *pte = lookup_address(address);
 
-		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
-			printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+			if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+				printk(KERN_CRIT "kernel tried to execute "
+					"NX-protected page - exploit attempt? "
+					"(uid: %d)\n", current->uid);
+		}
+	#endif
+		if (address < PAGE_SIZE)
+			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+					"pointer dereference");
+		else
+			printk(KERN_ALERT "BUG: unable to handle kernel paging"
+					" request");
+		printk(" at virtual address %08lx\n",address);
+		printk(KERN_ALERT " printing eip:\n");
+		printk("%08lx\n", regs->eip);
 	}
-#endif
-	if (address < PAGE_SIZE)
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-	else
-		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at virtual address %08lx\n",address);
-	printk(KERN_ALERT " printing eip:\n");
-	printk("%08lx\n", regs->eip);
 	page = read_cr3();
 	page = ((unsigned long *) __va(page))[address >> 22];
-	printk(KERN_ALERT "*pde = %08lx\n", page);
+	if (oops_may_print())
+		printk(KERN_ALERT "*pde = %08lx\n", page);
 	/*
 	 * We must not directly access the pte in the highpte
 	 * case, the page table might be allocated in highmem.
@@ -465,7 +541,7 @@ no_context:
 	 * it's allocated already.
 	 */
 #ifndef CONFIG_HIGHPTE
-	if (page & 1) {
+	if ((page & 1) && oops_may_print()) {
 		page &= PAGE_MASK;
 		address &= 0x003ff000;
 		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
@@ -510,51 +586,41 @@ do_sigbus:
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-	return;
-
-vmalloc_fault:
-	{
-		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Do _not_ use "tsk" here. We might be inside
-		 * an interrupt in the middle of a task switch..
-		 */
-		int index = pgd_index(address);
-		unsigned long pgd_paddr;
-		pgd_t *pgd, *pgd_k;
-		pud_t *pud, *pud_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
-
-		pgd_paddr = read_cr3();
-		pgd = index + (pgd_t *)__va(pgd_paddr);
-		pgd_k = init_mm.pgd + index;
-
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-
-		/*
-		 * set_pgd(pgd, *pgd_k); here would be useless on PAE
-		 * and redundant with the set_pmd() on non-PAE. As would
-		 * set_pud.
-		 */
+}
 
-		pud = pud_offset(pgd, address);
-		pud_k = pud_offset(pgd_k, address);
-		if (!pud_present(*pud_k))
-			goto no_context;
-		
-		pmd = pmd_offset(pud, address);
-		pmd_k = pmd_offset(pud_k, address);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-		set_pmd(pmd, *pmd_k);
+#ifndef CONFIG_X86_PAE
+void vmalloc_sync_all(void)
+{
+	/*
+	 * Note that races in the updates of insync and start aren't
+	 * problematic: insync can only get set bits added, and updates to
+	 * start are only improving performance (without affecting correctness
+	 * if undone).
+	 */
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	static unsigned long start = TASK_SIZE;
+	unsigned long address;
 
-		pte_k = pte_offset_kernel(pmd_k, address);
-		if (!pte_present(*pte_k))
-			goto no_context;
-		return;
+	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+		if (!test_bit(pgd_index(address), insync)) {
+			unsigned long flags;
+			struct page *page;
+
+			spin_lock_irqsave(&pgd_lock, flags);
+			for (page = pgd_list; page; page =
+					(struct page *)page->index)
+				if (!vmalloc_sync_one(page_address(page),
+								address)) {
+					BUG_ON(page != pgd_list);
+					break;
+				}
+			spin_unlock_irqrestore(&pgd_lock, flags);
+			if (!page)
+				set_bit(pgd_index(address), insync);
+		}
+		if (address == start && test_bit(pgd_index(address), insync))
+			start = address + PGDIR_SIZE;
 	}
 }
+#endif
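
For reference, the test guarding vmalloc_fault() decodes the i386 page-fault error code as follows (a summary, matching the expanded comment above do_page_fault()):

	/*
	 * error_code bits:
	 *   0x01  protection violation (page was present)
	 *   0x02  write access
	 *   0x04  fault taken in user mode
	 *   0x08  reserved page-table bit set
	 *   0x10  instruction fetch
	 *
	 * 0x01 | 0x04 | 0x08 == 0x0d, so !(error_code & 0x0000000d)
	 * accepts only kernel-mode, not-present, non-reserved faults --
	 * the one class vmalloc_fault() may repair from init_mm.
	 */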

+ 22 - 23
arch/i386/mm/init.c

@@ -720,21 +720,6 @@ static int noinline do_test_wp_bit(void)
 	return flag;
 }
 
-void free_initmem(void)
-{
-	unsigned long addr;
-
-	addr = (unsigned long)(&__init_begin);
-	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		memset((void *)addr, 0xcc, PAGE_SIZE);
-		free_page(addr);
-		totalram_pages++;
-	}
-	printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
-}
-
 #ifdef CONFIG_DEBUG_RODATA
 
 extern char __start_rodata, __end_rodata;
@@ -758,17 +743,31 @@ void mark_rodata_ro(void)
 }
 #endif
 
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		memset((void *)addr, 0xcc, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+void free_initmem(void)
+{
+	free_init_pages("unused kernel memory",
+			(unsigned long)(&__init_begin),
+			(unsigned long)(&__init_end));
+}
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	if (start < end)
-		printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
-	for (; start < end; start += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(start));
-		init_page_count(virt_to_page(start));
-		free_page(start);
-		totalram_pages++;
-	}
+	free_init_pages("initrd memory", start, end);
 }
 #endif
+

+ 2 - 5
arch/i386/oprofile/nmi_int.c

@@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy)
 static void free_msrs(void)
 {
 	int i;
-	for (i = 0; i < NR_CPUS; ++i) {
+	for_each_cpu(i) {
 		kfree(cpu_msrs[i].counters);
 		cpu_msrs[i].counters = NULL;
 		kfree(cpu_msrs[i].controls);
@@ -138,10 +138,7 @@ static int allocate_msrs(void)
 	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
 
 	int i;
-	for (i = 0; i < NR_CPUS; ++i) {
-		if (!cpu_online(i))
-			continue;
-
+	for_each_online_cpu(i) {
 		cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
 		if (!cpu_msrs[i].counters) {
 			success = 0;

+ 1 - 6
arch/ia64/hp/sim/simserial.c

@@ -46,11 +46,6 @@
 #define KEYBOARD_INTR	3	/* must match with simulator! */
 
 #define NR_PORTS	1	/* only one port for now */
-#define SERIAL_INLINE	1
-
-#ifdef SERIAL_INLINE
-#define _INLINE_ inline
-#endif
 
 #define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
 
@@ -237,7 +232,7 @@ static void rs_put_char(struct tty_struct *tty, unsigned char ch)
 	local_irq_restore(flags);
 }
 
-static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done)
+static void transmit_chars(struct async_struct *info, int *intr_done)
 {
 	int count;
 	unsigned long flags;

+ 4 - 6
arch/m32r/kernel/irq.c

@@ -37,9 +37,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -52,9 +51,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, "  %s", action->name);

+ 1 - 3
arch/m68k/bvme6000/rtc.c

@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/mc146818rtc.h>	/* For struct rtc_time and ioctls, etc */
 #include <linux/smp_lock.h>
+#include <linux/bcd.h>
 #include <asm/bvme6000hw.h>
 
 #include <asm/io.h>
@@ -32,9 +33,6 @@
  *	ioctls.
  */
 
-#define BCD2BIN(val) (((val)&15) + ((val)>>4)*10)
-#define BIN2BCD(val) ((((val)/10)<<4) + (val)%10)
-
 static unsigned char days_in_mo[] =
 {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
 

+ 4 - 6
arch/mips/kernel/irq.c

@@ -68,9 +68,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, "           ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d       ",j);
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%d       ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -83,9 +82,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %14s", irq_desc[i].handler->typename);
 		seq_printf(p, "  %s", action->name);

+ 2 - 2
arch/mips/kernel/smp.c

@@ -167,8 +167,8 @@ int smp_call_function (void (*func) (void *info), void *info, int retry,
 	mb();
 
 	/* Send a message to all other CPUs and wait for them to respond */
-	for (i = 0; i < NR_CPUS; i++)
-		if (cpu_online(i) && i != cpu)
+	for_each_online_cpu(i)
+		if (i != cpu)
 			core_send_ipi(i, SMP_CALL_FUNCTION);
 
 	/* Wait for response */

+ 1 - 4
arch/mips/sgi-ip27/ip27-irq.c

@@ -88,12 +88,9 @@ static inline int find_level(cpuid_t *cpunum, int irq)
 {
 	int cpu, i;
 
-	for (cpu = 0; cpu <= NR_CPUS; cpu++) {
+	for_each_online_cpu(cpu) {
 		struct slice_data *si = cpu_data[cpu].data;
 
-		if (!cpu_online(cpu))
-			continue;
-
 		for (i = BASE_PCI_IRQ; i < LEVELS_PER_SLICE; i++)
 			if (si->level_to_irq[i] == irq) {
 				*cpunum = cpu;

+ 10 - 15
arch/parisc/kernel/smp.c

@@ -298,8 +298,8 @@ send_IPI_allbutself(enum ipi_message_type op)
 {
 	int i;
 	
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_online(i) && i != smp_processor_id())
+	for_each_online_cpu(i) {
+		if (i != smp_processor_id())
 			send_IPI_single(i, op);
 	}
 }
@@ -643,14 +643,13 @@ int sys_cpus(int argc, char **argv)
 	if ( argc == 1 ){
 	
 #ifdef DUMP_MORE_STATE
-		for(i=0; i<NR_CPUS; i++) {
+		for_each_online_cpu(i) {
 			int cpus_per_line = 4;
-			if(cpu_online(i)) {
-				if (j++ % cpus_per_line)
-					printk(" %3d",i);
-				else
-					printk("\n %3d",i);
-			}
+
+			if (j++ % cpus_per_line)
+				printk(" %3d",i);
+			else
+				printk("\n %3d",i);
 		}
 		printk("\n"); 
 #else
@@ -659,9 +658,7 @@ int sys_cpus(int argc, char **argv)
 	} else if((argc==2) && !(strcmp(argv[1],"-l"))) {
 		printk("\nCPUSTATE  TASK CPUNUM CPUID HARDCPU(HPA)\n");
 #ifdef DUMP_MORE_STATE
-		for(i=0;i<NR_CPUS;i++) {
-			if (!cpu_online(i))
-				continue;
+		for_each_online_cpu(i) {
 			if (cpu_data[i].cpuid != NO_PROC_ID) {
 				switch(cpu_data[i].state) {
 					case STATE_RENDEZVOUS:
@@ -695,9 +692,7 @@ int sys_cpus(int argc, char **argv)
 	} else if ((argc==2) && !(strcmp(argv[1],"-s"))) { 
 #ifdef DUMP_MORE_STATE
      		printk("\nCPUSTATE   CPUID\n");
-		for (i=0;i<NR_CPUS;i++) {
-			if (!cpu_online(i))
-				continue;
+		for_each_online_cpu(i) {
 			if (cpu_data[i].cpuid != NO_PROC_ID) {
 				switch(cpu_data[i].state) {
 					case STATE_RENDEZVOUS:

+ 32 - 6
arch/powerpc/Kconfig

@@ -127,6 +127,12 @@ config PPC_83xx
 	select 83xx
 	select PPC_FPU
 
+config PPC_85xx
+	bool "Freescale 85xx"
+	select E500
+	select FSL_SOC
+	select 85xx
+
 config 40x
 	bool "AMCC 40x"
 
@@ -139,8 +145,6 @@ config 8xx
 config E200
 	bool "Freescale e200"
 
-config E500
-	bool "Freescale e500"
 endchoice
 
 config POWER4_ONLY
@@ -168,6 +172,13 @@ config 6xx
 config 83xx
 	bool
 
+# this is temp to handle compat with arch=ppc
+config 85xx
+	bool
+
+config E500
+	bool
+
 config PPC_FPU
 	bool
 	default y if PPC64
@@ -217,6 +228,7 @@ config ALTIVEC
 config SPE
 	bool "SPE Support"
 	depends on E200 || E500
+	default y
 	---help---
 	  This option enables kernel support for the Signal Processing
 	  Extensions (SPE) to the PowerPC processor. The kernel currently
@@ -238,6 +250,21 @@ config PPC_STD_MMU_32
 	def_bool y
 	depends on PPC_STD_MMU && PPC32
 
+config VIRT_CPU_ACCOUNTING
+	bool "Deterministic task and CPU time accounting"
+	depends on PPC64
+	default y
+	help
+	  Select this option to enable more accurate task and CPU time
+	  accounting.  This is done by reading a CPU counter on each
+	  kernel entry and exit and on transitions within the kernel
+	  between system, softirq and hardirq state, so there is a
+	  small performance impact.  This also enables accounting of
+	  stolen time on logically-partitioned systems running on
+	  IBM POWER5-based machines.
+
+	  If in doubt, say Y here.
+
 config SMP
 	depends on PPC_STD_MMU
 	bool "Symmetric multi-processing support"
@@ -734,13 +761,12 @@ config GENERIC_ISA_DMA
 
 config PPC_I8259
 	bool
-	default y if 85xx
 	default n
 
 config PPC_INDIRECT_PCI
 	bool
 	depends on PCI
-	default y if 40x || 44x || 85xx
+	default y if 40x || 44x
 	default n
 
 config EISA
@@ -757,8 +783,8 @@ config MCA
 	bool
 
 config PCI
-	bool "PCI support" if 40x || CPM2 || PPC_83xx || 85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES)
-	default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx && !85xx
+	bool "PCI support" if 40x || CPM2 || PPC_83xx || PPC_85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES)
+	default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx && !PPC_85xx
 	default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS
 	default PCI_QSPAN if !4xx && !CPM2 && 8xx
 	help

+ 1 - 1
arch/powerpc/Makefile

@@ -148,7 +148,7 @@ all: $(KBUILD_IMAGE)
 
 CPPFLAGS_vmlinux.lds	:= -Upowerpc
 
-BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm uImage
+BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm uImage vmlinux.bin
 
 .PHONY: $(BOOT_TARGETS)
 

+ 0 - 2
arch/powerpc/boot/install.sh

@@ -1,7 +1,5 @@
 #!/bin/sh
 #
-# arch/ppc64/boot/install.sh
-#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.

+ 2 - 2
arch/powerpc/boot/main.c

@@ -152,7 +152,7 @@ static int is_elf64(void *hdr)
 	elf64ph = (Elf64_Phdr *)((unsigned long)elf64 +
 				 (unsigned long)elf64->e_phoff);
 	for (i = 0; i < (unsigned int)elf64->e_phnum; i++, elf64ph++)
-		if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0)
+		if (elf64ph->p_type == PT_LOAD)
 			break;
 	if (i >= (unsigned int)elf64->e_phnum)
 		return 0;
@@ -193,7 +193,7 @@ static int is_elf32(void *hdr)
 	elf32 = (Elf32_Ehdr *)elfheader;
 	elf32ph = (Elf32_Phdr *) ((unsigned long)elf32 + elf32->e_phoff);
 	for (i = 0; i < elf32->e_phnum; i++, elf32ph++)
-		if (elf32ph->p_type == PT_LOAD && elf32ph->p_offset != 0)
+		if (elf32ph->p_type == PT_LOAD)
 			break;
 	if (i >= elf32->e_phnum)
 		return 0;

+ 721 - 0
arch/powerpc/configs/mpc8540_ads_defconfig

@@ -0,0 +1,721 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 
+# Sat Jan 14 15:57:54 2006
+#
+# CONFIG_PPC64 is not set
+CONFIG_PPC32=y
+CONFIG_PPC_MERGE=y
+CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_PPC_UDBG_16550=y
+# CONFIG_GENERIC_TBSYNC is not set
+
+#
+# Processor support
+#
+# CONFIG_CLASSIC32 is not set
+# CONFIG_PPC_52xx is not set
+# CONFIG_PPC_82xx is not set
+# CONFIG_PPC_83xx is not set
+CONFIG_PPC_85xx=y
+# CONFIG_40x is not set
+# CONFIG_44x is not set
+# CONFIG_8xx is not set
+# CONFIG_E200 is not set
+CONFIG_85xx=y
+CONFIG_E500=y
+CONFIG_BOOKE=y
+CONFIG_FSL_BOOKE=y
+# CONFIG_PHYS_64BIT is not set
+CONFIG_SPE=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_EMBEDDED=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_MPIC=y
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Platform support
+#
+CONFIG_MPC8540_ADS=y
+CONFIG_MPC8540=y
+CONFIG_PPC_INDIRECT_PCI_BE=y
+
+#
+# Kernel options
+#
+# CONFIG_HIGHMEM is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+CONFIG_MATH_EMULATION=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_PROC_DEVICETREE=y
+# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+# CONFIG_SOFTWARE_SUSPEND is not set
+# CONFIG_SECCOMP is not set
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+# CONFIG_PPC_I8259 is not set
+CONFIG_PPC_INDIRECT_PCI=y
+CONFIG_FSL_SOC=y
+# CONFIG_PCI is not set
+# CONFIG_PCI_DOMAINS is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_HIGHMEM_START=0xfe000000
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_BOOT_LOAD=0x00800000
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+
+#
+# I2O device support
+#
+
+#
+# Macintosh device drivers
+#
+# CONFIG_WINDFARM is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# PHY device support
+#
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_GIANFAR=y
+CONFIG_GFAR_NAPI=y
+
+#
+# Ethernet (10000 Mbit)
+#
+
+#
+# Token Ring devices
+#
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_NVRAM is not set
+CONFIG_GEN_RTC=y
+# CONFIG_GEN_RTC_X is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia Capabilities Port drivers
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB_ARCH_HAS_HCD is not set
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+
+#
+# SN Devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V3 is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+
+#
+# Native Language Support
+#
+# CONFIG_NLS is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+
+#
+# Instrumentation Support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_MAGIC_SYSRQ is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_DEBUGGER is not set
+# CONFIG_BDI_SWITCH is not set
+# CONFIG_BOOTX_TEXT is not set
+# CONFIG_PPC_EARLY_DEBUG_LPAR is not set
+# CONFIG_PPC_EARLY_DEBUG_G5 is not set
+# CONFIG_PPC_EARLY_DEBUG_RTAS is not set
+# CONFIG_PPC_EARLY_DEBUG_MAPLE is not set
+# CONFIG_PPC_EARLY_DEBUG_ISERIES is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Hardware crypto devices
+#

+ 3 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -136,6 +136,9 @@ int main(void)
 	DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
 	DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
 	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+	DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
+	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
+	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
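
The three new paca offsets back the ACCOUNT_CPU_USER_ENTRY/EXIT macros used by the entry_64.S and head_64.S hunks below: each crossing of the user/kernel boundary reads the PURR, charges the elapsed ticks to the side just left, and restarts the interval. A C-level paraphrase of the entry half (illustrative only; the real code is an asm macro guarded by CONFIG_VIRT_CPU_ACCOUNTING):

	static inline void account_cpu_user_entry(struct paca_struct *paca)
	{
		u64 now = mfspr(SPRN_PURR);	/* processor utilisation register */

		paca->user_time += now - paca->startpurr;
		paca->startpurr = now;
	}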

+ 11 - 1
arch/powerpc/kernel/cputable.c

@@ -894,7 +894,7 @@ struct cpu_spec	cpu_specs[] = {
 		.platform		= "ppc405",
 	},
 	{	/* Xilinx Virtex-II Pro  */
-		.pvr_mask		= 0xffff0000,
+		.pvr_mask		= 0xfffff000,
 		.pvr_value		= 0x20010000,
 		.cpu_name		= "Virtex-II Pro",
 		.cpu_features		= CPU_FTRS_40X,
@@ -904,6 +904,16 @@ struct cpu_spec	cpu_specs[] = {
 		.dcache_bsize		= 32,
 		.platform		= "ppc405",
 	},
+	{	/* Xilinx Virtex-4 FX */
+		.pvr_mask		= 0xfffff000,
+		.pvr_value		= 0x20011000,
+		.cpu_name		= "Virtex-4 FX",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 |
+			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+	},
 	{	/* 405EP */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x51210000,
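
Narrowing pvr_mask from 0xffff0000 to 0xfffff000 is what makes the new entry reachable at all: identify_cpu() takes the first cpu_specs[] row where (pvr & pvr_mask) == pvr_value, and under the old mask every 0x2001xxxx part matched Virtex-II Pro. With the extra nibble the two parts select different rows (hypothetical PVR value for illustration):

	unsigned int pvr = 0x20011234;	/* a Virtex-4 FX part */

	/* (pvr & 0xfffff000) == 0x20011000: skips the Virtex-II Pro row
	   (pvr_value 0x20010000) and matches the Virtex-4 FX row. */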

+ 6 - 5
arch/powerpc/kernel/entry_64.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc64/kernel/entry.S
- *
  *  PowerPC version 
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
@@ -63,6 +61,7 @@ system_call_common:
 	std	r12,_MSR(r1)
 	std	r0,GPR0(r1)
 	std	r10,GPR1(r1)
+	ACCOUNT_CPU_USER_ENTRY(r10, r11)
 	std	r2,GPR2(r1)
 	std	r3,GPR3(r1)
 	std	r4,GPR4(r1)
@@ -170,8 +169,9 @@ syscall_error_cont:
 	stdcx.	r0,0,r1			/* to clear the reservation */
 	andi.	r6,r8,MSR_PR
 	ld	r4,_LINK(r1)
-	beq-	1f			/* only restore r13 if */
-	ld	r13,GPR13(r1)		/* returning to usermode */
+	beq-	1f
+	ACCOUNT_CPU_USER_EXIT(r11, r12)
+	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
 1:	ld	r2,GPR2(r1)
 	li	r12,MSR_RI
 	andc	r11,r10,r12
@@ -322,7 +322,7 @@ _GLOBAL(ret_from_fork)
  * the fork code also.
  *
  * The code which creates the new task context is in 'copy_thread'
- * in arch/ppc64/kernel/process.c
+ * in arch/powerpc/kernel/process.c 
  */
 	.align	7
 _GLOBAL(_switch)
@@ -486,6 +486,7 @@ restore:
 	 * userspace
 	 */
 	beq	1f
+	ACCOUNT_CPU_USER_EXIT(r3, r4)
 	REST_GPR(13, r1)
 1:
 	ld	r3,_CTR(r1)

+ 0 - 25
arch/powerpc/kernel/firmware.c

@@ -18,28 +18,3 @@
 #include <asm/firmware.h>
 
 unsigned long ppc64_firmware_features;
-
-#ifdef CONFIG_PPC_PSERIES
-firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
-	{FW_FEATURE_PFT,		"hcall-pft"},
-	{FW_FEATURE_TCE,		"hcall-tce"},
-	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
-	{FW_FEATURE_DABR,		"hcall-dabr"},
-	{FW_FEATURE_COPY,		"hcall-copy"},
-	{FW_FEATURE_ASR,		"hcall-asr"},
-	{FW_FEATURE_DEBUG,		"hcall-debug"},
-	{FW_FEATURE_PERF,		"hcall-perf"},
-	{FW_FEATURE_DUMP,		"hcall-dump"},
-	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
-	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
-	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
-	{FW_FEATURE_CRQ,		"hcall-crq"},
-	{FW_FEATURE_VIO,		"hcall-vio"},
-	{FW_FEATURE_RDMA,		"hcall-rdma"},
-	{FW_FEATURE_LLAN,		"hcall-lLAN"},
-	{FW_FEATURE_BULK,		"hcall-bulk"},
-	{FW_FEATURE_XDABR,		"hcall-xdabr"},
-	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
-	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
-};
-#endif

+ 0 - 2
arch/powerpc/kernel/head_44x.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc/kernel/head_44x.S
- *
  * Kernel execution entry point code.
  *
  *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>

+ 9 - 2
arch/powerpc/kernel/head_64.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc64/kernel/head.S
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *
@@ -279,6 +277,7 @@ exception_marker:
 	std	r10,0(r1);		/* make stack chain pointer	*/ \
 	std	r0,GPR0(r1);		/* save r0 in stackframe	*/ \
 	std	r10,GPR1(r1);		/* save r1 in stackframe	*/ \
+	ACCOUNT_CPU_USER_ENTRY(r9, r10);				   \
 	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
 	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
 	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe	*/ \
@@ -846,6 +845,14 @@ fast_exception_return:
 	ld	r11,_NIP(r1)
 	andi.	r3,r12,MSR_RI		/* check if RI is set */
 	beq-	unrecov_fer
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	andi.	r3,r12,MSR_PR
+	beq	2f
+	ACCOUNT_CPU_USER_EXIT(r3, r4)
+2:
+#endif
+
 	ld	r3,_CCR(r1)
 	ld	r4,_LINK(r1)
 	ld	r5,_CTR(r1)

+ 0 - 2
arch/powerpc/kernel/head_8xx.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/except_8xx.S
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP

+ 363 - 0
arch/powerpc/kernel/head_booke.h

@@ -0,0 +1,363 @@
+#ifndef __HEAD_BOOKE_H__
+#define __HEAD_BOOKE_H__
+
+/*
+ * Macros used for common Book-e exception handling
+ */
+
+#define SET_IVOR(vector_number, vector_label)		\
+		li	r26,vector_label@l; 		\
+		mtspr	SPRN_IVOR##vector_number,r26;	\
+		sync
+
+#define NORMAL_EXCEPTION_PROLOG						     \
+	mtspr	SPRN_SPRG0,r10;		/* save two registers to work with */\
+	mtspr	SPRN_SPRG1,r11;						     \
+	mtspr	SPRN_SPRG4W,r1;						     \
+	mfcr	r10;			/* save CR in r10 for now	   */\
+	mfspr	r11,SPRN_SRR1;		/* check whether user or kernel    */\
+	andi.	r11,r11,MSR_PR;						     \
+	beq	1f;							     \
+	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
+	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
+	addi	r1,r1,THREAD_SIZE;					     \
+1:	subi	r1,r1,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
+	mr	r11,r1;							     \
+	stw	r10,_CCR(r11);          /* save various registers	   */\
+	stw	r12,GPR12(r11);						     \
+	stw	r9,GPR9(r11);						     \
+	mfspr	r10,SPRN_SPRG0;						     \
+	stw	r10,GPR10(r11);						     \
+	mfspr	r12,SPRN_SPRG1;						     \
+	stw	r12,GPR11(r11);						     \
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r10,SPRN_SPRG4R;					     \
+	mfspr	r12,SPRN_SRR0;						     \
+	stw	r10,GPR1(r11);						     \
+	mfspr	r9,SPRN_SRR1;						     \
+	stw	r10,0(r11);						     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	stw	r0,GPR0(r11);						     \
+	SAVE_4GPRS(3, r11);						     \
+	SAVE_2GPRS(7, r11)
+
+/* To handle the additional exception priority levels on 40x and Book-E
+ * processors we allocate a 4k stack per additional priority level. The various
+ * head_xxx.S files allocate space (exception_stack_top) for each priority's
+ * stack times the number of CPUs.
+ *
+ * On 40x critical is the only additional level
+ * On 44x/e500 we have critical and machine check
+ * On e200 we have critical and debug (machine check occurs via critical)
+ *
+ * Additionally we reserve a SPRG for each priority level so we can free up a
+ * GPR to use as the base for indirect access to the exception stacks.  This
+ * is necessary since the MMU is always on, for Book-E parts, and the stacks
+ * are offset from KERNELBASE.
+ *
+ */
+#define BOOKE_EXCEPTION_STACK_SIZE	(8192)
+
+/* CRIT_SPRG only used in critical exception handling */
+#define CRIT_SPRG	SPRN_SPRG2
+/* MCHECK_SPRG only used in machine check exception handling */
+#define MCHECK_SPRG	SPRN_SPRG6W
+
+#define MCHECK_STACK_TOP	(exception_stack_top - 4096)
+#define CRIT_STACK_TOP		(exception_stack_top)
+
+/* only on e200 for now */
+#define DEBUG_STACK_TOP		(exception_stack_top - 4096)
+#define DEBUG_SPRG		SPRN_SPRG6W
+
+#ifdef CONFIG_SMP
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
+	mfspr	r8,SPRN_PIR;				\
+	mulli	r8,r8,BOOKE_EXCEPTION_STACK_SIZE;	\
+	neg	r8,r8;					\
+	addis	r8,r8,level##_STACK_TOP@ha;		\
+	addi	r8,r8,level##_STACK_TOP@l
+#else
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
+	lis	r8,level##_STACK_TOP@h;			\
+	ori	r8,r8,level##_STACK_TOP@l
+#endif
+
+/*
+ * Exception prolog for critical/machine check exceptions.  This is a
+ * little different from the normal exception prolog above since a
+ * critical/machine check exception can potentially occur at any point
+ * during normal exception processing. Thus we cannot use the same SPRG
+ * registers as the normal prolog above. Instead we use a portion of the
+ * critical/machine check exception stack at low physical addresses.
+ */
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
+	mtspr	exc_level##_SPRG,r8;					     \
+	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
+	stw	r10,GPR10-INT_FRAME_SIZE(r8);				     \
+	stw	r11,GPR11-INT_FRAME_SIZE(r8);				     \
+	mfcr	r10;			/* save CR in r10 for now	   */\
+	mfspr	r11,exc_level_srr1;	/* check whether user or kernel    */\
+	andi.	r11,r11,MSR_PR;						     \
+	mr	r11,r8;							     \
+	mfspr	r8,exc_level##_SPRG;					     \
+	beq	1f;							     \
+	/* COMING FROM USER MODE */					     \
+	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
+	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+	addi	r11,r11,THREAD_SIZE;					     \
+1:	subi	r11,r11,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
+	stw	r10,_CCR(r11);          /* save various registers	   */\
+	stw	r12,GPR12(r11);						     \
+	stw	r9,GPR9(r11);						     \
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
+	stw	r12,_DEAR(r11);		/* since they may have had stuff   */\
+	mfspr	r9,SPRN_ESR;		/* in them at the point where the  */\
+	stw	r9,_ESR(r11);		/* exception was taken		   */\
+	mfspr	r12,exc_level_srr0;					     \
+	stw	r1,GPR1(r11);						     \
+	mfspr	r9,exc_level_srr1;					     \
+	stw	r1,0(r11);						     \
+	mr	r1,r11;							     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	stw	r0,GPR0(r11);						     \
+	SAVE_4GPRS(3, r11);						     \
+	SAVE_2GPRS(7, r11)
+
+#define CRITICAL_EXCEPTION_PROLOG \
+		EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG \
+		EXC_LEVEL_EXCEPTION_PROLOG(DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG \
+		EXC_LEVEL_EXCEPTION_PROLOG(MCHECK, SPRN_MCSRR0, SPRN_MCSRR1)
+
+/*
+ * Exception vectors.
+ */
+#define	START_EXCEPTION(label)						     \
+	.align 5;							     \
+label:
+
+#define FINISH_EXCEPTION(func)					\
+	bl	transfer_to_handler_full;			\
+	.long	func;						\
+	.long	ret_from_except_full
+
+#define EXCEPTION(n, label, hdlr, xfer)				\
+	START_EXCEPTION(label);					\
+	NORMAL_EXCEPTION_PROLOG;				\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	xfer(n, hdlr)
+
+#define CRITICAL_EXCEPTION(n, label, hdlr)			\
+	START_EXCEPTION(label);					\
+	CRITICAL_EXCEPTION_PROLOG;				\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+			  NOCOPY, crit_transfer_to_handler, \
+			  ret_from_crit_exc)
+
+#define MCHECK_EXCEPTION(n, label, hdlr)			\
+	START_EXCEPTION(label);					\
+	MCHECK_EXCEPTION_PROLOG;				\
+	mfspr	r5,SPRN_ESR;					\
+	stw	r5,_ESR(r11);					\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+			  NOCOPY, mcheck_transfer_to_handler,   \
+			  ret_from_mcheck_exc)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret)	\
+	li	r10,trap;					\
+	stw	r10,_TRAP(r11);					\
+	lis	r10,msr@h;					\
+	ori	r10,r10,msr@l;					\
+	copyee(r10, r9);					\
+	bl	tfer;		 				\
+	.long	hdlr;						\
+	.long	ret
+
+#define COPY_EE(d, s)		rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
+			  ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr)	\
+	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+			  ret_from_except)
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved.  This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (e.g. an ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the original MSR via SPRN_SRR1, which will still
+ * the MSR_DE bit set.
+ */
+#ifdef CONFIG_E200
+#define DEBUG_EXCEPTION							      \
+	START_EXCEPTION(Debug);						      \
+	DEBUG_EXCEPTION_PROLOG;						      \
+									      \
+	/*								      \
+	 * If there is a single step or branch-taken exception in an	      \
+	 * exception entry sequence, it was probably meant to apply to	      \
+	 * the code where the exception occurred (since exception entry	      \
+	 * doesn't turn off DE automatically).  We simulate the effect	      \
+	 * of turning off DE on entry to an exception handler by turning      \
+	 * off DE in the DSRR1 value and clearing the debug status.	      \
+	 */								      \
+	mfspr	r10,SPRN_DBSR;		/* check single-step/branch taken */  \
+	andis.	r10,r10,DBSR_IC@h;					      \
+	beq+	2f;							      \
+									      \
+	lis	r10,KERNELBASE@h;	/* check if exception in vectors */   \
+	ori	r10,r10,KERNELBASE@l;					      \
+	cmplw	r12,r10;						      \
+	blt+	2f;			/* addr below exception vectors */    \
+									      \
+	lis	r10,Debug@h;						      \
+	ori	r10,r10,Debug@l;					      \
+	cmplw	r12,r10;						      \
+	bgt+	2f;			/* addr above exception vectors */    \
+									      \
+	/* here it looks like we got an inappropriate debug exception. */     \
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the DSRR1 value */     \
+	lis	r10,DBSR_IC@h;		/* clear the IC event */	      \
+	mtspr	SPRN_DBSR,r10;						      \
+	/* restore state and get out */					      \
+	lwz	r10,_CCR(r11);						      \
+	lwz	r0,GPR0(r11);						      \
+	lwz	r1,GPR1(r11);						      \
+	mtcrf	0x80,r10;						      \
+	mtspr	SPRN_DSRR0,r12;						      \
+	mtspr	SPRN_DSRR1,r9;						      \
+	lwz	r9,GPR9(r11);						      \
+	lwz	r12,GPR12(r11);						      \
+	mtspr	DEBUG_SPRG,r8;						      \
+	BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \
+	lwz	r10,GPR10-INT_FRAME_SIZE(r8);				      \
+	lwz	r11,GPR11-INT_FRAME_SIZE(r8);				      \
+	mfspr	r8,DEBUG_SPRG;						      \
+									      \
+	RFDI;								      \
+	b	.;							      \
+									      \
+	/* continue normal handling for a debug exception... */	      \
+2:	mfspr	r4,SPRN_DBSR;						      \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+#else
+#define DEBUG_EXCEPTION							      \
+	START_EXCEPTION(Debug);						      \
+	CRITICAL_EXCEPTION_PROLOG;					      \
+									      \
+	/*								      \
+	 * If there is a single step or branch-taken exception in an	      \
+	 * exception entry sequence, it was probably meant to apply to	      \
+	 * the code where the exception occurred (since exception entry	      \
+	 * doesn't turn off DE automatically).  We simulate the effect	      \
+	 * of turning off DE on entry to an exception handler by turning      \
+	 * off DE in the CSRR1 value and clearing the debug status.	      \
+	 */								      \
+	mfspr	r10,SPRN_DBSR;		/* check single-step/branch taken */  \
+	andis.	r10,r10,DBSR_IC@h;					      \
+	beq+	2f;							      \
+									      \
+	lis	r10,KERNELBASE@h;	/* check if exception in vectors */   \
+	ori	r10,r10,KERNELBASE@l;					      \
+	cmplw	r12,r10;						      \
+	blt+	2f;			/* addr below exception vectors */    \
+									      \
+	lis	r10,Debug@h;						      \
+	ori	r10,r10,Debug@l;					      \
+	cmplw	r12,r10;						      \
+	bgt+	2f;			/* addr above exception vectors */    \
+									      \
+	/* here it looks like we got an inappropriate debug exception. */     \
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the CSRR1 value */     \
+	lis	r10,DBSR_IC@h;		/* clear the IC event */	      \
+	mtspr	SPRN_DBSR,r10;						      \
+	/* restore state and get out */					      \
+	lwz	r10,_CCR(r11);						      \
+	lwz	r0,GPR0(r11);						      \
+	lwz	r1,GPR1(r11);						      \
+	mtcrf	0x80,r10;						      \
+	mtspr	SPRN_CSRR0,r12;						      \
+	mtspr	SPRN_CSRR1,r9;						      \
+	lwz	r9,GPR9(r11);						      \
+	lwz	r12,GPR12(r11);						      \
+	mtspr	CRIT_SPRG,r8;						      \
+	BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the critical stack */ \
+	lwz	r10,GPR10-INT_FRAME_SIZE(r8);				      \
+	lwz	r11,GPR11-INT_FRAME_SIZE(r8);				      \
+	mfspr	r8,CRIT_SPRG;						      \
+									      \
+	rfci;								      \
+	b	.;							      \
+									      \
+	/* continue normal handling for a critical exception... */	      \
+2:	mfspr	r4,SPRN_DBSR;						      \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+#endif
+
+#define INSTRUCTION_STORAGE_EXCEPTION					      \
+	START_EXCEPTION(InstructionStorage)				      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	mfspr	r5,SPRN_ESR;		/* Grab the ESR and save it */	      \
+	stw	r5,_ESR(r11);						      \
+	mr      r4,r12;                 /* Pass SRR0 as arg2 */		      \
+	li      r5,0;                   /* Pass zero as arg3 */		      \
+	EXC_XFER_EE_LITE(0x0400, handle_page_fault)
+
+#define ALIGNMENT_EXCEPTION						      \
+	START_EXCEPTION(Alignment)					      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	mfspr   r4,SPRN_DEAR;           /* Grab the DEAR and save it */	      \
+	stw     r4,_DEAR(r11);						      \
+	addi    r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_EE(0x0600, alignment_exception)
+
+#define PROGRAM_EXCEPTION						      \
+	START_EXCEPTION(Program)					      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	mfspr	r4,SPRN_ESR;		/* Grab the ESR and save it */	      \
+	stw	r4,_ESR(r11);						      \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_STD(0x0700, program_check_exception)
+
+#define DECREMENTER_EXCEPTION						      \
+	START_EXCEPTION(Decrementer)					      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	lis     r0,TSR_DIS@h;           /* Setup the DEC interrupt mask */    \
+	mtspr   SPRN_TSR,r0;		/* Clear the DEC interrupt */	      \
+	addi    r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_LITE(0x0900, timer_interrupt)
+
+#define FP_UNAVAILABLE_EXCEPTION					      \
+	START_EXCEPTION(FloatingPointUnavailable)			      \
+	NORMAL_EXCEPTION_PROLOG;					      \
+	bne	load_up_fpu;		/* if from user, just load it up */   \
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
+	EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+
+#endif /* __HEAD_BOOKE_H__ */

+ 4 - 2
arch/powerpc/kernel/head_fsl_booke.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc/kernel/head_fsl_booke.S
- *
  * Kernel execution entry point code.
  *
  *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
@@ -316,6 +314,7 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	 */
 	lis	r2,DBCR0_IDM@h
 	mtspr	SPRN_DBCR0,r2
+	isync
 	/* clear any residual debug events */
 	li	r2,-1
 	mtspr	SPRN_DBSR,r2
@@ -1002,12 +1001,15 @@ _GLOBAL(giveup_fpu)
 _GLOBAL(abort)
 	li	r13,0
         mtspr   SPRN_DBCR0,r13		/* disable all debug events */
+	isync
 	mfmsr	r13
 	ori	r13,r13,MSR_DE@l	/* Enable Debug Events */
 	mtmsr	r13
+	isync
         mfspr   r13,SPRN_DBCR0
         lis	r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h
         mtspr   SPRN_DBCR0,r13
+	isync
 
 _GLOBAL(set_context)
 

+ 0 - 2
arch/powerpc/kernel/iomap.c

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/kernel/iomap.c
- *
  * ppc64 "iomap" interface implementation.
  *
  * (C) Copyright 2004 Linus Torvalds

+ 0 - 1
arch/powerpc/kernel/iommu.c

@@ -1,5 +1,4 @@
 /*
- * arch/ppc64/kernel/iommu.c
  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  * 
  * Rewrite, cleanup, new allocation schemes, virtual merging: 

+ 24 - 13
arch/powerpc/kernel/irq.c

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/irq.c
- *
  *  Derived from arch/i386/kernel/irq.c
  *    Copyright (C) 1992 Linus Torvalds
  *  Adapted from arch/i386 by Gary Thomas
@@ -137,9 +135,8 @@ skip:
 #ifdef CONFIG_TAU_INT
 		if (tau_initialized){
 			seq_puts(p, "TAU: ");
-			for (j = 0; j < NR_CPUS; j++)
-				if (cpu_online(j))
-					seq_printf(p, "%10u ", tau_interrupts(j));
+			for_each_online_cpu(j)
+				seq_printf(p, "%10u ", tau_interrupts(j));
 			seq_puts(p, "  PowerPC             Thermal Assist (cpu temp)\n");
 		}
 #endif
@@ -371,6 +368,7 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
 	return NO_IRQ;
 
 }
+#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_IRQSTACKS
 struct thread_info *softirq_ctx[NR_CPUS];
@@ -394,10 +392,24 @@ void irq_ctx_init(void)
 	}
 }
 
+static inline void do_softirq_onstack(void)
+{
+	struct thread_info *curtp, *irqtp;
+
+	curtp = current_thread_info();
+	irqtp = softirq_ctx[smp_processor_id()];
+	irqtp->task = curtp->task;
+	call_do_softirq(irqtp);
+	irqtp->task = NULL;
+}
+
+#else
+#define do_softirq_onstack()	__do_softirq()
+#endif /* CONFIG_IRQSTACKS */
+
 void do_softirq(void)
 {
 	unsigned long flags;
-	struct thread_info *curtp, *irqtp;
 
 	if (in_interrupt())
 		return;
@@ -405,19 +417,18 @@ void do_softirq(void)
 	local_irq_save(flags);
 
 	if (local_softirq_pending()) {
-		curtp = current_thread_info();
-		irqtp = softirq_ctx[smp_processor_id()];
-		irqtp->task = curtp->task;
-		call_do_softirq(irqtp);
-		irqtp->task = NULL;
+		account_system_vtime(current);
+		local_bh_disable();
+		do_softirq_onstack();
+		account_system_vtime(current);
+		__local_bh_enable();
 	}
 
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(do_softirq);
 
-#endif /* CONFIG_IRQSTACKS */
-
+#ifdef CONFIG_PPC64
 static int __init setup_noirqdistrib(char *str)
 {
 	distribute_irqs = 0;

+ 2 - 3
arch/powerpc/kernel/kprobes.c

@@ -1,6 +1,5 @@
 /*
  *  Kernel Probes (KProbes)
- *  arch/ppc64/kernel/kprobes.c
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -82,9 +81,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	down(&kprobe_mutex);
+	mutex_lock(&kprobe_mutex);
 	free_insn_slot(p->ainsn.insn);
-	up(&kprobe_mutex);
+	mutex_unlock(&kprobe_mutex);
 }
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)

+ 1 - 4
arch/powerpc/kernel/of_device.c

@@ -147,15 +147,12 @@ postcore_initcall(of_bus_driver_init);
 
 int of_register_driver(struct of_platform_driver *drv)
 {
-	int count = 0;
-
 	/* initialize common driver fields */
 	drv->driver.name = drv->name;
 	drv->driver.bus = &of_platform_bus_type;
 
 	/* register with core */
-	count = driver_register(&drv->driver);
-	return count ? count : 1;
+	return driver_register(&drv->driver);
 }
 
 void of_unregister_driver(struct of_platform_driver *drv)

+ 0 - 1
arch/powerpc/kernel/pci_iommu.c

@@ -1,5 +1,4 @@
 /*
- * arch/ppc64/kernel/pci_iommu.c
  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  *
  * Rewrite, cleanup, new allocation schemes:

+ 0 - 1
arch/powerpc/kernel/ppc_ksyms.c

@@ -57,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs);
 extern void alignment_exception(struct pt_regs *regs);
 extern void program_check_exception(struct pt_regs *regs);
 extern void single_step_exception(struct pt_regs *regs);
-extern int pmac_newworld;
 extern int sys_sigreturn(struct pt_regs *regs);
 
 EXPORT_SYMBOL(clear_pages);

+ 6 - 3
arch/powerpc/kernel/process.c

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/process.c
- *
  *  Derived from "arch/i386/kernel/process.c"
  *    Copyright (C) 1995  Linus Torvalds
  *
@@ -47,9 +45,9 @@
 #include <asm/mmu.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
+#include <asm/time.h>
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
-#include <asm/time.h>
 #endif
 
 extern unsigned long _get_SP(void);
@@ -330,6 +328,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
 #endif
 
 	local_irq_save(flags);
+
+	account_system_vtime(current);
+	account_process_vtime(current);
+	calculate_steal_time();
+
 	last = _switch(old_thread, new_thread);
 
 	local_irq_restore(flags);

+ 0 - 4
arch/powerpc/kernel/prom.c

@@ -829,10 +829,6 @@ void __init unflatten_device_tree(void)
 
 	/* Allocate memory for the expanded device tree */
 	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
-	if (!mem) {
-		DBG("Couldn't allocate memory with lmb_alloc()!\n");
-		panic("Couldn't allocate memory with lmb_alloc()!\n");
-	}
 	mem = (unsigned long) __va(mem);
 
 	((u32 *)mem)[size / 4] = 0xdeadbeef;

+ 0 - 2
arch/powerpc/kernel/ptrace-common.h

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/kernel/ptrace-common.h
- *
  *    Copyright (c) 2002 Stephen Rothwell, IBM Coproration
  *    Extracted from ptrace.c and ptrace32.c
  *

+ 0 - 1
arch/powerpc/kernel/rtas-proc.c

@@ -1,5 +1,4 @@
 /*
- *   arch/ppc64/kernel/rtas-proc.c
  *   Copyright (C) 2000 Tilmann Bitterberg
  *   (tilmann@bitterberg.de)
  *

+ 0 - 2
arch/powerpc/kernel/rtas_pci.c

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/kernel/rtas_pci.c
- *
  * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
  * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
  *

+ 2 - 3
arch/powerpc/kernel/setup-common.c

@@ -162,9 +162,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
 		unsigned long bogosum = 0;
 		int i;
-		for (i = 0; i < NR_CPUS; ++i)
-			if (cpu_online(i))
-				bogosum += loops_per_jiffy;
+		for_each_online_cpu(i)
+			bogosum += loops_per_jiffy;
 		seq_printf(m, "total bogomips\t: %lu.%02lu\n",
 			   bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
 #endif /* CONFIG_SMP && CONFIG_PPC32 */

+ 2 - 3
arch/powerpc/kernel/setup_32.c

@@ -272,9 +272,8 @@ int __init ppc_init(void)
 	if ( ppc_md.progress ) ppc_md.progress("             ", 0xffff);
 
 	/* register CPU devices */
-	for (i = 0; i < NR_CPUS; i++)
-		if (cpu_possible(i))
-			register_cpu(&cpu_devices[i], i, NULL);
+	for_each_cpu(i)
+		register_cpu(&cpu_devices[i], i, NULL);
 
 	/* call platform init */
 	if (ppc_md.init != NULL) {

+ 0 - 2
arch/powerpc/kernel/setup_64.c

@@ -497,8 +497,6 @@ void __init setup_system(void)
 #endif
 	printk("-----------------------------------------------------\n");
 
-	mm_init_ppc64();
-
 	DBG(" <- setup_system()\n");
 }
 

+ 0 - 2
arch/powerpc/kernel/signal_64.c

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/kernel/signal.c
- *
  *  PowerPC version 
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *

+ 3 - 1
arch/powerpc/kernel/smp.c

@@ -541,7 +541,7 @@ int __devinit start_secondary(void *unused)
 		smp_ops->take_timebase();
 
 	if (system_state > SYSTEM_BOOTING)
-		per_cpu(last_jiffy, cpu) = get_tb();
+		snapshot_timebase();
 
 	spin_lock(&call_lock);
 	cpu_set(cpu, cpu_online_map);
@@ -573,6 +573,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	set_cpus_allowed(current, old_mask);
 
+	snapshot_timebases();
+
 	dump_numa_cpu_topology();
 }
 

+ 239 - 2
arch/powerpc/kernel/time.c

@@ -51,6 +51,7 @@
 #include <linux/percpu.h>
 #include <linux/rtc.h>
 #include <linux/jiffies.h>
+#include <linux/posix-timers.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -98,6 +99,7 @@ unsigned long tb_ticks_per_jiffy;
 unsigned long tb_ticks_per_usec = 100; /* sane default */
 EXPORT_SYMBOL(tb_ticks_per_usec);
 unsigned long tb_ticks_per_sec;
+EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */
 u64 tb_to_xs;
 unsigned tb_to_us;
 
@@ -135,6 +137,224 @@ unsigned long tb_last_stamp;
  */
 DEFINE_PER_CPU(unsigned long, last_jiffy);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Factors for converting from cputime_t (timebase ticks) to
+ * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
+ * These are all stored as 0.64 fixed-point binary fractions.
+ */
+u64 __cputime_jiffies_factor;
+EXPORT_SYMBOL(__cputime_jiffies_factor);
+u64 __cputime_msec_factor;
+EXPORT_SYMBOL(__cputime_msec_factor);
+u64 __cputime_sec_factor;
+EXPORT_SYMBOL(__cputime_sec_factor);
+u64 __cputime_clockt_factor;
+EXPORT_SYMBOL(__cputime_clockt_factor);
+
+static void calc_cputime_factors(void)
+{
+	struct div_result res;
+
+	div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
+	__cputime_jiffies_factor = res.result_low;
+	div128_by_32(1000, 0, tb_ticks_per_sec, &res);
+	__cputime_msec_factor = res.result_low;
+	div128_by_32(1, 0, tb_ticks_per_sec, &res);
+	__cputime_sec_factor = res.result_low;
+	div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
+	__cputime_clockt_factor = res.result_low;
+}
+
+/*
+ * Read the PURR on systems that have it, otherwise the timebase.
+ */
+static u64 read_purr(void)
+{
+	if (cpu_has_feature(CPU_FTR_PURR))
+		return mfspr(SPRN_PURR);
+	return mftb();
+}
+
+/*
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+	u64 now, delta;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	now = read_purr();
+	delta = now - get_paca()->startpurr;
+	get_paca()->startpurr = now;
+	if (!in_interrupt()) {
+		delta += get_paca()->system_time;
+		get_paca()->system_time = 0;
+	}
+	account_system_time(tsk, 0, delta);
+	local_irq_restore(flags);
+}
+
+/*
+ * Transfer the user and system times accumulated in the paca
+ * by the exception entry and exit code to the generic process
+ * user and system time records.
+ * Must be called with interrupts disabled.
+ */
+void account_process_vtime(struct task_struct *tsk)
+{
+	cputime_t utime;
+
+	utime = get_paca()->user_time;
+	get_paca()->user_time = 0;
+	account_user_time(tsk, utime);
+}
+
+static void account_process_time(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	account_process_vtime(current);
+	run_local_timers();
+	if (rcu_pending(cpu))
+		rcu_check_callbacks(cpu, user_mode(regs));
+	scheduler_tick();
+	run_posix_cpu_timers(current);
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+/*
+ * Stuff for accounting stolen time.
+ */
+struct cpu_purr_data {
+	int	initialized;			/* thread is running */
+	u64	tb0;			/* timebase at origin time */
+	u64	purr0;			/* PURR at origin time */
+	u64	tb;			/* last TB value read */
+	u64	purr;			/* last PURR value read */
+	u64	stolen;			/* stolen time so far */
+	spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
+
+static void snapshot_tb_and_purr(void *data)
+{
+	struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
+
+	p->tb0 = mftb();
+	p->purr0 = mfspr(SPRN_PURR);
+	p->tb = p->tb0;
+	p->purr = 0;
+	wmb();
+	p->initialized = 1;
+}
+
+/*
+ * Called during boot when all cpus have come up.
+ */
+void snapshot_timebases(void)
+{
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PURR))
+		return;
+	for_each_cpu(cpu)
+		spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock);
+	on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1);
+}
+
+void calculate_steal_time(void)
+{
+	u64 tb, purr, t0;
+	s64 stolen;
+	struct cpu_purr_data *p0, *pme, *phim;
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PURR))
+		return;
+	cpu = smp_processor_id();
+	pme = &per_cpu(cpu_purr_data, cpu);
+	if (!pme->initialized)
+		return;		/* this can happen in early boot */
+	p0 = &per_cpu(cpu_purr_data, cpu & ~1);
+	phim = &per_cpu(cpu_purr_data, cpu ^ 1);
+	spin_lock(&p0->lock);
+	tb = mftb();
+	purr = mfspr(SPRN_PURR) - pme->purr0;
+	if (!phim->initialized || !cpu_online(cpu ^ 1)) {
+		stolen = (tb - pme->tb) - (purr - pme->purr);
+	} else {
+		t0 = pme->tb0;
+		if (phim->tb0 < t0)
+			t0 = phim->tb0;
+		stolen = phim->tb - t0 - phim->purr - purr - p0->stolen;
+	}
+	if (stolen > 0) {
+		account_steal_time(current, stolen);
+		p0->stolen += stolen;
+	}
+	pme->tb = tb;
+	pme->purr = purr;
+	spin_unlock(&p0->lock);
+}
+
+/*
+ * Must be called before the cpu is added to the online map when
+ * a cpu is being brought up at runtime.
+ */
+static void snapshot_purr(void)
+{
+	int cpu;
+	u64 purr;
+	struct cpu_purr_data *p0, *pme, *phim;
+	unsigned long flags;
+
+	if (!cpu_has_feature(CPU_FTR_PURR))
+		return;
+	cpu = smp_processor_id();
+	pme = &per_cpu(cpu_purr_data, cpu);
+	p0 = &per_cpu(cpu_purr_data, cpu & ~1);
+	phim = &per_cpu(cpu_purr_data, cpu ^ 1);
+	spin_lock_irqsave(&p0->lock, flags);
+	pme->tb = pme->tb0 = mftb();
+	purr = mfspr(SPRN_PURR);
+	if (!phim->initialized) {
+		pme->purr = 0;
+		pme->purr0 = purr;
+	} else {
+		/* set p->purr and p->purr0 for no change in p0->stolen */
+		pme->purr = phim->tb - phim->tb0 - phim->purr - p0->stolen;
+		pme->purr0 = purr - pme->purr;
+	}
+	pme->initialized = 1;
+	spin_unlock_irqrestore(&p0->lock, flags);
+}
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#define calc_cputime_factors()
+#define account_process_time(regs)	update_process_times(user_mode(regs))
+#define calculate_steal_time()		do { } while (0)
+#endif
+
+#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
+#define snapshot_purr()			do { } while (0)
+#endif
+
+/*
+ * Called when a cpu comes up after the system has finished booting,
+ * i.e. as a result of a hotplug cpu action.
+ */
+void snapshot_timebase(void)
+{
+	__get_cpu_var(last_jiffy) = get_tb();
+	snapshot_purr();
+}
+
 void __delay(unsigned long loops)
 {
 	unsigned long start;
@@ -392,6 +612,7 @@ static void iSeries_tb_recal(void)
 						new_tb_ticks_per_jiffy, sign, tick_diff );
 				tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
 				tb_ticks_per_sec   = new_tb_ticks_per_sec;
+				calc_cputime_factors();
 				div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
 				do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
 				tb_to_xs = divres.result_low;
@@ -440,6 +661,7 @@ void timer_interrupt(struct pt_regs * regs)
 	irq_enter();
 
 	profile_tick(CPU_PROFILING, regs);
+	calculate_steal_time();
 
 #ifdef CONFIG_PPC_ISERIES
 	get_lppaca()->int_dword.fields.decr_int = 0;
@@ -461,7 +683,7 @@ void timer_interrupt(struct pt_regs * regs)
 		 * is the case.
 		 */
 		if (!cpu_is_offline(cpu))
-			update_process_times(user_mode(regs));
+			account_process_time(regs);
 
 		/*
 		 * No need to check whether cpu is offline here; boot_cpuid
@@ -518,13 +740,27 @@ void wakeup_decrementer(void)
 void __init smp_space_timers(unsigned int max_cpus)
 {
 	int i;
+	unsigned long half = tb_ticks_per_jiffy / 2;
 	unsigned long offset = tb_ticks_per_jiffy / max_cpus;
 	unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
 
 	/* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
 	previous_tb -= tb_ticks_per_jiffy;
+	/*
+	 * The stolen time calculation for POWER5 shared-processor LPAR
+	 * systems works better if the two threads' timebase interrupts
+	 * are staggered by half a jiffy with respect to each other.
+	 */
 	for_each_cpu(i) {
-		if (i != boot_cpuid) {
+		if (i == boot_cpuid)
+			continue;
+		if (i == (boot_cpuid ^ 1))
+			per_cpu(last_jiffy, i) =
+				per_cpu(last_jiffy, boot_cpuid) - half;
+		else if (i & 1)
+			per_cpu(last_jiffy, i) =
+				per_cpu(last_jiffy, i ^ 1) + half;
+		else {
 			previous_tb += offset;
 			per_cpu(last_jiffy, i) = previous_tb;
 		}
@@ -720,6 +956,7 @@ void __init time_init(void)
 	tb_ticks_per_sec = ppc_tb_freq;
 	tb_ticks_per_usec = ppc_tb_freq / 1000000;
 	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
+	calc_cputime_factors();
 
 	/*
 	 * Calculate the length of each tick in ns.  It will not be

+ 0 - 2
arch/powerpc/kernel/vdso.c

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/kernel/vdso.c
- *
  *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
  *			 <benh@kernel.crashing.org>
  *

+ 0 - 2
arch/powerpc/lib/copypage_64.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/lib/copypage.S
- *
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or

+ 0 - 2
arch/powerpc/lib/copyuser_64.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/lib/copyuser.S
- *
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or

+ 11 - 3
arch/powerpc/lib/e2a.c

@@ -1,9 +1,7 @@
 /*
- *  arch/ppc64/lib/e2a.c
- *
  *  EBCDIC to ASCII conversion
  *
- * This function moved here from arch/ppc64/kernel/viopath.c
+ * This function moved here from arch/powerpc/platforms/iseries/viopath.c 
  *
  * (C) Copyright 2000-2004 IBM Corporation
  *
@@ -105,4 +103,14 @@ unsigned char e2a(unsigned char x)
 }
 EXPORT_SYMBOL(e2a);
 
+unsigned char *strne2a(unsigned char *dest, const unsigned char *src, size_t n)
+{
+	int i;
+
+	n = strnlen(src, n);
+
+	for (i = 0; i < n; i++)
+		dest[i] = e2a(src[i]);
+
+	return dest;
+}

+ 0 - 2
arch/powerpc/lib/memcpy_64.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/lib/memcpy.S
- *
  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or

+ 0 - 2
arch/powerpc/lib/rheap.c

@@ -1,6 +1,4 @@
 /*
- * arch/ppc/syslib/rheap.c
- *
  * A Remote Heap.  Remote means that we don't touch the memory that the
  * heap points to. Normal heap implementations use the memory they manage
  * to place their list. We cannot do that because the memory we manage may

+ 0 - 2
arch/powerpc/mm/fault.c

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/mm/fault.c
- *
  *  PowerPC version
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  *

+ 0 - 2
arch/powerpc/mm/hash_low_32.S

@@ -1,6 +1,4 @@
 /*
- *  arch/ppc/kernel/hashtable.S
- *
  *  $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $
  *
  *  PowerPC version

+ 16 - 16
arch/powerpc/mm/hash_utils_64.c

@@ -169,7 +169,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 #ifdef CONFIG_PPC_ISERIES
 		if (_machine == PLATFORM_ISERIES_LPAR)
 			ret = iSeries_hpte_insert(hpteg, va,
-						  __pa(vaddr),
+						  paddr,
 						  tmp_mode,
 						  HPTE_V_BOLTED,
 						  psize);
@@ -178,7 +178,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 #ifdef CONFIG_PPC_PSERIES
 		if (_machine & PLATFORM_LPAR)
 			ret = pSeries_lpar_hpte_insert(hpteg, va,
-						       virt_to_abs(paddr),
+						       paddr,
 						       tmp_mode,
 						       HPTE_V_BOLTED,
 						       psize);
@@ -186,7 +186,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 #endif
 #ifdef CONFIG_PPC_MULTIPLATFORM
 			ret = native_hpte_insert(hpteg, va,
-						 virt_to_abs(paddr),
+						 paddr,
 						 tmp_mode, HPTE_V_BOLTED,
 						 psize);
 #endif
@@ -392,7 +392,7 @@ static unsigned long __init htab_get_table_size(void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 void create_section_mapping(unsigned long start, unsigned long end)
 {
-		BUG_ON(htab_bolt_mapping(start, end, start,
+		BUG_ON(htab_bolt_mapping(start, end, __pa(start),
 			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX,
 			mmu_linear_psize));
 }
@@ -422,7 +422,7 @@ void __init htab_initialize(void)
 
 	htab_hash_mask = pteg_count - 1;
 
-	if (platform_is_lpar()) {
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		/* Using a hypervisor which owns the htab */
 		htab_address = NULL;
 		_SDR1 = 0; 
@@ -431,7 +431,6 @@ void __init htab_initialize(void)
 		 * the absolute address space.
 		 */
 		table = lmb_alloc(htab_size_bytes, htab_size_bytes);
-		BUG_ON(table == 0);
 
 		DBG("Hash table allocated at %lx, size: %lx\n", table,
 		    htab_size_bytes);
@@ -474,21 +473,22 @@ void __init htab_initialize(void)
 
 		if (dart_tablebase != 0 && dart_tablebase >= base
 		    && dart_tablebase < (base + size)) {
+			unsigned long dart_table_end = dart_tablebase + 16 * MB;
 			if (base != dart_tablebase)
 				BUG_ON(htab_bolt_mapping(base, dart_tablebase,
-							 base, mode_rw,
-							 mmu_linear_psize));
-			if ((base + size) > (dart_tablebase + 16*MB))
+							__pa(base), mode_rw,
+							mmu_linear_psize));
+			if ((base + size) > dart_table_end)
 				BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
-							 base + size,
-							 dart_tablebase+16*MB,
+							base + size,
+							__pa(dart_table_end),
 							 mode_rw,
 							 mmu_linear_psize));
 			continue;
 		}
 #endif /* CONFIG_U3_DART */
-		BUG_ON(htab_bolt_mapping(base, base + size, base,
-					 mode_rw, mmu_linear_psize));
+		BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
+					mode_rw, mmu_linear_psize));
        }
 
 	/*
@@ -505,8 +505,8 @@ void __init htab_initialize(void)
 		if (base + size >= tce_alloc_start)
 			tce_alloc_start = base + size + 1;
 
- 		BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
- 					 tce_alloc_start, mode_rw,
+		BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+					 __pa(tce_alloc_start), mode_rw,
 					 mmu_linear_psize));
 	}
 
@@ -517,7 +517,7 @@ void __init htab_initialize(void)
 
 void htab_initialize_secondary(void)
 {
-	if (!platform_is_lpar())
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		mtspr(SPRN_SDR1, _SDR1);
 }
 

+ 0 - 48
arch/powerpc/mm/init_64.c

@@ -84,54 +84,6 @@
 /* max amount of RAM to use */
 unsigned long __max_memory;
 
-/* info on what we think the IO hole is */
-unsigned long 	io_hole_start;
-unsigned long	io_hole_size;
-
-/*
- * Do very early mm setup.
- */
-void __init mm_init_ppc64(void)
-{
-#ifndef CONFIG_PPC_ISERIES
-	unsigned long i;
-#endif
-
-	ppc64_boot_msg(0x100, "MM Init");
-
-	/* This is the story of the IO hole... please, keep seated,
-	 * unfortunately, we are out of oxygen masks at the moment.
-	 * So we need some rough way to tell where your big IO hole
-	 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
-	 * that area as well, on POWER4 we don't have one, etc...
-	 * We need that as a "hint" when sizing the TCE table on POWER3
-	 * So far, the simplest way that seem work well enough for us it
-	 * to just assume that the first discontinuity in our physical
-	 * RAM layout is the IO hole. That may not be correct in the future
-	 * (and isn't on iSeries but then we don't care ;)
-	 */
-
-#ifndef CONFIG_PPC_ISERIES
-	for (i = 1; i < lmb.memory.cnt; i++) {
-		unsigned long base, prevbase, prevsize;
-
-		prevbase = lmb.memory.region[i-1].base;
-		prevsize = lmb.memory.region[i-1].size;
-		base = lmb.memory.region[i].base;
-		if (base > (prevbase + prevsize)) {
-			io_hole_start = prevbase + prevsize;
-			io_hole_size = base  - (prevbase + prevsize);
-			break;
-		}
-	}
-#endif /* CONFIG_PPC_ISERIES */
-	if (io_hole_start)
-		printk("IO Hole assumed to be %lx -> %lx\n",
-		       io_hole_start, io_hole_start + io_hole_size - 1);
-
-	ppc64_boot_msg(0x100, "MM Init Done");
-}
-
 void free_initmem(void)
 {
 	unsigned long addr;

+ 16 - 0
arch/powerpc/mm/lmb.c

@@ -31,6 +31,8 @@
 #define DBG(fmt...)
 #endif
 
+#define LMB_ALLOC_ANYWHERE	0
+
 struct lmb lmb;
 
 void lmb_dump_all(void)
@@ -225,6 +227,20 @@ unsigned long __init lmb_alloc(unsigned long size, unsigned long align)
 
 unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align,
 				    unsigned long max_addr)
+{
+	unsigned long alloc;
+
+	alloc = __lmb_alloc_base(size, align, max_addr);
+
+	if (alloc == 0)
+		panic("ERROR: Failed to allocate 0x%lx bytes below 0x%lx.\n",
+				size, max_addr);
+
+	return alloc;
+}
+
+unsigned long __init __lmb_alloc_base(unsigned long size, unsigned long align,
+				    unsigned long max_addr)
 {
 	long i, j;
 	unsigned long base = 0;

+ 1 - 2
arch/powerpc/mm/mem.c

@@ -125,7 +125,7 @@ int __devinit add_memory(u64 start, u64 size)
 	nid = hot_add_scn_to_nid(start);
 	pgdata = NODE_DATA(nid);
 
-	start = __va(start);
+	start = (unsigned long)__va(start);
 	create_section_mapping(start, start + size);
 
 	/* this should work for most non-highmem platforms */
@@ -249,7 +249,6 @@ void __init do_init_bootmem(void)
 	bootmap_pages = bootmem_bootmap_pages(total_pages);
 
 	start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
-	BUG_ON(!start);
 
 	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
 

+ 0 - 2
arch/powerpc/mm/mmap.c

@@ -1,6 +1,4 @@
 /*
- *  linux/arch/ppc64/mm/mmap.c
- *
  *  flexible mmap layout support
  *
  * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.

+ 74 - 86
arch/powerpc/mm/numa.c

@@ -129,10 +129,12 @@ void __init get_region(unsigned int nid, unsigned long *start_pfn,
 		*start_pfn = 0;
 }
 
-static inline void map_cpu_to_node(int cpu, int node)
+static void __cpuinit map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
 
+	dbg("adding cpu %d to node %d\n", cpu, node);
+
 	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
 		cpu_set(cpu, numa_cpumask_lookup_table[node]);
 }
@@ -153,7 +155,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static struct device_node *find_cpu_node(unsigned int cpu)
+static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
 {
 	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
 	struct device_node *cpu_node = NULL;
@@ -189,23 +191,29 @@ static int *of_get_associativity(struct device_node *dev)
 	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
 }
 
-static int of_node_numa_domain(struct device_node *device)
+/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+ * info is found.
+ */
+static int of_node_to_nid(struct device_node *device)
 {
-	int numa_domain;
+	int nid = -1;
 	unsigned int *tmp;
 
 	if (min_common_depth == -1)
-		return 0;
+		goto out;
 
 	tmp = of_get_associativity(device);
-	if (tmp && (tmp[0] >= min_common_depth)) {
-		numa_domain = tmp[min_common_depth];
-	} else {
-		dbg("WARNING: no NUMA information for %s\n",
-		    device->full_name);
-		numa_domain = 0;
-	}
-	return numa_domain;
+	if (!tmp)
+		goto out;
+
+	if (tmp[0] >= min_common_depth)
+		nid = tmp[min_common_depth];
+
+	/* POWER4 LPAR uses 0xffff as invalid node */
+	if (nid == 0xffff || nid >= MAX_NUMNODES)
+		nid = -1;
+out:
+	return nid;
 }
 
 /*
@@ -246,8 +254,7 @@ static int __init find_min_common_depth(void)
 	if ((len >= 1) && ref_points) {
 		depth = ref_points[1];
 	} else {
-		dbg("WARNING: could not find NUMA "
-		    "associativity reference point\n");
+		dbg("NUMA: ibm,associativity-reference-points not found.\n");
 		depth = -1;
 	}
 	of_node_put(rtas_root);
@@ -283,9 +290,9 @@ static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
  * Figure out to which domain a cpu belongs and stick it there.
  * Return the id of the domain used.
  */
-static int numa_setup_cpu(unsigned long lcpu)
+static int __cpuinit numa_setup_cpu(unsigned long lcpu)
 {
-	int numa_domain = 0;
+	int nid = 0;
 	struct device_node *cpu = find_cpu_node(lcpu);
 
 	if (!cpu) {
@@ -293,27 +300,16 @@ static int numa_setup_cpu(unsigned long lcpu)
 		goto out;
 	}
 
-	numa_domain = of_node_numa_domain(cpu);
+	nid = of_node_to_nid(cpu);
 
-	if (numa_domain >= num_online_nodes()) {
-		/*
-		 * POWER4 LPAR uses 0xffff as invalid node,
-		 * dont warn in this case.
-		 */
-		if (numa_domain != 0xffff)
-			printk(KERN_ERR "WARNING: cpu %ld "
-			       "maps to invalid NUMA node %d\n",
-			       lcpu, numa_domain);
-		numa_domain = 0;
-	}
+	if (nid < 0 || !node_online(nid))
+		nid = any_online_node(NODE_MASK_ALL);
 out:
-	node_set_online(numa_domain);
-
-	map_cpu_to_node(lcpu, numa_domain);
+	map_cpu_to_node(lcpu, nid);
 
 	of_node_put(cpu);
 
-	return numa_domain;
+	return nid;
 }
 
 static int cpu_numa_callback(struct notifier_block *nfb,
@@ -325,10 +321,7 @@ static int cpu_numa_callback(struct notifier_block *nfb,
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		if (min_common_depth == -1 || !numa_enabled)
-			map_cpu_to_node(lcpu, 0);
-		else
-			numa_setup_cpu(lcpu);
+		numa_setup_cpu(lcpu);
 		ret = NOTIFY_OK;
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -375,7 +368,7 @@ static int __init parse_numa_properties(void)
 {
 	struct device_node *cpu = NULL;
 	struct device_node *memory = NULL;
-	int max_domain;
+	int default_nid = 0;
 	unsigned long i;
 
 	if (numa_enabled == 0) {
@@ -385,32 +378,32 @@ static int __init parse_numa_properties(void)
 
 	min_common_depth = find_min_common_depth();
 
-	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
 	if (min_common_depth < 0)
 		return min_common_depth;
 
-	max_domain = numa_setup_cpu(boot_cpuid);
+	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
 
 	/*
-	 * Even though we connect cpus to numa domains later in SMP init,
-	 * we need to know the maximum node id now. This is because each
-	 * node id must have NODE_DATA etc backing it.
-	 * As a result of hotplug we could still have cpus appear later on
-	 * with larger node ids. In that case we force the cpu into node 0.
+	 * Even though we connect cpus to numa domains later in SMP
+	 * init, we need to know the node ids now. This is because
+	 * each node to be onlined must have NODE_DATA etc backing it.
 	 */
-	for_each_cpu(i) {
-		int numa_domain;
+	for_each_present_cpu(i) {
+		int nid;
 
 		cpu = find_cpu_node(i);
+		BUG_ON(!cpu);
+		nid = of_node_to_nid(cpu);
+		of_node_put(cpu);
 
-		if (cpu) {
-			numa_domain = of_node_numa_domain(cpu);
-			of_node_put(cpu);
-
-			if (numa_domain < MAX_NUMNODES &&
-			    max_domain < numa_domain)
-				max_domain = numa_domain;
-		}
+		/*
+		 * Don't fall back to default_nid yet -- we will plug
+		 * cpus into nodes once the memory scan has discovered
+		 * the topology.
+		 */
+		if (nid < 0)
+			continue;
+		node_set_online(nid);
 	}
 
 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -418,7 +411,7 @@ static int __init parse_numa_properties(void)
 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
 		unsigned long start;
 		unsigned long size;
-		int numa_domain;
+		int nid;
 		int ranges;
 		unsigned int *memcell_buf;
 		unsigned int len;
@@ -439,18 +432,15 @@ new_range:
 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
 
-		numa_domain = of_node_numa_domain(memory);
-
-		if (numa_domain >= MAX_NUMNODES) {
-			if (numa_domain != 0xffff)
-				printk(KERN_ERR "WARNING: memory at %lx maps "
-				       "to invalid NUMA node %d\n", start,
-				       numa_domain);
-			numa_domain = 0;
-		}
-
-		if (max_domain < numa_domain)
-			max_domain = numa_domain;
+		/*
+		 * Assumption: either all memory nodes or none will
+		 * have associativity properties.  If none, then
+		 * everything goes to default_nid.
+		 */
+		nid = of_node_to_nid(memory);
+		if (nid < 0)
+			nid = default_nid;
+		node_set_online(nid);
 
 		if (!(size = numa_enforce_memory_limit(start, size))) {
 			if (--ranges)
@@ -459,16 +449,13 @@ new_range:
 				continue;
 		}
 
-		add_region(numa_domain, start >> PAGE_SHIFT,
+		add_region(nid, start >> PAGE_SHIFT,
 			   size >> PAGE_SHIFT);
 
 		if (--ranges)
 			goto new_range;
 	}
 
-	for (i = 0; i <= max_domain; i++)
-		node_set_online(i);
-
 	return 0;
 }
 
@@ -483,7 +470,6 @@ static void __init setup_nonnuma(void)
 	printk(KERN_INFO "Memory hole size: %ldMB\n",
 	       (top_of_ram - total_ram) >> 20);
 
-	map_cpu_to_node(boot_cpuid, 0);
 	for (i = 0; i < lmb.memory.cnt; ++i)
 		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
 			   lmb_size_pages(&lmb.memory, i));
@@ -570,11 +556,11 @@ static void __init *careful_allocation(int nid, unsigned long size,
 				       unsigned long end_pfn)
 {
 	int new_nid;
-	unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
 	/* retry over all memory */
 	if (!ret)
-		ret = lmb_alloc_base(size, align, lmb_end_of_DRAM());
+		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
 
 	if (!ret)
 		panic("numa.c: cannot allocate %lu bytes on node %d",
@@ -620,6 +606,8 @@ void __init do_init_bootmem(void)
 		dump_numa_memory_topology();
 
 	register_cpu_notifier(&ppc64_numa_nb);
+	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
+			  (void *)(unsigned long)boot_cpuid);
 
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn, pages_present;
@@ -767,10 +755,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 {
 	struct device_node *memory = NULL;
 	nodemask_t nodes;
-	int numa_domain = 0;
+	int default_nid = any_online_node(NODE_MASK_ALL);
 
 	if (!numa_enabled || (min_common_depth < 0))
-		return numa_domain;
+		return default_nid;
 
 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
 		unsigned long start, size;
@@ -787,15 +775,15 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 ha_new_range:
 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
-		numa_domain = of_node_numa_domain(memory);
+		nid = of_node_to_nid(memory);
 
 		/* Domains not present at boot default to 0 */
-		if (!node_online(numa_domain))
-			numa_domain = any_online_node(NODE_MASK_ALL);
+		if (nid < 0 || !node_online(nid))
+			nid = default_nid;
 
 		if ((scn_addr >= start) && (scn_addr < (start + size))) {
 			of_node_put(memory);
-			goto got_numa_domain;
+			goto got_nid;
 		}
 
 		if (--ranges)		/* process all ranges in cell */
@@ -804,12 +792,12 @@ ha_new_range:
 	BUG();	/* section address should be found above */
 
 	/* Temporary code to ensure that returned node is not empty */
-got_numa_domain:
+got_nid:
 	nodes_setall(nodes);
-	while (NODE_DATA(numa_domain)->node_spanned_pages == 0) {
-		node_clear(numa_domain, nodes);
-		numa_domain = any_online_node(nodes);
+	while (NODE_DATA(nid)->node_spanned_pages == 0) {
+		node_clear(nid, nodes);
+		nid = any_online_node(nodes);
 	}
-	return numa_domain;
+	return nid;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */

+ 0 - 2
arch/powerpc/mm/slb_low.S

@@ -1,6 +1,4 @@
 /*
- * arch/ppc64/mm/slb_low.S
- *
  * Low-level SLB routines
  *
  * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM

+ 0 - 4
arch/powerpc/mm/stab.c

@@ -247,10 +247,6 @@ void stabs_alloc(void)
 
 		newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
 					 1<<SID_SHIFT);
-		if (! newstab)
-			panic("Unable to allocate segment table for CPU %d.\n",
-			      cpu);
-
 		newstab = (unsigned long)__va(newstab);
 
 		memset((void *)newstab, 0, HW_PAGE_SIZE);

+ 1 - 1
arch/powerpc/mm/tlb_64.c

@@ -36,7 +36,7 @@
 DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 
 /* This is declared as we are using the more or less generic
- * include/asm-ppc64/tlb.h file -- tgall
+ * include/asm-powerpc/tlb.h file -- tgall
  */
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);

Some files were not shown because a large number of files changed in this diff.