ソースを参照

Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (36 commits)
  powerpc/gc/wii: Remove get_irq_desc()
  powerpc/gc/wii: hlwd-pic: convert irq_desc.lock to raw_spinlock
  powerpc/gamecube/wii: Fix off-by-one error in ugecon/usbgecko_udbg
  powerpc/mpic: Fix problem that affinity is not updated
  powerpc/mm: Fix stupid bug in subpge protection handling
  powerpc/iseries: use DECLARE_COMPLETION_ONSTACK for non-constant completion
  powerpc: Fix MSI support on U4 bridge PCIe slot
  powerpc: Handle VSX alignment faults correctly in little-endian mode
  powerpc/mm: Fix typo of cpumask_clear_cpu()
  powerpc/mm: Fix hash_utils_64.c compile errors with DEBUG enabled.
  powerpc: Convert BUG() to use unreachable()
  powerpc/pseries: Make declarations of cpu_hotplug_driver_lock() ANSI compatible.
  powerpc/pseries: Don't panic when H_PROD fails during cpu-online.
  powerpc/mm: Fix a WARN_ON() with CONFIG_DEBUG_PAGEALLOC and CONFIG_DEBUG_VM
  powerpc/defconfigs: Set HZ=100 on pseries and ppc64 defconfigs
  powerpc/defconfigs: Disable token ring in powerpc defconfigs
  powerpc/defconfigs: Reduce 64bit vmlinux by making acenic and cramfs modules
  powerpc/pseries: Select XICS and PCI_MSI PSERIES
  powerpc/85xx: Wrong variable returned on error
  powerpc/iseries: Convert to proc_fops
  ...
Linus Torvalds 15 年 前
コミット
dd508ae2db
37 ファイル変更876 行追加178 行削除
  1. 42 0
      Documentation/powerpc/dts-bindings/fsl/mpic.txt
  2. 51 1
      arch/powerpc/boot/dts/katmai.dts
  3. 27 0
      arch/powerpc/boot/dts/mpc8315erdb.dts
  4. 81 1
      arch/powerpc/boot/dts/mpc8349emitx.dts
  5. 1 1
      arch/powerpc/boot/dts/warp.dts
  6. 1 1
      arch/powerpc/boot/ugecon.c
  7. 3 3
      arch/powerpc/configs/g5_defconfig
  8. 2 2
      arch/powerpc/configs/iseries_defconfig
  9. 7 7
      arch/powerpc/configs/ppc64_defconfig
  10. 2 2
      arch/powerpc/configs/ppc64e_defconfig
  11. 7 7
      arch/powerpc/configs/pseries_defconfig
  12. 1 1
      arch/powerpc/include/asm/bug.h
  13. 1 4
      arch/powerpc/include/asm/gpio.h
  14. 46 17
      arch/powerpc/kernel/align.c
  15. 6 6
      arch/powerpc/mm/hash_utils_64.c
  16. 1 1
      arch/powerpc/mm/mmu_context_nohash.c
  17. 1 1
      arch/powerpc/mm/pgtable_32.c
  18. 50 2
      arch/powerpc/platforms/83xx/suspend.c
  19. 1 1
      arch/powerpc/platforms/85xx/mpc85xx_mds.c
  20. 1 1
      arch/powerpc/platforms/embedded6xx/flipper-pic.c
  21. 5 5
      arch/powerpc/platforms/embedded6xx/hlwd-pic.c
  22. 1 1
      arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
  23. 83 64
      arch/powerpc/platforms/iseries/mf.c
  24. 1 1
      arch/powerpc/platforms/iseries/viopath.c
  25. 2 0
      arch/powerpc/platforms/pseries/Kconfig
  26. 248 6
      arch/powerpc/platforms/pseries/cmm.c
  27. 4 2
      arch/powerpc/platforms/pseries/dlpar.c
  28. 2 2
      arch/powerpc/platforms/pseries/smp.c
  29. 21 7
      arch/powerpc/sysdev/cpm2_pic.c
  30. 6 2
      arch/powerpc/sysdev/fsl_pci.c
  31. 20 1
      arch/powerpc/sysdev/mpc8xxx_gpio.c
  32. 6 13
      arch/powerpc/sysdev/mpic.c
  33. 10 1
      arch/powerpc/sysdev/mpic_msi.c
  34. 39 7
      arch/powerpc/sysdev/mpic_u3msi.c
  35. 19 0
      drivers/base/memory.c
  36. 27 0
      include/linux/memory.h
  37. 50 7
      mm/page_alloc.c

+ 42 - 0
Documentation/powerpc/dts-bindings/fsl/mpic.txt

@@ -0,0 +1,42 @@
+* OpenPIC and its interrupt numbers on Freescale's e500/e600 cores
+
+The OpenPIC specification does not specify which interrupt source has to
+become which interrupt number. This is up to the software implementation
+of the interrupt controller. The only requirement is that every
+interrupt source has to have an unique interrupt number / vector number.
+To accomplish this the current implementation assigns the number zero to
+the first source, the number one to the second source and so on until
+all interrupt sources have their unique number.
+Usually the assigned vector number equals the interrupt number mentioned
+in the documentation for a given core / CPU. This is however not true
+for the e500 cores (MPC85XX CPUs) where the documentation distinguishes
+between internal and external interrupt sources and starts counting at
+zero for both of them.
+
+So what to write for external interrupt source X or internal interrupt
+source Y into the device tree? Here is an example:
+
+The memory map for the interrupt controller in the MPC8544[0] shows,
+that the first interrupt source starts at 0x5_0000 (PIC Register Address
+Map-Interrupt Source Configuration Registers). This source becomes the
+number zero therefore:
+ External interrupt 0 = interrupt number 0
+ External interrupt 1 = interrupt number 1
+ External interrupt 2 = interrupt number 2
+ ...
+Every interrupt number allocates 0x20 bytes register space. So to get
+its number it is sufficient to shift the lower 16bits to right by five.
+So for the external interrupt 10 we have:
+  0x0140 >> 5 = 10
+
+After the external sources, the internal sources follow. The in core I2C
+controller on the MPC8544 for instance has the internal source number
+27. Oo obtain its interrupt number we take the lower 16bits of its memory
+address (0x5_0560) and shift it right:
+ 0x0560 >> 5 = 43
+
+Therefore the I2C device node for the MPC8544 CPU has to have the
+interrupt number 43 specified in the device tree.
+
+[0] MPC8544E PowerQUICCTM III, Integrated Host Processor Family Reference Manual
+    MPC8544ERM Rev. 1 10/2007

+ 51 - 1
arch/powerpc/boot/dts/katmai.dts

@@ -108,12 +108,19 @@
 		dcr-reg = <0x00c 0x002>;
 	};
 
+	MQ0: mq {
+		compatible = "ibm,mq-440spe";
+		dcr-reg = <0x040 0x020>;
+	};
+
 	plb {
 		compatible = "ibm,plb-440spe", "ibm,plb-440gp", "ibm,plb4";
 		#address-cells = <2>;
 		#size-cells = <1>;
 		/*        addr-child     addr-parent    size */
-		ranges = <0x4 0xe0000000 0x4 0xe0000000 0x20000000
+		ranges = <0x4 0x00100000 0x4 0x00100000 0x00001000
+			  0x4 0x00200000 0x4 0x00200000 0x00000400
+			  0x4 0xe0000000 0x4 0xe0000000 0x20000000
 			  0xc 0x00000000 0xc 0x00000000 0x20000000
 			  0xd 0x00000000 0xd 0x00000000 0x80000000
 			  0xd 0x80000000 0xd 0x80000000 0x80000000
@@ -400,6 +407,49 @@
 				0x0 0x0 0x0 0x3 &UIC3 0xa 0x4 /* swizzled int C */
 				0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>;
 		};
+
+		I2O: i2o@400100000 {
+			compatible = "ibm,i2o-440spe";
+			reg = <0x00000004 0x00100000 0x100>;
+			dcr-reg = <0x060 0x020>;
+		};
+
+		DMA0: dma0@400100100 {
+			compatible = "ibm,dma-440spe";
+			cell-index = <0>;
+			reg = <0x00000004 0x00100100 0x100>;
+			dcr-reg = <0x060 0x020>;
+			interrupt-parent = <&DMA0>;
+			interrupts = <0 1>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = <
+				0 &UIC0 0x14 4
+				1 &UIC1 0x16 4>;
+		};
+
+		DMA1: dma1@400100200 {
+			compatible = "ibm,dma-440spe";
+			cell-index = <1>;
+			reg = <0x00000004 0x00100200 0x100>;
+			dcr-reg = <0x060 0x020>;
+			interrupt-parent = <&DMA1>;
+			interrupts = <0 1>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = <
+				0 &UIC0 0x16 4
+				1 &UIC1 0x16 4>;
+		};
+
+		xor-accel@400200000 {
+			compatible = "amcc,xor-accelerator";
+			reg = <0x00000004 0x00200000 0x400>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <0x1f 4>;
+		};
 	};
 
 	chosen {

+ 27 - 0
arch/powerpc/boot/dts/mpc8315erdb.dts

@@ -204,6 +204,7 @@
 			interrupt-parent = <&ipic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = < &phy0 >;
+			fsl,magic-packet;
 
 			mdio@520 {
 				#address-cells = <1>;
@@ -246,6 +247,7 @@
 			interrupt-parent = <&ipic>;
 			tbi-handle = <&tbi1>;
 			phy-handle = < &phy1 >;
+			fsl,magic-packet;
 
 			mdio@520 {
 				#address-cells = <1>;
@@ -309,6 +311,22 @@
 			interrupt-parent = <&ipic>;
 		};
 
+		gtm1: timer@500 {
+			compatible = "fsl,mpc8315-gtm", "fsl,gtm";
+			reg = <0x500 0x100>;
+			interrupts = <90 8 78 8 84 8 72 8>;
+			interrupt-parent = <&ipic>;
+			clock-frequency = <133333333>;
+		};
+
+		timer@600 {
+			compatible = "fsl,mpc8315-gtm", "fsl,gtm";
+			reg = <0x600 0x100>;
+			interrupts = <91 8 79 8 85 8 73 8>;
+			interrupt-parent = <&ipic>;
+			clock-frequency = <133333333>;
+		};
+
 		/* IPIC
 		 * interrupts cell = <intr #, sense>
 		 * sense values match linux IORESOURCE_IRQ_* defines:
@@ -337,6 +355,15 @@
 				      0x59 0x8>;
 			interrupt-parent = < &ipic >;
 		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8315-pmc", "fsl,mpc8313-pmc",
+				     "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 8>;
+			interrupt-parent = <&ipic>;
+			fsl,mpc8313-wakeup-timer = <&gtm1>;
+		};
 	};
 
 	pci0: pci@e0008500 {

+ 81 - 1
arch/powerpc/boot/dts/mpc8349emitx.dts

@@ -63,6 +63,24 @@
 			reg = <0x200 0x100>;
 		};
 
+		gpio1: gpio-controller@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8349-gpio";
+			reg = <0xc00 0x100>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		gpio2: gpio-controller@d00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8349-gpio";
+			reg = <0xd00 0x100>;
+			interrupts = <75 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
 		i2c@3000 {
 			#address-cells = <1>;
 			#size-cells = <0>;
@@ -72,6 +90,12 @@
 			interrupts = <14 0x8>;
 			interrupt-parent = <&ipic>;
 			dfsrr;
+
+			eeprom: at24@50 {
+				compatible = "st-micro,24c256";
+				reg = <0x50>;
+			};
+
 		};
 
 		i2c@3100 {
@@ -91,6 +115,25 @@
 				interrupt-parent = <&ipic>;
 			};
 
+			pcf1: iexp@38 {
+				#gpio-cells = <2>;
+				compatible = "ti,pcf8574a";
+				reg = <0x38>;
+				gpio-controller;
+			};
+
+			pcf2: iexp@39 {
+				#gpio-cells = <2>;
+				compatible = "ti,pcf8574a";
+				reg = <0x39>;
+				gpio-controller;
+			};
+
+			spd: at24@51 {
+				compatible = "at24,spd";
+				reg = <0x51>;
+			};
+
 			mcu_pio: mcu@a {
 				#gpio-cells = <2>;
 				compatible = "fsl,mc9s08qg8-mpc8349emitx",
@@ -275,6 +318,24 @@
 			reg = <0x700 0x100>;
 			device_type = "ipic";
 		};
+
+		gpio-leds {
+			compatible = "gpio-leds";
+
+			green {
+				label = "Green";
+				gpios = <&pcf1 0 1>;
+				linux,default-trigger = "heartbeat";
+			};
+
+			yellow {
+				label = "Yellow";
+				gpios = <&pcf1 1 1>;
+				/* linux,default-trigger = "heartbeat"; */
+				default-state = "on";
+			};
+		};
+
 	};
 
 	pci0: pci@e0008500 {
@@ -331,7 +392,26 @@
 		compatible = "fsl,mpc8349e-localbus",
 			     "fsl,pq2pro-localbus";
 		reg = <0xe0005000 0xd8>;
-		ranges = <0x3 0x0 0xf0000000 0x210>;
+		ranges = <0x0 0x0 0xfe000000 0x1000000	/* flash */
+			  0x1 0x0 0xf8000000 0x20000	/* VSC 7385 */
+			  0x2 0x0 0xf9000000 0x200000	/* exp slot */
+			  0x3 0x0 0xf0000000 0x210>;	/* CF slot */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0x0      0x0 0x800000>;
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		flash@0,800000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x800000 0x800000>;
+			bank-width = <2>;
+			device-width = <1>;
+		};
 
 		pata@3,0 {
 			compatible = "fsl,mpc8349emitx-pata", "ata-generic";

+ 1 - 1
arch/powerpc/boot/dts/warp.dts

@@ -146,7 +146,7 @@
 
 				fpga@2,4000 {
 					compatible = "pika,fpga-sd";
-					reg = <0x00000002 0x00004000 0x00000A00>;
+					reg = <0x00000002 0x00004000 0x00004000>;
 				};
 
 				nor@0,0 {

+ 1 - 1
arch/powerpc/boot/ugecon.c

@@ -86,7 +86,7 @@ static void ug_putc(char ch)
 
 	while (!ug_is_txfifo_ready() && count--)
 		barrier();
-	if (count)
+	if (count >= 0)
 		ug_raw_putc(ch);
 }
 

+ 3 - 3
arch/powerpc/configs/g5_defconfig

@@ -757,7 +757,7 @@ CONFIG_SUNGEM=y
 # CONFIG_B44 is not set
 # CONFIG_ATL2 is not set
 CONFIG_NETDEV_1000=y
-CONFIG_ACENIC=y
+CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
 # CONFIG_DL2K is not set
 CONFIG_E1000=y
@@ -794,8 +794,8 @@ CONFIG_NETDEV_10000=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
-CONFIG_TR=y
-CONFIG_IBMOL=y
+# CONFIG_TR is not set
+# CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
 # CONFIG_TMS380TR is not set
 

+ 2 - 2
arch/powerpc/configs/iseries_defconfig

@@ -714,8 +714,8 @@ CONFIG_NETDEV_10000=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
-CONFIG_TR=y
-CONFIG_IBMOL=y
+# CONFIG_TR is not set
+# CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
 # CONFIG_TMS380TR is not set
 

+ 7 - 7
arch/powerpc/configs/ppc64_defconfig

@@ -304,11 +304,11 @@ CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
 CONFIG_SCHED_HRTICK=y
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
@@ -980,7 +980,7 @@ CONFIG_E100=y
 # CONFIG_SC92031 is not set
 # CONFIG_ATL2 is not set
 CONFIG_NETDEV_1000=y
-CONFIG_ACENIC=y
+CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
 # CONFIG_DL2K is not set
 CONFIG_E1000=y
@@ -1023,8 +1023,8 @@ CONFIG_PASEMI_MAC=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
-CONFIG_TR=y
-CONFIG_IBMOL=y
+# CONFIG_TR is not set
+# CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
 # CONFIG_TMS380TR is not set
 
@@ -1863,7 +1863,7 @@ CONFIG_HFSPLUS_FS=m
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_CRAMFS=y
+CONFIG_CRAMFS=m
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_OMFS_FS is not set

+ 2 - 2
arch/powerpc/configs/ppc64e_defconfig

@@ -1008,8 +1008,8 @@ CONFIG_IXGB=m
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
 # CONFIG_BE2NET is not set
-CONFIG_TR=y
-CONFIG_IBMOL=y
+# CONFIG_TR is not set
+# CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
 # CONFIG_TMS380TR is not set
 CONFIG_WLAN=y

+ 7 - 7
arch/powerpc/configs/pseries_defconfig

@@ -230,11 +230,11 @@ CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
 CONFIG_SCHED_HRTICK=y
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
@@ -796,7 +796,7 @@ CONFIG_E100=y
 # CONFIG_NET_POCKET is not set
 # CONFIG_ATL2 is not set
 CONFIG_NETDEV_1000=y
-CONFIG_ACENIC=y
+CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
 # CONFIG_DL2K is not set
 CONFIG_E1000=y
@@ -834,8 +834,8 @@ CONFIG_S2IO=m
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
-CONFIG_TR=y
-CONFIG_IBMOL=y
+# CONFIG_TR is not set
+# CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
 # CONFIG_TMS380TR is not set
 
@@ -1494,7 +1494,7 @@ CONFIG_CONFIGFS_FS=m
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_CRAMFS=y
+CONFIG_CRAMFS=m
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_OMFS_FS is not set

+ 1 - 1
arch/powerpc/include/asm/bug.h

@@ -68,7 +68,7 @@
 		_EMIT_BUG_ENTRY					\
 		: : "i" (__FILE__), "i" (__LINE__),		\
 		    "i" (0), "i"  (sizeof(struct bug_entry)));	\
-	for(;;) ;						\
+	unreachable();						\
 } while (0)
 
 #define BUG_ON(x) do {						\

+ 1 - 4
arch/powerpc/include/asm/gpio.h

@@ -38,12 +38,9 @@ static inline int gpio_cansleep(unsigned int gpio)
 	return __gpio_cansleep(gpio);
 }
 
-/*
- * Not implemented, yet.
- */
 static inline int gpio_to_irq(unsigned int gpio)
 {
-	return -ENOSYS;
+	return __gpio_to_irq(gpio);
 }
 
 static inline int irq_to_gpio(unsigned int irq)

+ 46 - 17
arch/powerpc/kernel/align.c

@@ -642,10 +642,14 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
  */
 static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
 		       unsigned int areg, struct pt_regs *regs,
-		       unsigned int flags, unsigned int length)
+		       unsigned int flags, unsigned int length,
+		       unsigned int elsize)
 {
 	char *ptr;
+	unsigned long *lptr;
 	int ret = 0;
+	int sw = 0;
+	int i, j;
 
 	flush_vsx_to_thread(current);
 
@@ -654,19 +658,35 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
 	else
 		ptr = (char *) &current->thread.vr[reg - 32];
 
-	if (flags & ST)
-		ret = __copy_to_user(addr, ptr, length);
-        else {
-		if (flags & SPLT){
-			ret = __copy_from_user(ptr, addr, length);
-			ptr += length;
+	lptr = (unsigned long *) ptr;
+
+	if (flags & SW)
+		sw = elsize-1;
+
+	for (j = 0; j < length; j += elsize) {
+		for (i = 0; i < elsize; ++i) {
+			if (flags & ST)
+				ret |= __put_user(ptr[i^sw], addr + i);
+			else
+				ret |= __get_user(ptr[i^sw], addr + i);
 		}
-		ret |= __copy_from_user(ptr, addr, length);
+		ptr  += elsize;
+		addr += elsize;
 	}
-	if (flags & U)
-		regs->gpr[areg] = regs->dar;
-	if (ret)
+
+	if (!ret) {
+		if (flags & U)
+			regs->gpr[areg] = regs->dar;
+
+		/* Splat load copies the same data to top and bottom 8 bytes */
+		if (flags & SPLT)
+			lptr[1] = lptr[0];
+		/* For 8 byte loads, zero the top 8 bytes */
+		else if (!(flags & ST) && (8 == length))
+			lptr[1] = 0;
+	} else
 		return -EFAULT;
+
 	return 1;
 }
 #endif
@@ -767,16 +787,25 @@ int fix_alignment(struct pt_regs *regs)
 
 #ifdef CONFIG_VSX
 	if ((instruction & 0xfc00003e) == 0x7c000018) {
-		/* Additional register addressing bit (64 VSX vs 32 FPR/GPR */
+		unsigned int elsize;
+
+		/* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
 		reg |= (instruction & 0x1) << 5;
 		/* Simple inline decoder instead of a table */
+		/* VSX has only 8 and 16 byte memory accesses */
+		nb = 8;
 		if (instruction & 0x200)
 			nb = 16;
-		else if (instruction & 0x080)
-			nb = 8;
-		else
-			nb = 4;
+
+		/* Vector stores in little-endian mode swap individual
+		   elements, so process them separately */
+		elsize = 4;
+		if (instruction & 0x80)
+			elsize = 8;
+
 		flags = 0;
+		if (regs->msr & MSR_LE)
+			flags |= SW;
 		if (instruction & 0x100)
 			flags |= ST;
 		if (instruction & 0x040)
@@ -787,7 +816,7 @@ int fix_alignment(struct pt_regs *regs)
 			nb = 8;
 		}
 		PPC_WARN_ALIGNMENT(vsx, regs);
-		return emulate_vsx(addr, reg, areg, regs, flags, nb);
+		return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
 	}
 #endif
 	/* A size of 0 indicates an instruction we don't support, with

+ 6 - 6
arch/powerpc/mm/hash_utils_64.c

@@ -340,7 +340,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 			else
 				def->tlbiel = 0;
 
-			DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
+			DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, "
 			    "tlbiel=%d, penc=%d\n",
 			    idx, shift, def->sllp, def->avpnm, def->tlbiel,
 			    def->penc);
@@ -663,7 +663,7 @@ static void __init htab_initialize(void)
 		base = (unsigned long)__va(lmb.memory.region[i].base);
 		size = lmb.memory.region[i].size;
 
-		DBG("creating mapping for region: %lx..%lx (prot: %x)\n",
+		DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
 		    base, size, prot);
 
 #ifdef CONFIG_U3_DART
@@ -879,7 +879,7 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
  */
 int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 {
-	void *pgdir;
+	pgd_t *pgdir;
 	unsigned long vsid;
 	struct mm_struct *mm;
 	pte_t *ptep;
@@ -1025,7 +1025,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	else
 #endif /* CONFIG_PPC_HAS_HASH_64K */
 	{
-		int spp = subpage_protection(pgdir, ea);
+		int spp = subpage_protection(mm, ea);
 		if (access & spp)
 			rc = -2;
 		else
@@ -1115,7 +1115,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
 {
 	unsigned long hash, index, shift, hidx, slot;
 
-	DBG_LOW("flush_hash_page(va=%016x)\n", va);
+	DBG_LOW("flush_hash_page(va=%016lx)\n", va);
 	pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
 		hash = hpt_hash(va, shift, ssize);
 		hidx = __rpte_to_hidx(pte, index);
@@ -1123,7 +1123,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += hidx & _PTEIDX_GROUP_IX;
-		DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
+		DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
 		ppc_md.hpte_invalidate(slot, va, psize, ssize, local);
 	} pte_iterate_hashed_end();
 }

+ 1 - 1
arch/powerpc/mm/mmu_context_nohash.c

@@ -353,7 +353,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 		read_lock(&tasklist_lock);
 		for_each_process(p) {
 			if (p->mm)
-				cpu_mask_clear_cpu(cpu, mm_cpumask(p->mm));
+				cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
 		}
 		read_unlock(&tasklist_lock);
 	break;

+ 1 - 1
arch/powerpc/mm/pgtable_32.c

@@ -382,7 +382,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		return 0;
 	if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
 		return -EINVAL;
-	set_pte_at(&init_mm, address, kpte, mk_pte(page, prot));
+	__set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
 	wmb();
 #ifdef CONFIG_PPC_STD_MMU
 	flush_hash_pages(0, address, pmd_val(*kpmd), 1);

+ 50 - 2
arch/powerpc/platforms/83xx/suspend.c

@@ -32,6 +32,7 @@
 #define PMCCR1_NEXT_STATE       0x0C /* Next state for power management */
 #define PMCCR1_NEXT_STATE_SHIFT 2
 #define PMCCR1_CURR_STATE       0x03 /* Current state for power management*/
+#define IMMR_SYSCR_OFFSET       0x100
 #define IMMR_RCW_OFFSET         0x900
 #define RCW_PCI_HOST            0x80000000
 
@@ -78,6 +79,22 @@ struct mpc83xx_clock {
 	u32 sccr;
 };
 
+struct mpc83xx_syscr {
+	__be32 sgprl;
+	__be32 sgprh;
+	__be32 spridr;
+	__be32 :32;
+	__be32 spcr;
+	__be32 sicrl;
+	__be32 sicrh;
+};
+
+struct mpc83xx_saved {
+	u32 sicrl;
+	u32 sicrh;
+	u32 sccr;
+};
+
 struct pmc_type {
 	int has_deep_sleep;
 };
@@ -87,6 +104,8 @@ static int has_deep_sleep, deep_sleeping;
 static int pmc_irq;
 static struct mpc83xx_pmc __iomem *pmc_regs;
 static struct mpc83xx_clock __iomem *clock_regs;
+static struct mpc83xx_syscr __iomem *syscr_regs;
+static struct mpc83xx_saved saved_regs;
 static int is_pci_agent, wake_from_pci;
 static phys_addr_t immrbase;
 static int pci_pm_state;
@@ -137,6 +156,20 @@ static irqreturn_t pmc_irq_handler(int irq, void *dev_id)
 	return ret;
 }
 
+static void mpc83xx_suspend_restore_regs(void)
+{
+	out_be32(&syscr_regs->sicrl, saved_regs.sicrl);
+	out_be32(&syscr_regs->sicrh, saved_regs.sicrh);
+	out_be32(&clock_regs->sccr, saved_regs.sccr);
+}
+
+static void mpc83xx_suspend_save_regs(void)
+{
+	saved_regs.sicrl = in_be32(&syscr_regs->sicrl);
+	saved_regs.sicrh = in_be32(&syscr_regs->sicrh);
+	saved_regs.sccr = in_be32(&clock_regs->sccr);
+}
+
 static int mpc83xx_suspend_enter(suspend_state_t state)
 {
 	int ret = -EAGAIN;
@@ -166,6 +199,8 @@ static int mpc83xx_suspend_enter(suspend_state_t state)
 	 */
 
 	if (deep_sleeping) {
+		mpc83xx_suspend_save_regs();
+
 		out_be32(&pmc_regs->mask, PMCER_ALL);
 
 		out_be32(&pmc_regs->config1,
@@ -179,6 +214,8 @@ static int mpc83xx_suspend_enter(suspend_state_t state)
 		         in_be32(&pmc_regs->config1) & ~PMCCR1_POWER_OFF);
 
 		out_be32(&pmc_regs->mask, PMCER_PMCI);
+
+		mpc83xx_suspend_restore_regs();
 	} else {
 		out_be32(&pmc_regs->mask, PMCER_PMCI);
 
@@ -194,7 +231,7 @@ out:
 	return ret;
 }
 
-static void mpc83xx_suspend_finish(void)
+static void mpc83xx_suspend_end(void)
 {
 	deep_sleeping = 0;
 }
@@ -278,7 +315,7 @@ static struct platform_suspend_ops mpc83xx_suspend_ops = {
 	.valid = mpc83xx_suspend_valid,
 	.begin = mpc83xx_suspend_begin,
 	.enter = mpc83xx_suspend_enter,
-	.finish = mpc83xx_suspend_finish,
+	.end = mpc83xx_suspend_end,
 };
 
 static int pmc_probe(struct of_device *ofdev,
@@ -333,12 +370,23 @@ static int pmc_probe(struct of_device *ofdev,
 		goto out_pmc;
 	}
 
+	if (has_deep_sleep) {
+		syscr_regs = ioremap(immrbase + IMMR_SYSCR_OFFSET,
+				     sizeof(*syscr_regs));
+		if (!syscr_regs) {
+			ret = -ENOMEM;
+			goto out_syscr;
+		}
+	}
+
 	if (is_pci_agent)
 		mpc83xx_set_agent();
 
 	suspend_set_ops(&mpc83xx_suspend_ops);
 	return 0;
 
+out_syscr:
+	iounmap(clock_regs);
 out_pmc:
 	iounmap(pmc_regs);
 out:

+ 1 - 1
arch/powerpc/platforms/85xx/mpc85xx_mds.c

@@ -86,7 +86,7 @@ static int mpc8568_fixup_125_clock(struct phy_device *phydev)
 	scr = phy_read(phydev, MV88E1111_SCR);
 
 	if (scr < 0)
-		return err;
+		return scr;
 
 	err = phy_write(phydev, MV88E1111_SCR, scr | 0x0008);
 

+ 1 - 1
arch/powerpc/platforms/embedded6xx/flipper-pic.c

@@ -102,7 +102,7 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq,
 			   irq_hw_number_t hwirq)
 {
 	set_irq_chip_data(virq, h->host_data);
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &flipper_pic, handle_level_irq);
 	return 0;
 }

+ 5 - 5
arch/powerpc/platforms/embedded6xx/hlwd-pic.c

@@ -95,7 +95,7 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq,
 			   irq_hw_number_t hwirq)
 {
 	set_irq_chip_data(virq, h->host_data);
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &hlwd_pic, handle_level_irq);
 	return 0;
 }
@@ -132,9 +132,9 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq,
 	struct irq_host *irq_host = get_irq_data(cascade_virq);
 	unsigned int virq;
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->chip->mask(cascade_virq); /* IRQ_LEVEL */
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 
 	virq = __hlwd_pic_get_irq(irq_host);
 	if (virq != NO_IRQ)
@@ -142,11 +142,11 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq,
 	else
 		pr_err("spurious interrupt!\n");
 
-	spin_lock(&desc->lock);
+	raw_spin_lock(&desc->lock);
 	desc->chip->ack(cascade_virq); /* IRQ_LEVEL */
 	if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
 		desc->chip->unmask(cascade_virq);
-	spin_unlock(&desc->lock);
+	raw_spin_unlock(&desc->lock);
 }
 
 /*

+ 1 - 1
arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c

@@ -120,7 +120,7 @@ static void ug_putc(char ch)
 
 	while (!ug_is_txfifo_ready() && count--)
 		barrier();
-	if (count)
+	if (count >= 0)
 		ug_raw_putc(ch);
 }
 

+ 83 - 64
arch/powerpc/platforms/iseries/mf.c

@@ -855,59 +855,58 @@ static int mf_get_boot_rtc(struct rtc_time *tm)
 }
 
 #ifdef CONFIG_PROC_FS
-
-static int proc_mf_dump_cmdline(char *page, char **start, off_t off,
-		int count, int *eof, void *data)
+static int mf_cmdline_proc_show(struct seq_file *m, void *v)
 {
-	int len;
-	char *p;
+	char *page, *p;
 	struct vsp_cmd_data vsp_cmd;
 	int rc;
 	dma_addr_t dma_addr;
 
 	/* The HV appears to return no more than 256 bytes of command line */
-	if (off >= 256)
-		return 0;
-	if ((off + count) > 256)
-		count = 256 - off;
+	page = kmalloc(256, GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
 
-	dma_addr = iseries_hv_map(page, off + count, DMA_FROM_DEVICE);
-	if (dma_addr == DMA_ERROR_CODE)
+	dma_addr = iseries_hv_map(page, 256, DMA_FROM_DEVICE);
+	if (dma_addr == DMA_ERROR_CODE) {
+		kfree(page);
 		return -ENOMEM;
-	memset(page, 0, off + count);
+	}
+	memset(page, 0, 256);
 	memset(&vsp_cmd, 0, sizeof(vsp_cmd));
 	vsp_cmd.cmd = 33;
 	vsp_cmd.sub_data.kern.token = dma_addr;
 	vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
-	vsp_cmd.sub_data.kern.side = (u64)data;
-	vsp_cmd.sub_data.kern.length = off + count;
+	vsp_cmd.sub_data.kern.side = (u64)m->private;
+	vsp_cmd.sub_data.kern.length = 256;
 	mb();
 	rc = signal_vsp_instruction(&vsp_cmd);
-	iseries_hv_unmap(dma_addr, off + count, DMA_FROM_DEVICE);
-	if (rc)
+	iseries_hv_unmap(dma_addr, 256, DMA_FROM_DEVICE);
+	if (rc) {
+		kfree(page);
 		return rc;
-	if (vsp_cmd.result_code != 0)
+	}
+	if (vsp_cmd.result_code != 0) {
+		kfree(page);
 		return -ENOMEM;
+	}
 	p = page;
-	len = 0;
-	while (len < (off + count)) {
-		if ((*p == '\0') || (*p == '\n')) {
-			if (*p == '\0')
-				*p = '\n';
-			p++;
-			len++;
-			*eof = 1;
+	while (p - page < 256) {
+		if (*p == '\0' || *p == '\n') {
+			*p = '\n';
 			break;
 		}
 		p++;
-		len++;
-	}
 
-	if (len < off) {
-		*eof = 1;
-		len = 0;
 	}
-	return len;
+	seq_write(m, page, p - page);
+	kfree(page);
+	return 0;
+}
+
+static int mf_cmdline_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mf_cmdline_proc_show, PDE(inode)->data);
 }
 
 #if 0
@@ -962,10 +961,8 @@ static int proc_mf_dump_vmlinux(char *page, char **start, off_t off,
 }
 #endif
 
-static int proc_mf_dump_side(char *page, char **start, off_t off,
-		int count, int *eof, void *data)
+static int mf_side_proc_show(struct seq_file *m, void *v)
 {
-	int len;
 	char mf_current_side = ' ';
 	struct vsp_cmd_data vsp_cmd;
 
@@ -989,21 +986,17 @@ static int proc_mf_dump_side(char *page, char **start, off_t off,
 		}
 	}
 
-	len = sprintf(page, "%c\n", mf_current_side);
+	seq_printf(m, "%c\n", mf_current_side);
+	return 0;
+}
 
-	if (len <= (off + count))
-		*eof = 1;
-	*start = page + off;
-	len -= off;
-	if (len > count)
-		len = count;
-	if (len < 0)
-		len = 0;
-	return len;
+static int mf_side_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mf_side_proc_show, NULL);
 }
 
-static int proc_mf_change_side(struct file *file, const char __user *buffer,
-		unsigned long count, void *data)
+static ssize_t mf_side_proc_write(struct file *file, const char __user *buffer,
+				  size_t count, loff_t *pos)
 {
 	char side;
 	u64 newSide;
@@ -1041,6 +1034,15 @@ static int proc_mf_change_side(struct file *file, const char __user *buffer,
 	return count;
 }
 
+static const struct file_operations mf_side_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mf_side_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= mf_side_proc_write,
+};
+
 #if 0
 static void mf_getSrcHistory(char *buffer, int size)
 {
@@ -1087,8 +1089,7 @@ static void mf_getSrcHistory(char *buffer, int size)
 }
 #endif
 
-static int proc_mf_dump_src(char *page, char **start, off_t off,
-		int count, int *eof, void *data)
+static int mf_src_proc_show(struct seq_file *m, void *v)
 {
 #if 0
 	int len;
@@ -1109,8 +1110,13 @@ static int proc_mf_dump_src(char *page, char **start, off_t off,
 #endif
 }
 
-static int proc_mf_change_src(struct file *file, const char __user *buffer,
-		unsigned long count, void *data)
+static int mf_src_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mf_src_proc_show, NULL);
+}
+
+static ssize_t mf_src_proc_write(struct file *file, const char __user *buffer,
+				 size_t count, loff_t *pos)
 {
 	char stkbuf[10];
 
@@ -1135,9 +1141,19 @@ static int proc_mf_change_src(struct file *file, const char __user *buffer,
 	return count;
 }
 
-static int proc_mf_change_cmdline(struct file *file, const char __user *buffer,
-		unsigned long count, void *data)
+static const struct file_operations mf_src_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mf_src_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= mf_src_proc_write,
+};
+
+static ssize_t mf_cmdline_proc_write(struct file *file, const char __user *buffer,
+				     size_t count, loff_t *pos)
 {
+	void *data = PDE(file->f_path.dentry->d_inode)->data;
 	struct vsp_cmd_data vsp_cmd;
 	dma_addr_t dma_addr;
 	char *page;
@@ -1172,6 +1188,15 @@ out:
 	return ret;
 }
 
+static const struct file_operations mf_cmdline_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mf_cmdline_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= mf_cmdline_proc_write,
+};
+
 static ssize_t proc_mf_change_vmlinux(struct file *file,
 				      const char __user *buf,
 				      size_t count, loff_t *ppos)
@@ -1246,12 +1271,10 @@ static int __init mf_proc_init(void)
 		if (!mf)
 			return 1;
 
-		ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf);
+		ent = proc_create_data("cmdline", S_IRUSR|S_IWUSR, mf,
+				       &mf_cmdline_proc_fops, (void *)(long)i);
 		if (!ent)
 			return 1;
-		ent->data = (void *)(long)i;
-		ent->read_proc = proc_mf_dump_cmdline;
-		ent->write_proc = proc_mf_change_cmdline;
 
 		if (i == 3)	/* no vmlinux entry for 'D' */
 			continue;
@@ -1263,19 +1286,15 @@ static int __init mf_proc_init(void)
 			return 1;
 	}
 
-	ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
+	ent = proc_create("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root,
+			  &mf_side_proc_fops);
 	if (!ent)
 		return 1;
-	ent->data = (void *)0;
-	ent->read_proc = proc_mf_dump_side;
-	ent->write_proc = proc_mf_change_side;
 
-	ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
+	ent = proc_create("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root,
+			  &mf_src_proc_fops);
 	if (!ent)
 		return 1;
-	ent->data = (void *)0;
-	ent->read_proc = proc_mf_dump_src;
-	ent->write_proc = proc_mf_change_src;
 
 	return 0;
 }

+ 1 - 1
arch/powerpc/platforms/iseries/viopath.c

@@ -116,7 +116,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
 	u16 vlanMap;
 	dma_addr_t handle;
 	HvLpEvent_Rc hvrc;
-	DECLARE_COMPLETION(done);
+	DECLARE_COMPLETION_ONSTACK(done);
 	struct device_node *node;
 	const char *sysid;
 

+ 2 - 0
arch/powerpc/platforms/pseries/Kconfig

@@ -2,6 +2,8 @@ config PPC_PSERIES
 	depends on PPC64 && PPC_BOOK3S
 	bool "IBM pSeries & new (POWER5-based) iSeries"
 	select MPIC
+	select PCI_MSI
+	select XICS
 	select PPC_I8259
 	select PPC_RTAS
 	select PPC_RTAS_DAEMON

+ 248 - 6
arch/powerpc/platforms/pseries/cmm.c

@@ -38,19 +38,28 @@
 #include <asm/mmu.h>
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
+#include <linux/memory.h>
 
 #include "plpar_wrappers.h"
 
 #define CMM_DRIVER_VERSION	"1.0.0"
 #define CMM_DEFAULT_DELAY	1
+#define CMM_HOTPLUG_DELAY	5
 #define CMM_DEBUG			0
 #define CMM_DISABLE		0
 #define CMM_OOM_KB		1024
 #define CMM_MIN_MEM_MB		256
 #define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
 #define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))
+/*
+ * The priority level tries to ensure that this notifier is called as
+ * late as possible to reduce thrashing in the shared memory pool.
+ */
+#define CMM_MEM_HOTPLUG_PRI	1
+#define CMM_MEM_ISOLATE_PRI	15
 
 static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
 static unsigned int oom_kb = CMM_OOM_KB;
 static unsigned int cmm_debug = CMM_DEBUG;
 static unsigned int cmm_disabled = CMM_DISABLE;
@@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION);
 module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
+		 "before loaning resumes. "
+		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
 module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
 		 "[Default=" __stringify(CMM_OOM_KB) "]");
@@ -92,6 +105,9 @@ static unsigned long oom_freed_pages;
 static struct cmm_page_array *cmm_page_list;
 static DEFINE_SPINLOCK(cmm_lock);
 
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred; /* protected by the hotplug mutex */
+
 static struct task_struct *cmm_thread_ptr;
 
 /**
@@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr)
 	cmm_dbg("Begin request for %ld pages\n", nr);
 
 	while (nr) {
+		/* Exit if a hotplug operation is in progress or occurred */
+		if (mutex_trylock(&hotplug_mutex)) {
+			if (hotplug_occurred) {
+				mutex_unlock(&hotplug_mutex);
+				break;
+			}
+			mutex_unlock(&hotplug_mutex);
+		} else {
+			break;
+		}
+
 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
 		if (!addr)
@@ -119,8 +146,9 @@ static long cmm_alloc_pages(long nr)
 		if (!pa || pa->index >= CMM_NR_PAGES) {
 			/* Need a new page for the page list. */
 			spin_unlock(&cmm_lock);
-			npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
-								       __GFP_NORETRY | __GFP_NOMEMALLOC);
+			npa = (struct cmm_page_array *)__get_free_page(
+					GFP_NOIO | __GFP_NOWARN |
+					__GFP_NORETRY | __GFP_NOMEMALLOC);
 			if (!npa) {
 				pr_info("%s: Can not allocate new page list\n", __func__);
 				free_page(addr);
@@ -282,9 +310,28 @@ static int cmm_thread(void *dummy)
 	while (1) {
 		timeleft = msleep_interruptible(delay * 1000);
 
-		if (kthread_should_stop() || timeleft) {
-			loaned_pages_target = loaned_pages;
+		if (kthread_should_stop() || timeleft)
 			break;
+
+		if (mutex_trylock(&hotplug_mutex)) {
+			if (hotplug_occurred) {
+				hotplug_occurred = 0;
+				mutex_unlock(&hotplug_mutex);
+				cmm_dbg("Hotplug operation has occurred, "
+						"loaning activity suspended "
+						"for %d seconds.\n",
+						hotplug_delay);
+				timeleft = msleep_interruptible(hotplug_delay *
+						1000);
+				if (kthread_should_stop() || timeleft)
+					break;
+				continue;
+			}
+			mutex_unlock(&hotplug_mutex);
+		} else {
+			cmm_dbg("Hotplug operation in progress, activity "
+					"suspended\n");
+			continue;
 		}
 
 		cmm_get_mpp();
@@ -413,6 +460,193 @@ static struct notifier_block cmm_reboot_nb = {
 	.notifier_call = cmm_reboot_notifier,
 };
 
+/**
+ * cmm_count_pages - Count the number of pages loaned in a particular range.
+ *
+ * @arg: memory_isolate_notify structure with address range and count
+ *
+ * Return value:
+ *      0 on success
+ **/
+static unsigned long cmm_count_pages(void *arg)
+{
+	struct memory_isolate_notify *marg = arg;
+	struct cmm_page_array *pa;
+	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
+	unsigned long idx;
+
+	spin_lock(&cmm_lock);
+	pa = cmm_page_list;
+	while (pa) {
+		if ((unsigned long)pa >= start && (unsigned long)pa < end)
+			marg->pages_found++;
+		for (idx = 0; idx < pa->index; idx++)
+			if (pa->page[idx] >= start && pa->page[idx] < end)
+				marg->pages_found++;
+		pa = pa->next;
+	}
+	spin_unlock(&cmm_lock);
+	return 0;
+}
+
+/**
+ * cmm_memory_isolate_cb - Handle memory isolation notifier calls
+ * @self:	notifier block struct
+ * @action:	action to take
+ * @arg:	struct memory_isolate_notify data for handler
+ *
+ * Return value:
+ *	NOTIFY_OK or notifier error based on subfunction return value
+ **/
+static int cmm_memory_isolate_cb(struct notifier_block *self,
+				 unsigned long action, void *arg)
+{
+	int ret = 0;
+
+	if (action == MEM_ISOLATE_COUNT)
+		ret = cmm_count_pages(arg);
+
+	if (ret)
+		ret = notifier_from_errno(ret);
+	else
+		ret = NOTIFY_OK;
+
+	return ret;
+}
+
+static struct notifier_block cmm_mem_isolate_nb = {
+	.notifier_call = cmm_memory_isolate_cb,
+	.priority = CMM_MEM_ISOLATE_PRI
+};
+
+/**
+ * cmm_mem_going_offline - Unloan pages where memory is to be removed
+ * @arg: memory_notify structure with page range to be offlined
+ *
+ * Return value:
+ *	0 on success
+ **/
+static int cmm_mem_going_offline(void *arg)
+{
+	struct memory_notify *marg = arg;
+	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
+	struct cmm_page_array *pa_curr, *pa_last, *npa;
+	unsigned long idx;
+	unsigned long freed = 0;
+
+	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
+			start_page, marg->nr_pages);
+	spin_lock(&cmm_lock);
+
+	/* Search the page list for pages in the range to be offlined */
+	pa_last = pa_curr = cmm_page_list;
+	while (pa_curr) {
+		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
+			if ((pa_curr->page[idx] < start_page) ||
+			    (pa_curr->page[idx] >= end_page))
+				continue;
+
+			plpar_page_set_active(__pa(pa_curr->page[idx]));
+			free_page(pa_curr->page[idx]);
+			freed++;
+			loaned_pages--;
+			totalram_pages++;
+			pa_curr->page[idx] = pa_last->page[--pa_last->index];
+			if (pa_last->index == 0) {
+				if (pa_curr == pa_last)
+					pa_curr = pa_last->next;
+				pa_last = pa_last->next;
+				free_page((unsigned long)cmm_page_list);
+				cmm_page_list = pa_last;
+				continue;
+			}
+		}
+		pa_curr = pa_curr->next;
+	}
+
+	/* Search for page list structures in the range to be offlined */
+	pa_last = NULL;
+	pa_curr = cmm_page_list;
+	while (pa_curr) {
+		if (((unsigned long)pa_curr >= start_page) &&
+				((unsigned long)pa_curr < end_page)) {
+			npa = (struct cmm_page_array *)__get_free_page(
+					GFP_NOIO | __GFP_NOWARN |
+					__GFP_NORETRY | __GFP_NOMEMALLOC);
+			if (!npa) {
+				spin_unlock(&cmm_lock);
+				cmm_dbg("Failed to allocate memory for list "
+						"management. Memory hotplug "
+						"failed.\n");
+				return ENOMEM;
+			}
+			memcpy(npa, pa_curr, PAGE_SIZE);
+			if (pa_curr == cmm_page_list)
+				cmm_page_list = npa;
+			if (pa_last)
+				pa_last->next = npa;
+			free_page((unsigned long) pa_curr);
+			freed++;
+			pa_curr = npa;
+		}
+
+		pa_last = pa_curr;
+		pa_curr = pa_curr->next;
+	}
+
+	spin_unlock(&cmm_lock);
+	cmm_dbg("Released %ld pages in the search range.\n", freed);
+
+	return 0;
+}
+
+/**
+ * cmm_memory_cb - Handle memory hotplug notifier calls
+ * @self:	notifier block struct
+ * @action:	action to take
+ * @arg:	struct memory_notify data for handler
+ *
+ * Return value:
+ *	NOTIFY_OK or notifier error based on subfunction return value
+ *
+ **/
+static int cmm_memory_cb(struct notifier_block *self,
+			unsigned long action, void *arg)
+{
+	int ret = 0;
+
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		mutex_lock(&hotplug_mutex);
+		hotplug_occurred = 1;
+		ret = cmm_mem_going_offline(arg);
+		break;
+	case MEM_OFFLINE:
+	case MEM_CANCEL_OFFLINE:
+		mutex_unlock(&hotplug_mutex);
+		cmm_dbg("Memory offline operation complete.\n");
+		break;
+	case MEM_GOING_ONLINE:
+	case MEM_ONLINE:
+	case MEM_CANCEL_ONLINE:
+		break;
+	}
+
+	if (ret)
+		ret = notifier_from_errno(ret);
+	else
+		ret = NOTIFY_OK;
+
+	return ret;
+}
+
+static struct notifier_block cmm_mem_nb = {
+	.notifier_call = cmm_memory_cb,
+	.priority = CMM_MEM_HOTPLUG_PRI
+};
+
 /**
  * cmm_init - Module initialization
  *
@@ -435,18 +669,24 @@ static int cmm_init(void)
 	if ((rc = cmm_sysfs_register(&cmm_sysdev)))
 		goto out_reboot_notifier;
 
+	if (register_memory_notifier(&cmm_mem_nb) ||
+	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+		goto out_unregister_notifier;
+
 	if (cmm_disabled)
 		return rc;
 
 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 	if (IS_ERR(cmm_thread_ptr)) {
 		rc = PTR_ERR(cmm_thread_ptr);
-		goto out_unregister_sysfs;
+		goto out_unregister_notifier;
 	}
 
 	return rc;
 
-out_unregister_sysfs:
+out_unregister_notifier:
+	unregister_memory_notifier(&cmm_mem_nb);
+	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_unregister_sysfs(&cmm_sysdev);
 out_reboot_notifier:
 	unregister_reboot_notifier(&cmm_reboot_nb);
@@ -467,6 +707,8 @@ static void cmm_exit(void)
 		kthread_stop(cmm_thread_ptr);
 	unregister_oom_notifier(&cmm_oom_nb);
 	unregister_reboot_notifier(&cmm_reboot_nb);
+	unregister_memory_notifier(&cmm_mem_nb);
+	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_free_pages(loaned_pages);
 	cmm_unregister_sysfs(&cmm_sysdev);
 }

+ 4 - 2
arch/powerpc/platforms/pseries/dlpar.c

@@ -346,12 +346,14 @@ int dlpar_release_drc(u32 drc_index)
 
 static DEFINE_MUTEX(pseries_cpu_hotplug_mutex);
 
-void cpu_hotplug_driver_lock()
+void cpu_hotplug_driver_lock(void)
+__acquires(pseries_cpu_hotplug_mutex)
 {
 	mutex_lock(&pseries_cpu_hotplug_mutex);
 }
 
-void cpu_hotplug_driver_unlock()
+void cpu_hotplug_driver_unlock(void)
+__releases(pseries_cpu_hotplug_mutex)
 {
 	mutex_unlock(&pseries_cpu_hotplug_mutex);
 }

+ 2 - 2
arch/powerpc/platforms/pseries/smp.c

@@ -144,8 +144,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
 		hcpuid = get_hard_smp_processor_id(nr);
 		rc = plpar_hcall_norets(H_PROD, hcpuid);
 		if (rc != H_SUCCESS)
-			panic("Error: Prod to wake up processor %d Ret= %ld\n",
-				nr, rc);
+			printk(KERN_ERR "Error: Prod to wake up processor %d\
+						Ret= %ld\n", nr, rc);
 	}
 }
 

+ 21 - 7
arch/powerpc/sysdev/cpm2_pic.c

@@ -143,13 +143,23 @@ static int cpm2_set_irq_type(unsigned int virq, unsigned int flow_type)
 	struct irq_desc *desc = irq_to_desc(virq);
 	unsigned int vold, vnew, edibit;
 
-	if (flow_type == IRQ_TYPE_NONE)
-		flow_type = IRQ_TYPE_LEVEL_LOW;
-
-	if (flow_type & IRQ_TYPE_EDGE_RISING) {
-		printk(KERN_ERR "CPM2 PIC: sense type 0x%x not supported\n",
-			flow_type);
-		return -EINVAL;
+	/* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or
+	 * IRQ_TYPE_EDGE_BOTH (default).  All others are IRQ_TYPE_EDGE_FALLING
+	 * or IRQ_TYPE_LEVEL_LOW (default)
+	 */
+	if (src >= CPM2_IRQ_PORTC15 && src <= CPM2_IRQ_PORTC0) {
+		if (flow_type == IRQ_TYPE_NONE)
+			flow_type = IRQ_TYPE_EDGE_BOTH;
+
+		if (flow_type != IRQ_TYPE_EDGE_BOTH &&
+		    flow_type != IRQ_TYPE_EDGE_FALLING)
+			goto err_sense;
+	} else {
+		if (flow_type == IRQ_TYPE_NONE)
+			flow_type = IRQ_TYPE_LEVEL_LOW;
+
+		if (flow_type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH))
+			goto err_sense;
 	}
 
 	desc->status &= ~(IRQ_TYPE_SENSE_MASK | IRQ_LEVEL);
@@ -181,6 +191,10 @@ static int cpm2_set_irq_type(unsigned int virq, unsigned int flow_type)
 	if (vold != vnew)
 		out_be32(&cpm2_intctl->ic_siexr, vnew);
 	return 0;
+
+err_sense:
+	pr_err("CPM2 PIC: sense type 0x%x not supported\n", flow_type);
+	return -EINVAL;
 }
 
 static struct irq_chip cpm2_pic = {

+ 6 - 2
arch/powerpc/sysdev/fsl_pci.c

@@ -464,8 +464,7 @@ static void __iomem *mpc83xx_pcie_remap_cfg(struct pci_bus *bus,
 {
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct mpc83xx_pcie_priv *pcie = hose->dn->data;
-	u8 bus_no = bus->number - hose->first_busno;
-	u32 dev_base = bus_no << 24 | devfn << 16;
+	u32 dev_base = bus->number << 24 | devfn << 16;
 	int ret;
 
 	ret = mpc83xx_pcie_exclude_device(bus, devfn);
@@ -515,12 +514,17 @@ static int mpc83xx_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
 static int mpc83xx_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
 				     int offset, int len, u32 val)
 {
+	struct pci_controller *hose = pci_bus_to_host(bus);
 	void __iomem *cfg_addr;
 
 	cfg_addr = mpc83xx_pcie_remap_cfg(bus, devfn, offset);
 	if (!cfg_addr)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
+	/* PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS */
+	if (offset == PCI_PRIMARY_BUS && bus->number == hose->first_busno)
+		val &= 0xffffff00;
+
 	switch (len) {
 	case 1:
 		out_8(cfg_addr, val);

+ 20 - 1
arch/powerpc/sysdev/mpc8xxx_gpio.c

@@ -54,6 +54,22 @@ static void mpc8xxx_gpio_save_regs(struct of_mm_gpio_chip *mm)
 	mpc8xxx_gc->data = in_be32(mm->regs + GPIO_DAT);
 }
 
+/* Workaround GPIO 1 errata on MPC8572/MPC8536. The status of GPIOs
+ * defined as output cannot be determined by reading GPDAT register,
+ * so we use shadow data register instead. The status of input pins
+ * is determined by reading GPDAT register.
+ */
+static int mpc8572_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	u32 val;
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct mpc8xxx_gpio_chip *mpc8xxx_gc = to_mpc8xxx_gpio_chip(mm);
+
+	val = in_be32(mm->regs + GPIO_DAT) & ~in_be32(mm->regs + GPIO_DIR);
+
+	return (val | mpc8xxx_gc->data) & mpc8xxx_gpio2mask(gpio);
+}
+
 static int mpc8xxx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
 {
 	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
@@ -136,7 +152,10 @@ static void __init mpc8xxx_add_controller(struct device_node *np)
 	gc->ngpio = MPC8XXX_GPIO_PINS;
 	gc->direction_input = mpc8xxx_gpio_dir_in;
 	gc->direction_output = mpc8xxx_gpio_dir_out;
-	gc->get = mpc8xxx_gpio_get;
+	if (of_device_is_compatible(np, "fsl,mpc8572-gpio"))
+		gc->get = mpc8572_gpio_get;
+	else
+		gc->get = mpc8xxx_gpio_get;
 	gc->set = mpc8xxx_gpio_set;
 
 	ret = of_mm_gpiochip_add(np, mm_gc);

+ 6 - 13
arch/powerpc/sysdev/mpic.c

@@ -567,13 +567,11 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
 #endif /* CONFIG_MPIC_U3_HT_IRQS */
 
 #ifdef CONFIG_SMP
-static int irq_choose_cpu(unsigned int virt_irq)
+static int irq_choose_cpu(const cpumask_t *mask)
 {
-	cpumask_t mask;
 	int cpuid;
 
-	cpumask_copy(&mask, irq_to_desc(virt_irq)->affinity);
-	if (cpus_equal(mask, CPU_MASK_ALL)) {
+	if (cpumask_equal(mask, cpu_all_mask)) {
 		static int irq_rover;
 		static DEFINE_SPINLOCK(irq_rover_lock);
 		unsigned long flags;
@@ -594,20 +592,15 @@ static int irq_choose_cpu(unsigned int virt_irq)
 
 		spin_unlock_irqrestore(&irq_rover_lock, flags);
 	} else {
-		cpumask_t tmp;
-
-		cpus_and(tmp, cpu_online_map, mask);
-
-		if (cpus_empty(tmp))
+		cpuid = cpumask_first_and(mask, cpu_online_mask);
+		if (cpuid >= nr_cpu_ids)
 			goto do_round_robin;
-
-		cpuid = first_cpu(tmp);
 	}
 
 	return get_hard_smp_processor_id(cpuid);
 }
 #else
-static int irq_choose_cpu(unsigned int virt_irq)
+static int irq_choose_cpu(const cpumask_t *mask)
 {
 	return hard_smp_processor_id();
 }
@@ -816,7 +809,7 @@ int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	unsigned int src = mpic_irq_to_hw(irq);
 
 	if (mpic->flags & MPIC_SINGLE_DEST_CPU) {
-		int cpuid = irq_choose_cpu(irq);
+		int cpuid = irq_choose_cpu(cpumask);
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid);
 	} else {

+ 10 - 1
arch/powerpc/sysdev/mpic_msi.c

@@ -39,7 +39,12 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
 
 	pr_debug("mpic: found U3, guessing msi allocator setup\n");
 
-	/* Reserve source numbers we know are reserved in the HW */
+	/* Reserve source numbers we know are reserved in the HW.
+	 *
+	 * This is a bit of a mix of U3 and U4 reserves but that's going
+	 * to work fine, we have plenty enugh numbers left so let's just
+	 * mark anything we don't like reserved.
+	 */
 	for (i = 0;   i < 8;   i++)
 		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
 
@@ -49,6 +54,10 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
 	for (i = 100; i < 105; i++)
 		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
 
+	for (i = 124; i < mpic->irq_count; i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+
 	np = NULL;
 	while ((np = of_find_all_nodes(np))) {
 		pr_debug("mpic: mapping hwirqs for %s\n", np->full_name);

+ 39 - 7
arch/powerpc/sysdev/mpic_u3msi.c

@@ -64,12 +64,12 @@ static u64 read_ht_magic_addr(struct pci_dev *pdev, unsigned int pos)
 	return addr;
 }
 
-static u64 find_ht_magic_addr(struct pci_dev *pdev)
+static u64 find_ht_magic_addr(struct pci_dev *pdev, unsigned int hwirq)
 {
 	struct pci_bus *bus;
 	unsigned int pos;
 
-	for (bus = pdev->bus; bus; bus = bus->parent) {
+	for (bus = pdev->bus; bus && bus->self; bus = bus->parent) {
 		pos = pci_find_ht_capability(bus->self, HT_CAPTYPE_MSI_MAPPING);
 		if (pos)
 			return read_ht_magic_addr(bus->self, pos);
@@ -78,13 +78,41 @@ static u64 find_ht_magic_addr(struct pci_dev *pdev)
 	return 0;
 }
 
+static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+	/* U4 PCIe MSIs need to write to the special register in
+	 * the bridge that generates interrupts. There should be
+	 * theorically a register at 0xf8005000 where you just write
+	 * the MSI number and that triggers the right interrupt, but
+	 * unfortunately, this is busted in HW, the bridge endian swaps
+	 * the value and hits the wrong nibble in the register.
+	 *
+	 * So instead we use another register set which is used normally
+	 * for converting HT interrupts to MPIC interrupts, which decodes
+	 * the interrupt number as part of the low address bits
+	 *
+	 * This will not work if we ever use more than one legacy MSI in
+	 * a block but we never do. For one MSI or multiple MSI-X where
+	 * each interrupt address can be specified separately, it works
+	 * just fine.
+	 */
+	if (of_device_is_compatible(hose->dn, "u4-pcie") ||
+	    of_device_is_compatible(hose->dn, "U4-pcie"))
+		return 0xf8004000 | (hwirq << 4);
+
+	return 0;
+}
+
 static int u3msi_msi_check_device(struct pci_dev *pdev, int nvec, int type)
 {
 	if (type == PCI_CAP_ID_MSIX)
 		pr_debug("u3msi: MSI-X untested, trying anyway.\n");
 
 	/* If we can't find a magic address then MSI ain't gonna work */
-	if (find_ht_magic_addr(pdev) == 0) {
+	if (find_ht_magic_addr(pdev, 0) == 0 &&
+	    find_u4_magic_addr(pdev, 0) == 0) {
 		pr_debug("u3msi: no magic address found for %s\n",
 			 pci_name(pdev));
 		return -ENXIO;
@@ -118,10 +146,6 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	u64 addr;
 	int hwirq;
 
-	addr = find_ht_magic_addr(pdev);
-	msg.address_lo = addr & 0xFFFFFFFF;
-	msg.address_hi = addr >> 32;
-
 	list_for_each_entry(entry, &pdev->msi_list, list) {
 		hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, 1);
 		if (hwirq < 0) {
@@ -129,6 +153,12 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 			return hwirq;
 		}
 
+		addr = find_ht_magic_addr(pdev, hwirq);
+		if (addr == 0)
+			addr = find_u4_magic_addr(pdev, hwirq);
+		msg.address_lo = addr & 0xFFFFFFFF;
+		msg.address_hi = addr >> 32;
+
 		virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
 		if (virq == NO_IRQ) {
 			pr_debug("u3msi: failed mapping hwirq 0x%x\n", hwirq);
@@ -143,6 +173,8 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 		pr_debug("u3msi: allocated virq 0x%x (hw 0x%x) addr 0x%lx\n",
 			  virq, hwirq, (unsigned long)addr);
 
+		printk("u3msi: allocated virq 0x%x (hw 0x%x) addr 0x%lx\n",
+			  virq, hwirq, (unsigned long)addr);
 		msg.data = hwirq;
 		write_msi_msg(virq, &msg);
 

+ 19 - 0
drivers/base/memory.c

@@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_memory_notifier);
 
+static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);
+
+int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(register_memory_isolate_notifier);
+
+void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+	atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(unregister_memory_isolate_notifier);
+
 /*
  * register_memory - Setup a sysfs device for a memory block
  */
@@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v)
 	return blocking_notifier_call_chain(&memory_chain, val, v);
 }
 
+int memory_isolate_notify(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
+}
+
 /*
  * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
  * OK to have direct references to sparsemem variables in here.

+ 27 - 0
include/linux/memory.h

@@ -50,6 +50,19 @@ struct memory_notify {
 	int status_change_nid;
 };
 
+/*
+ * During pageblock isolation, count the number of pages within the
+ * range [start_pfn, start_pfn + nr_pages) which are owned by code
+ * in the notifier chain.
+ */
+#define MEM_ISOLATE_COUNT	(1<<0)
+
+struct memory_isolate_notify {
+	unsigned long start_pfn;	/* Start of range to check */
+	unsigned int nr_pages;		/* # pages in range to check */
+	unsigned int pages_found;	/* # pages owned found by callbacks */
+};
+
 struct notifier_block;
 struct mem_section;
 
@@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v)
 {
 	return 0;
 }
+static inline int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+static inline void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+}
+static inline int memory_isolate_notify(unsigned long val, void *v)
+{
+	return 0;
+}
 #else
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
+extern int register_memory_isolate_notifier(struct notifier_block *nb);
+extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 extern int register_new_memory(int, struct mem_section *);
 extern int unregister_memory_section(struct mem_section *);
 extern int memory_dev_init(void);
 extern int remove_memory_block(unsigned long, struct mem_section *, int);
 extern int memory_notify(unsigned long val, void *v);
+extern int memory_isolate_notify(unsigned long val, void *v);
 extern struct memory_block *find_memory_block(struct mem_section *);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 enum mem_add_context { BOOT, HOTPLUG };

+ 50 - 7
mm/page_alloc.c

@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();