
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

Pull powerpc updates from Benjamin Herrenschmidt:
 "The bulk of this is LE updates.  One should now be able to build an LE
  kernel and even run some things in it.

  I'm still sitting on a handful of patches to enable the new ABI that I
  *might* still send this merge window, but due to the uncertainty (they
  are pretty fresh) I want to keep them separate.

  Other notable changes are some infrastructure bits to better handle
  PCI pass-through under KVM, some bits and pieces added to the new
  PowerNV platform support such as access to the CPU SCOM bus via
  debugfs, and support for EEH error handling on PHB3 (Power8 PCIe).

  We also grew arch_get_random_long() for both pseries and powernv when
  running on P7+ and P8, exploiting the hardware RNG.

  And finally, various embedded updates from Freescale"

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (154 commits)
  powerpc: Fix fatal SLB miss when restoring PPR
  powerpc/powernv: Reserve the correct PE number
  powerpc/powernv: Add PE to its own PELTV
  powerpc/powernv: Add support for indirect XSCOM via debugfs
  powerpc/scom: Improve debugfs interface
  powerpc/scom: Enable 64-bit addresses
  powerpc/boot: Properly handle the base "of" boot wrapper
  powerpc/bpf: Support MOD operation
  powerpc/bpf: Fix DIVWU instruction opcode
  of: Move definition of of_find_next_cache_node into common code.
  powerpc: Remove big endianness assumption in of_find_next_cache_node
  powerpc/tm: Remove interrupt disable in __switch_to()
  powerpc: word-at-a-time optimization for 64-bit Little Endian
  powerpc/bpf: BPF JIT compiler for 64-bit Little Endian
  powerpc: Only save/restore SDR1 if in hypervisor mode
  powerpc/pmu: Fix ADB_PMU_LED_IDE dependencies
  powerpc/nvram: Fix endian issue when using the partition length
  powerpc/nvram: Fix endian issue when reading the NVRAM size
  powerpc/nvram: Scan partitions only once
  powerpc/mpc512x: remove unnecessary #if
  ...
Author: Linus Torvalds
Commit: 66a173b926
100 changed files with 2209 additions and 955 deletions
  1. arch/powerpc/Kconfig (+9 -3)
  2. arch/powerpc/Makefile (+32 -5)
  3. arch/powerpc/boot/Makefile (+2 -1)
  4. arch/powerpc/boot/dts/b4860emu.dts (+218 -0)
  5. arch/powerpc/boot/dts/b4qds.dtsi (+32 -19)
  6. arch/powerpc/boot/dts/c293pcie.dts (+1 -0)
  7. arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi (+2 -0)
  8. arch/powerpc/boot/dts/fsl/b4860si-post.dtsi (+1 -1)
  9. arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi (+2 -0)
  10. arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi (+1 -1)
  11. arch/powerpc/boot/dts/fsl/bsc9131si-pre.dtsi (+3 -0)
  12. arch/powerpc/boot/dts/t4240emu.dts (+268 -0)
  13. arch/powerpc/boot/dts/t4240qds.dts (+45 -28)
  14. arch/powerpc/boot/wrapper (+8 -0)
  15. arch/powerpc/configs/corenet32_smp_defconfig (+2 -5)
  16. arch/powerpc/configs/corenet64_smp_defconfig (+1 -4)
  17. arch/powerpc/configs/mpc85xx_defconfig (+1 -0)
  18. arch/powerpc/configs/mpc85xx_smp_defconfig (+1 -0)
  19. arch/powerpc/configs/ppc64_defconfig (+15 -12)
  20. arch/powerpc/configs/ppc64e_defconfig (+3 -9)
  21. arch/powerpc/configs/ppc6xx_defconfig (+1 -1)
  22. arch/powerpc/configs/pseries_defconfig (+15 -10)
  23. arch/powerpc/include/asm/archrandom.h (+32 -0)
  24. arch/powerpc/include/asm/checksum.h (+5 -0)
  25. arch/powerpc/include/asm/emulated_ops.h (+1 -0)
  26. arch/powerpc/include/asm/hvsi.h (+8 -8)
  27. arch/powerpc/include/asm/io.h (+50 -19)
  28. arch/powerpc/include/asm/lppaca.h (+6 -6)
  29. arch/powerpc/include/asm/machdep.h (+16 -0)
  30. arch/powerpc/include/asm/mmu-hash64.h (+2 -2)
  31. arch/powerpc/include/asm/opal.h (+92 -17)
  32. arch/powerpc/include/asm/page.h (+2 -2)
  33. arch/powerpc/include/asm/pgtable-ppc64.h (+2 -0)
  34. arch/powerpc/include/asm/ppc-opcode.h (+7 -1)
  35. arch/powerpc/include/asm/ppc_asm.h (+46 -96)
  36. arch/powerpc/include/asm/processor.h (+53 -38)
  37. arch/powerpc/include/asm/prom.h (+0 -3)
  38. arch/powerpc/include/asm/reg.h (+6 -1)
  39. arch/powerpc/include/asm/reg_booke.h (+4 -4)
  40. arch/powerpc/include/asm/scom.h (+17 -6)
  41. arch/powerpc/include/asm/setup.h (+4 -0)
  42. arch/powerpc/include/asm/sfp-machine.h (+1 -1)
  43. arch/powerpc/include/asm/string.h (+4 -0)
  44. arch/powerpc/include/asm/switch_to.h (+1 -0)
  45. arch/powerpc/include/asm/word-at-a-time.h (+78 -0)
  46. arch/powerpc/include/asm/xor.h (+67 -0)
  47. arch/powerpc/include/uapi/asm/byteorder.h (+4 -0)
  48. arch/powerpc/kernel/align.c (+112 -61)
  49. arch/powerpc/kernel/asm-offsets.c (+11 -18)
  50. arch/powerpc/kernel/eeh.c (+3 -3)
  51. arch/powerpc/kernel/entry_64.S (+30 -20)
  52. arch/powerpc/kernel/exceptions-64e.S (+4 -2)
  53. arch/powerpc/kernel/fpu.S (+39 -47)
  54. arch/powerpc/kernel/ftrace.c (+4 -0)
  55. arch/powerpc/kernel/head_64.S (+3 -0)
  56. arch/powerpc/kernel/head_8xx.S (+3 -0)
  57. arch/powerpc/kernel/head_fsl_booke.S (+5 -5)
  58. arch/powerpc/kernel/kgdb.c (+3 -3)
  59. arch/powerpc/kernel/legacy_serial.c (+1 -1)
  60. arch/powerpc/kernel/machine_kexec_64.c (+1 -1)
  61. arch/powerpc/kernel/misc_32.S (+14 -0)
  62. arch/powerpc/kernel/module.c (+1 -2)
  63. arch/powerpc/kernel/module_32.c (+1 -2)
  64. arch/powerpc/kernel/module_64.c (+17 -2)
  65. arch/powerpc/kernel/nvram_64.c (+8 -2)
  66. arch/powerpc/kernel/paca.c (+3 -3)
  67. arch/powerpc/kernel/pci_of_scan.c (+2 -2)
  68. arch/powerpc/kernel/ppc_ksyms.c (+10 -0)
  69. arch/powerpc/kernel/process.c (+35 -32)
  70. arch/powerpc/kernel/prom.c (+0 -31)
  71. arch/powerpc/kernel/prom_init.c (+22 -6)
  72. arch/powerpc/kernel/ptrace.c (+105 -104)
  73. arch/powerpc/kernel/ptrace32.c (+5 -8)
  74. arch/powerpc/kernel/rtas_pci.c (+3 -3)
  75. arch/powerpc/kernel/setup-common.c (+0 -2)
  76. arch/powerpc/kernel/setup.h (+0 -9)
  77. arch/powerpc/kernel/setup_32.c (+0 -2)
  78. arch/powerpc/kernel/setup_64.c (+0 -2)
  79. arch/powerpc/kernel/signal_32.c (+42 -39)
  80. arch/powerpc/kernel/signal_64.c (+17 -15)
  81. arch/powerpc/kernel/swsusp_asm64.S (+4 -0)
  82. arch/powerpc/kernel/tm.S (+26 -23)
  83. arch/powerpc/kernel/traps.c (+31 -25)
  84. arch/powerpc/kernel/vdso.c (+1 -2)
  85. arch/powerpc/kernel/vdso32/vdso32.lds.S (+4 -0)
  86. arch/powerpc/kernel/vdso64/vdso64.lds.S (+4 -0)
  87. arch/powerpc/kernel/vecemu.c (+3 -3)
  88. arch/powerpc/kernel/vector.S (+44 -36)
  89. arch/powerpc/kernel/vio.c (+2 -2)
  90. arch/powerpc/kvm/Kconfig (+1 -0)
  91. arch/powerpc/kvm/book3s_pr.c (+15 -21)
  92. arch/powerpc/kvm/booke.c (+9 -10)
  93. arch/powerpc/lib/Makefile (+16 -5)
  94. arch/powerpc/lib/copyuser_power7.S (+31 -23)
  95. arch/powerpc/lib/memcpy_power7.S (+32 -23)
  96. arch/powerpc/lib/sstep.c (+85 -12)
  97. arch/powerpc/lib/xor_vmx.c (+177 -0)
  98. arch/powerpc/mm/hash_native_64.c (+26 -20)
  99. arch/powerpc/mm/hash_utils_64.c (+18 -20)
  100. arch/powerpc/mm/init_32.c (+5 -0)

+ 9 - 3
arch/powerpc/Kconfig

@@ -97,7 +97,7 @@ config PPC
 	select VIRT_TO_BUS if !PPC64
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
-	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
 	select HAVE_KPROBES
 	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
@@ -140,6 +140,9 @@ config PPC
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 
+config GENERIC_CSUM
+	def_bool CPU_LITTLE_ENDIAN
+
 config EARLY_PRINTK
 	bool
 	default y
@@ -405,7 +408,7 @@ config CRASH_DUMP
 
 config FA_DUMP
 	bool "Firmware-assisted dump"
-	depends on PPC64 && PPC_RTAS && CRASH_DUMP
+	depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
 	help
 	  A robust mechanism to get reliable kernel crash dump with
 	  assistance from firmware. This approach does not use kexec,
@@ -418,7 +421,7 @@ config FA_DUMP
 
 config IRQ_ALL_CPUS
 	bool "Distribute interrupts on all CPUs by default"
-	depends on SMP && !MV64360
+	depends on SMP
 	help
 	  This option gives the kernel permission to distribute IRQs across
 	  multiple CPUs.  Saying N here will route all IRQs to the first
@@ -1010,6 +1013,9 @@ config PHYSICAL_START
 	default "0x00000000"
 endif
 
+config	ARCH_RANDOM
+	def_bool n
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"

+ 32 - 5
arch/powerpc/Makefile

@@ -36,17 +36,26 @@ KBUILD_DEFCONFIG := ppc64_defconfig
 endif
 
 ifeq ($(CONFIG_PPC64),y)
-OLDARCH	:= ppc64
-
 new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
 
 ifeq ($(new_nm),y)
 NM		:= $(NM) --synthetic
 endif
+endif
 
+ifeq ($(CONFIG_PPC64),y)
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+OLDARCH	:= ppc64le
+else
+OLDARCH	:= ppc64
+endif
+else
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+OLDARCH	:= ppcle
 else
 OLDARCH	:= ppc
 endif
+endif
 
 # It seems there are times we use this Makefile without
 # including the config file, but this replicates the old behaviour
@@ -56,11 +65,29 @@ endif
 
 UTS_MACHINE := $(OLDARCH)
 
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+override CC	+= -mlittle-endian -mno-strict-align
+override AS	+= -mlittle-endian
+override LD	+= -EL
+override CROSS32CC += -mlittle-endian
+override CROSS32AS += -mlittle-endian
+LDEMULATION	:= lppc
+GNUTARGET	:= powerpcle
+MULTIPLEWORD	:= -mno-multiple
+else
+override CC	+= -mbig-endian
+override AS	+= -mbig-endian
+override LD	+= -EB
+LDEMULATION	:= ppc
+GNUTARGET	:= powerpc
+MULTIPLEWORD	:= -mmultiple
+endif
+
 ifeq ($(HAS_BIARCH),y)
 override AS	+= -a$(CONFIG_WORD_SIZE)
-override LD	+= -m elf$(CONFIG_WORD_SIZE)ppc
+override LD	+= -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
 override CC	+= -m$(CONFIG_WORD_SIZE)
-override AR	:= GNUTARGET=elf$(CONFIG_WORD_SIZE)-powerpc $(AR)
+override AR	:= GNUTARGET=elf$(CONFIG_WORD_SIZE)-$(GNUTARGET) $(AR)
 endif
 
 LDFLAGS_vmlinux-y := -Bstatic
@@ -86,7 +113,7 @@ endif
 CFLAGS-$(CONFIG_PPC64)	:= -mtraceback=no -mcall-aixdesc
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,-mminimal-toc)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
-CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 -mmultiple
+CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 $(MULTIPLEWORD)
 
 ifeq ($(CONFIG_PPC_BOOK3S_64),y)
 CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)

+ 2 - 1
arch/powerpc/boot/Makefile

@@ -22,7 +22,8 @@ all: $(obj)/zImage
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-		 -isystem $(shell $(CROSS32CC) -print-file-name=include)
+		 -isystem $(shell $(CROSS32CC) -print-file-name=include) \
+		 -mbig-endian
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
 ifdef CONFIG_DEBUG_INFO

+ 218 - 0
arch/powerpc/boot/dts/b4860emu.dts

@@ -0,0 +1,218 @@
+/*
+ * B4860 emulator Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of
+ * this software, even if advised of the possibility of such damage.
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e6500_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,B4860";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		dma0 = &dma0;
+		dma1 = &dma1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>;
+			next-level-cache = <&L2>;
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>;
+			next-level-cache = <&L2>;
+		};
+		cpu2: PowerPC,e6500@4 {
+			device_type = "cpu";
+			reg = <4 5>;
+			next-level-cache = <&L2>;
+		};
+		cpu3: PowerPC,e6500@6 {
+			device_type = "cpu";
+			reg = <6 7>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
+
+/ {
+	model = "fsl,B4860QDS";
+	compatible = "fsl,B4860EMU", "fsl,B4860QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+			bank-width = <2>;
+			device-width = <1>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+	};
+};
+
+&ifc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 2>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 8>;
+	};
+
+	ddr2: memory-controller@9000 {
+		compatible = "fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller";
+		reg = <0x9000 0x1000>;
+		interrupts = <16 2 1 9>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,b4-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000>;
+		interrupts = <16 2 1 4>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,b4-corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 0>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x4000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 1>;
+		pamu0: pamu@0 {
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <8 1>;
+			fsl,secondary-cache-geometry = <32 2>;
+		};
+	};
+
+/include/ "fsl/qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,b4-device-config";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+	clockgen: global-utilities@e1000 {
+		compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0";
+		reg = <0xe1000 0x1000>;
+	};
+
+/include/ "fsl/qoriq-dma-0.dtsi"
+	dma@100300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+
+/include/ "fsl/qoriq-dma-1.dtsi"
+	dma@101300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+
+/include/ "fsl/qoriq-i2c-0.dtsi"
+/include/ "fsl/qoriq-i2c-1.dtsi"
+/include/ "fsl/qoriq-duart-0.dtsi"
+/include/ "fsl/qoriq-duart-1.dtsi"
+
+	L2: l2-cache-controller@c20000 {
+		compatible = "fsl,b4-l2-cache-controller";
+		reg = <0xc20000 0x1000>;
+		next-level-cache = <&cpc>;
+	};
+};

+ 32 - 19
arch/powerpc/boot/dts/b4qds.dtsi

@@ -120,25 +120,38 @@
 		};
 
 		i2c@118000 {
-			eeprom@50 {
-				compatible = "at24,24c64";
-				reg = <0x50>;
-			};
-			eeprom@51 {
-				compatible = "at24,24c256";
-				reg = <0x51>;
-			};
-			eeprom@53 {
-				compatible = "at24,24c256";
-				reg = <0x53>;
-			};
-			eeprom@57 {
-				compatible = "at24,24c256";
-				reg = <0x57>;
-			};
-			rtc@68 {
-				compatible = "dallas,ds3232";
-				reg = <0x68>;
+			mux@77 {
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				i2c@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0>;
+
+					eeprom@50 {
+						compatible = "at24,24c64";
+						reg = <0x50>;
+					};
+					eeprom@51 {
+						compatible = "at24,24c256";
+						reg = <0x51>;
+					};
+					eeprom@53 {
+						compatible = "at24,24c256";
+						reg = <0x53>;
+					};
+					eeprom@57 {
+						compatible = "at24,24c256";
+						reg = <0x57>;
+					};
+					rtc@68 {
+						compatible = "dallas,ds3232";
+						reg = <0x68>;
+					};
+				};
 			};
 		};
 

+ 1 - 0
arch/powerpc/boot/dts/c293pcie.dts

@@ -45,6 +45,7 @@
 	ifc: ifc@fffe1e000 {
 		reg = <0xf 0xffe1e000 0 0x2000>;
 		ranges = <0x0 0x0 0xf 0xec000000 0x04000000
+			  0x1 0x0 0xf 0xff800000 0x00010000
 			  0x2 0x0 0xf 0xffdf0000 0x00010000>;
 
 	};

+ 2 - 0
arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi

@@ -34,6 +34,8 @@
 
 /dts-v1/;
 
+/include/ "e6500_power_isa.dtsi"
+
 / {
 	compatible = "fsl,B4420";
 	#address-cells = <2>;

+ 1 - 1
arch/powerpc/boot/dts/fsl/b4860si-post.dtsi

@@ -41,7 +41,7 @@
 
 &rio {
 	compatible = "fsl,srio";
-	interrupts = <16 2 1 11>;
+	interrupts = <16 2 1 20>;
 	#address-cells = <2>;
 	#size-cells = <2>;
 	fsl,iommu-parent = <&pamu0>;

+ 2 - 0
arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi

@@ -34,6 +34,8 @@
 
 /dts-v1/;
 
+/include/ "e6500_power_isa.dtsi"
+
 / {
 	compatible = "fsl,B4860";
 	#address-cells = <2>;

+ 1 - 1
arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi

@@ -130,7 +130,7 @@ usb@22000 {
 
 /include/ "pq3-esdhc-0.dtsi"
 	sdhc@2e000 {
-		fsl,sdhci-auto-cmd12;
+		sdhci,auto-cmd12;
 		interrupts = <41 0x2 0 0>;
 	};
 

+ 3 - 0
arch/powerpc/boot/dts/fsl/bsc9131si-pre.dtsi

@@ -33,6 +33,9 @@
  */
 
 /dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
 / {
 	compatible = "fsl,BSC9131";
 	#address-cells = <2>;

+ 268 - 0
arch/powerpc/boot/dts/t4240emu.dts

@@ -0,0 +1,268 @@
+/*
+ * T4240 emulator Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e6500_power_isa.dtsi"
+/ {
+	compatible = "fsl,T4240";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		dma0 = &dma0;
+		dma1 = &dma1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>;
+			next-level-cache = <&L2_1>;
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>;
+			next-level-cache = <&L2_1>;
+		};
+		cpu2: PowerPC,e6500@4 {
+			device_type = "cpu";
+			reg = <4 5>;
+			next-level-cache = <&L2_1>;
+		};
+		cpu3: PowerPC,e6500@6 {
+			device_type = "cpu";
+			reg = <6 7>;
+			next-level-cache = <&L2_1>;
+		};
+
+		cpu4: PowerPC,e6500@8 {
+			device_type = "cpu";
+			reg = <8 9>;
+			next-level-cache = <&L2_2>;
+		};
+		cpu5: PowerPC,e6500@10 {
+			device_type = "cpu";
+			reg = <10 11>;
+			next-level-cache = <&L2_2>;
+		};
+		cpu6: PowerPC,e6500@12 {
+			device_type = "cpu";
+			reg = <12 13>;
+			next-level-cache = <&L2_2>;
+		};
+		cpu7: PowerPC,e6500@14 {
+			device_type = "cpu";
+			reg = <14 15>;
+			next-level-cache = <&L2_2>;
+		};
+
+		cpu8: PowerPC,e6500@16 {
+			device_type = "cpu";
+			reg = <16 17>;
+			next-level-cache = <&L2_3>;
+		};
+		cpu9: PowerPC,e6500@18 {
+			device_type = "cpu";
+			reg = <18 19>;
+			next-level-cache = <&L2_3>;
+		};
+		cpu10: PowerPC,e6500@20 {
+			device_type = "cpu";
+			reg = <20 21>;
+			next-level-cache = <&L2_3>;
+		};
+		cpu11: PowerPC,e6500@22 {
+			device_type = "cpu";
+			reg = <22 23>;
+			next-level-cache = <&L2_3>;
+		};
+	};
+};
+
+/ {
+	model = "fsl,T4240QDS";
+	compatible = "fsl,T4240EMU", "fsl,T4240QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+	};
+};
+
+&ifc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	ddr2: memory-controller@9000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0x9000 0x1000>;
+		interrupts = <16 2 1 22>;
+	};
+
+	ddr3: memory-controller@a000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0xa000 0x1000>;
+		interrupts = <16 2 1 21>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,t4240-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000
+		       0x12000 0x1000>;
+		interrupts = <16 2 1 27
+			      16 2 1 26
+			      16 2 1 25>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x6000>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+	};
+
+/include/ "fsl/qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,t4240-device-config", "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+	clockgen: global-utilities@e1000 {
+		compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0";
+		reg = <0xe1000 0x1000>;
+	};
+
+/include/ "fsl/qoriq-dma-0.dtsi"
+/include/ "fsl/qoriq-dma-1.dtsi"
+
+/include/ "fsl/qoriq-i2c-0.dtsi"
+/include/ "fsl/qoriq-i2c-1.dtsi"
+/include/ "fsl/qoriq-duart-0.dtsi"
+/include/ "fsl/qoriq-duart-1.dtsi"
+
+	L2_1: l2-cache-controller@c20000 {
+		compatible = "fsl,t4240-l2-cache-controller";
+		reg = <0xc20000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+	L2_2: l2-cache-controller@c60000 {
+		compatible = "fsl,t4240-l2-cache-controller";
+		reg = <0xc60000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+	L2_3: l2-cache-controller@ca0000 {
+		compatible = "fsl,t4240-l2-cache-controller";
+		reg = <0xca0000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+};

+ 45 - 28
arch/powerpc/boot/dts/t4240qds.dts

@@ -118,36 +118,53 @@
 		};
 
 		i2c@118000 {
-			eeprom@51 {
-				compatible = "at24,24c256";
-				reg = <0x51>;
-			};
-			eeprom@52 {
-				compatible = "at24,24c256";
-				reg = <0x52>;
-			};
-			eeprom@53 {
-				compatible = "at24,24c256";
-				reg = <0x53>;
-			};
-			eeprom@54 {
-				compatible = "at24,24c256";
-				reg = <0x54>;
-			};
-			eeprom@55 {
-				compatible = "at24,24c256";
-				reg = <0x55>;
-			};
-			eeprom@56 {
-				compatible = "at24,24c256";
-				reg = <0x56>;
-			};
-			rtc@68 {
-				compatible = "dallas,ds3232";
-				reg = <0x68>;
-				interrupts = <0x1 0x1 0 0>;
+			mux@77 {
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				i2c@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0>;
+
+					eeprom@51 {
+						compatible = "at24,24c256";
+						reg = <0x51>;
+					};
+					eeprom@52 {
+						compatible = "at24,24c256";
+						reg = <0x52>;
+					};
+					eeprom@53 {
+						compatible = "at24,24c256";
+						reg = <0x53>;
+					};
+					eeprom@54 {
+						compatible = "at24,24c256";
+						reg = <0x54>;
+					};
+					eeprom@55 {
+						compatible = "at24,24c256";
+						reg = <0x55>;
+					};
+					eeprom@56 {
+						compatible = "at24,24c256";
+						reg = <0x56>;
+					};
+					rtc@68 {
+						compatible = "dallas,ds3232";
+						reg = <0x68>;
+						interrupts = <0x1 0x1 0 0>;
+					};
+				};
 			};
 		};
+
+		sdhc@114000 {
+			voltage-ranges = <1800 1800 3300 3300>;
+		};
 	};
 
 	pci0: pcie@ffe240000 {

+ 8 - 0
arch/powerpc/boot/wrapper

@@ -147,21 +147,29 @@ link_address='0x400000'
 make_space=y
 
 case "$platform" in
+of)
+    platformo="$object/of.o $object/epapr.o"
+    make_space=n
+    ;;
 pseries)
     platformo="$object/of.o $object/epapr.o"
     link_address='0x4000000'
+    make_space=n
     ;;
 maple)
     platformo="$object/of.o $object/epapr.o"
     link_address='0x400000'
+    make_space=n
     ;;
 pmac|chrp)
     platformo="$object/of.o $object/epapr.o"
+    make_space=n
     ;;
 coff)
     platformo="$object/crt0.o $object/of.o $object/epapr.o"
     lds=$object/zImage.coff.lds
     link_address='0x500000'
+    make_space=n
     pie=
     ;;
 miboot|uboot*)

+ 2 - 5
arch/powerpc/configs/corenet32_smp_defconfig

@@ -23,11 +23,7 @@ CONFIG_MODVERSIONS=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAC_PARTITION=y
-CONFIG_P2041_RDB=y
-CONFIG_P3041_DS=y
-CONFIG_P4080_DS=y
-CONFIG_P5020_DS=y
-CONFIG_P5040_DS=y
+CONFIG_CORENET_GENERIC=y
 CONFIG_HIGHMEM=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_BINFMT_MISC=m
@@ -104,6 +100,7 @@ CONFIG_FSL_PQ_MDIO=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
 CONFIG_VITESSE_PHY=y
+CONFIG_AT803X_PHY=y
 CONFIG_FIXED_PHY=y
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set

+ 1 - 4
arch/powerpc/configs/corenet64_smp_defconfig

@@ -21,10 +21,7 @@ CONFIG_MODVERSIONS=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAC_PARTITION=y
-CONFIG_B4_QDS=y
-CONFIG_P5020_DS=y
-CONFIG_P5040_DS=y
-CONFIG_T4240_QDS=y
+CONFIG_CORENET_GENERIC=y
 # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 CONFIG_BINFMT_MISC=m
 CONFIG_MATH_EMULATION=y

+ 1 - 0
arch/powerpc/configs/mpc85xx_defconfig

@@ -138,6 +138,7 @@ CONFIG_MARVELL_PHY=y
 CONFIG_DAVICOM_PHY=y
 CONFIG_CICADA_PHY=y
 CONFIG_VITESSE_PHY=y
+CONFIG_AT803X_PHY=y
 CONFIG_FIXED_PHY=y
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_MOUSEDEV is not set

+ 1 - 0
arch/powerpc/configs/mpc85xx_smp_defconfig

@@ -138,6 +138,7 @@ CONFIG_MARVELL_PHY=y
 CONFIG_DAVICOM_PHY=y
 CONFIG_CICADA_PHY=y
 CONFIG_VITESSE_PHY=y
+CONFIG_AT803X_PHY=y
 CONFIG_FIXED_PHY=y
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_MOUSEDEV is not set

+ 15 - 12
arch/powerpc/configs/ppc64_defconfig

@@ -2,7 +2,6 @@ CONFIG_PPC64=y
 CONFIG_ALTIVEC=y
 CONFIG_VSX=y
 CONFIG_SMP=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
@@ -25,7 +24,6 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
-CONFIG_EFI_PARTITION=y
 CONFIG_PPC_SPLPAR=y
 CONFIG_SCANLOG=m
 CONFIG_PPC_SMLPAR=y
@@ -50,12 +48,10 @@ CONFIG_CPU_FREQ_PMAC64=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
 CONFIG_PPC_TRANSACTIONAL_MEM=y
-CONFIG_HOTPLUG_CPU=y
 CONFIG_KEXEC=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_SCHED_SMT=y
-CONFIG_PPC_DENORMALISATION=y
 CONFIG_PCCARD=y
 CONFIG_ELECTRA_CF=y
 CONFIG_HOTPLUG_PCI=y
@@ -89,7 +85,6 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
 CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
 CONFIG_NETFILTER_XT_TARGET_DSCP=m
@@ -131,7 +126,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -157,6 +151,7 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_GENERIC=y
@@ -185,6 +180,10 @@ CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
 CONFIG_ATA=y
 CONFIG_SATA_SIL24=y
 CONFIG_SATA_SVW=y
@@ -203,6 +202,9 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
 CONFIG_ADB_PMU=y
 CONFIG_PMAC_SMU=y
 CONFIG_THERM_PM72=y
@@ -216,6 +218,8 @@ CONFIG_DUMMY=m
 CONFIG_NETCONSOLE=y
 CONFIG_NETPOLL_TRAP=y
 CONFIG_TUN=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VHOST_NET=m
 CONFIG_VORTEX=y
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -262,6 +266,7 @@ CONFIG_HVC_CONSOLE=y
 CONFIG_HVC_RTAS=y
 CONFIG_HVC_BEAT=y
 CONFIG_HVCS=m
+CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
 CONFIG_RAW_DRIVER=y
 CONFIG_I2C_CHARDEV=y
@@ -301,7 +306,6 @@ CONFIG_HID_GYRATION=y
 CONFIG_HID_PANTHERLORD=y
 CONFIG_HID_PETALYNX=y
 CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
@@ -328,6 +332,8 @@ CONFIG_EDAC_MM_EDAC=y
 CONFIG_EDAC_PASEMI=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -386,21 +392,19 @@ CONFIG_NLS_UTF8=y
 CONFIG_CRC_T10DIF=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_LOCKUP_DETECTOR=y
 CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
 CONFIG_BOOTX_TEXT=y
 CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG_BOOTX=y
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
@@ -422,4 +426,3 @@ CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=y
-CONFIG_VHOST_NET=m

+ 3 - 9
arch/powerpc/configs/ppc64e_defconfig

@@ -1,7 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
 CONFIG_SMP=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_NO_HZ=y
@@ -23,7 +22,7 @@ CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAC_PARTITION=y
 CONFIG_EFI_PARTITION=y
-CONFIG_P5020_DS=y
+CONFIG_CORENET_GENERIC=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
@@ -61,7 +60,6 @@ CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
 CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
 CONFIG_NETFILTER_XT_TARGET_DSCP=m
@@ -103,7 +101,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -193,7 +190,6 @@ CONFIG_PPP_SYNC_TTY=m
 CONFIG_INPUT_EVDEV=m
 CONFIG_INPUT_MISC=y
 # CONFIG_SERIO_SERPORT is not set
-CONFIG_VT_HW_CONSOLE_BINDING=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
@@ -230,7 +226,6 @@ CONFIG_HID_NTRIG=y
 CONFIG_HID_PANTHERLORD=y
 CONFIG_HID_PETALYNX=y
 CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
 CONFIG_HID_GREENASIA=y
 CONFIG_HID_SMARTJOYPLUS=y
@@ -302,19 +297,18 @@ CONFIG_NLS_UTF8=y
 CONFIG_CRC_T10DIF=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_LATENCYTOP=y
 CONFIG_IRQSOFF_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m

+ 1 - 1
arch/powerpc/configs/ppc6xx_defconfig

@@ -71,7 +71,7 @@ CONFIG_QUICC_ENGINE=y
 CONFIG_QE_GPIO=y
 CONFIG_PPC_BESTCOMM=y
 CONFIG_GPIO_MPC8XXX=y
-CONFIG_MCU_MPC8349EMITX=m
+CONFIG_MCU_MPC8349EMITX=y
 CONFIG_HIGHMEM=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y

+ 15 - 10
arch/powerpc/configs/pseries_defconfig

@@ -3,7 +3,6 @@ CONFIG_ALTIVEC=y
 CONFIG_VSX=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2048
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y
@@ -33,7 +32,6 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_PARTITION_ADVANCED=y
-CONFIG_EFI_PARTITION=y
 CONFIG_PPC_SPLPAR=y
 CONFIG_SCANLOG=m
 CONFIG_PPC_SMLPAR=y
@@ -44,7 +42,6 @@ CONFIG_IBMEBUS=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
 CONFIG_PPC_TRANSACTIONAL_MEM=y
-CONFIG_HOTPLUG_CPU=y
 CONFIG_KEXEC=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_MEMORY_HOTPLUG=y
@@ -52,7 +49,6 @@ CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
-CONFIG_PPC_DENORMALISATION=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
 CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
@@ -113,7 +109,6 @@ CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_TIME=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -132,6 +127,7 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_GENERIC=y
@@ -157,6 +153,10 @@ CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
 CONFIG_ATA=y
 # CONFIG_ATA_SFF is not set
 CONFIG_MD=y
@@ -174,11 +174,16 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
 CONFIG_BONDING=m
 CONFIG_DUMMY=m
 CONFIG_NETCONSOLE=y
 CONFIG_NETPOLL_TRAP=y
 CONFIG_TUN=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VHOST_NET=m
 CONFIG_VORTEX=y
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -216,6 +221,7 @@ CONFIG_SERIAL_JSM=m
 CONFIG_HVC_CONSOLE=y
 CONFIG_HVC_RTAS=y
 CONFIG_HVCS=m
+CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
 CONFIG_GEN_RTC=y
 CONFIG_RAW_DRIVER=y
@@ -237,7 +243,6 @@ CONFIG_HID_GYRATION=y
 CONFIG_HID_PANTHERLORD=y
 CONFIG_HID_PETALYNX=y
 CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
@@ -258,6 +263,8 @@ CONFIG_INFINIBAND_IPOIB=m
 CONFIG_INFINIBAND_IPOIB_CM=y
 CONFIG_INFINIBAND_SRP=m
 CONFIG_INFINIBAND_ISER=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -314,18 +321,17 @@ CONFIG_NLS_UTF8=y
 CONFIG_CRC_T10DIF=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_LOCKUP_DETECTOR=y
 CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
-CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
@@ -347,4 +353,3 @@ CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=y
-CONFIG_VHOST_NET=m

+ 32 - 0
arch/powerpc/include/asm/archrandom.h

@@ -0,0 +1,32 @@
+#ifndef _ASM_POWERPC_ARCHRANDOM_H
+#define _ASM_POWERPC_ARCHRANDOM_H
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#include <asm/machdep.h>
+
+static inline int arch_get_random_long(unsigned long *v)
+{
+	if (ppc_md.get_random_long)
+		return ppc_md.get_random_long(v);
+
+	return 0;
+}
+
+static inline int arch_get_random_int(unsigned int *v)
+{
+	unsigned long val;
+	int rc;
+
+	rc = arch_get_random_long(&val);
+	if (rc)
+		*v = val;
+
+	return rc;
+}
+
+int powernv_get_random_long(unsigned long *v);
+
+#endif /* CONFIG_ARCH_RANDOM */
+
+#endif /* _ASM_POWERPC_ARCHRANDOM_H */
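
A minimal sketch of how the pieces above fit together: a platform publishes
its RNG through ppc_md and generic code consumes it via
arch_get_random_long().  powernv_get_random_long() and
add_device_randomness() are existing kernel symbols; the two functions
below are illustrative only.

    #include <linux/random.h>
    #include <asm/archrandom.h>

    static int __init sketch_rng_init(void)
    {
        ppc_md.get_random_long = powernv_get_random_long;
        return 0;
    }

    static void sketch_seed_pool(void)
    {
        unsigned long v;

        /* arch_get_random_long() returns 0 when no hook is registered */
        if (arch_get_random_long(&v))
            add_device_randomness(&v, sizeof(v));
    }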

+ 5 - 0
arch/powerpc/include/asm/checksum.h

@@ -14,6 +14,9 @@
  * which always checksum on 4 octet boundaries.  ihl is the number
  * of 32-bit words and is always >= 5.
  */
+#ifdef CONFIG_GENERIC_CSUM
+#include <asm-generic/checksum.h>
+#else
 extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 
 /*
@@ -123,5 +126,7 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 	return sum;
 #endif
 }
+
+#endif
 #endif /* __KERNEL__ */
 #endif
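
CONFIG_GENERIC_CSUM pulls in asm-generic/checksum.h because the powerpc
checksum assembly predates little-endian support.  A hedged sketch of the
kind of C helper the generic code supplies (the name is illustrative, not
the asm-generic API):

    static inline __sum16 sketch_csum_fold(__wsum sum)
    {
        u32 s = (__force u32)sum;

        s = (s & 0xffff) + (s >> 16);   /* fold carries into low 16 bits */
        s = (s & 0xffff) + (s >> 16);   /* second fold absorbs the carry-out */
        return (__force __sum16)~s;
    }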

+ 1 - 0
arch/powerpc/include/asm/emulated_ops.h

@@ -43,6 +43,7 @@ extern struct ppc_emulated {
 	struct ppc_emulated_entry popcntb;
 	struct ppc_emulated_entry spe;
 	struct ppc_emulated_entry string;
+	struct ppc_emulated_entry sync;
 	struct ppc_emulated_entry unaligned;
 #ifdef CONFIG_MATH_EMULATION
 	struct ppc_emulated_entry math;

+ 8 - 8
arch/powerpc/include/asm/hvsi.h

@@ -25,7 +25,7 @@
 struct hvsi_header {
 	uint8_t  type;
 	uint8_t  len;
-	uint16_t seqno;
+	__be16 seqno;
 } __attribute__((packed));
 
 struct hvsi_data {
@@ -35,24 +35,24 @@ struct hvsi_data {
 
 struct hvsi_control {
 	struct hvsi_header hdr;
-	uint16_t verb;
+	__be16 verb;
 	/* optional depending on verb: */
-	uint32_t word;
-	uint32_t mask;
+	__be32 word;
+	__be32 mask;
 } __attribute__((packed));
 
 struct hvsi_query {
 	struct hvsi_header hdr;
-	uint16_t verb;
+	__be16 verb;
 } __attribute__((packed));
 
 struct hvsi_query_response {
 	struct hvsi_header hdr;
-	uint16_t verb;
-	uint16_t query_seqno;
+	__be16 verb;
+	__be16 query_seqno;
 	union {
 		uint8_t  version;
-		uint32_t mctrl_word;
+		__be32 mctrl_word;
 	} u;
 } __attribute__((packed));
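
With the HVSI protocol fields retyped as __be16/__be32, readers on a
little-endian kernel must swap explicitly; a one-line sketch (the accessor
name is illustrative):

    static inline u16 sketch_hvsi_seqno(const struct hvsi_header *hdr)
    {
        return be16_to_cpu(hdr->seqno); /* the wire format stays big-endian */
    }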
 

+ 50 - 19
arch/powerpc/include/asm/io.h

@@ -21,7 +21,7 @@ extern struct pci_dev *isa_bridge_pcidev;
 /*
  * has legacy ISA devices ?
  */
-#define arch_has_dev_port()	(isa_bridge_pcidev != NULL)
+#define arch_has_dev_port()	(isa_bridge_pcidev != NULL || isa_io_special)
 #endif
 
 #include <linux/device.h>
@@ -113,7 +113,7 @@ extern bool isa_io_special;
 
 /* gcc 4.0 and older doesn't have 'Z' constraint */
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
-#define DEF_MMIO_IN_LE(name, size, insn)				\
+#define DEF_MMIO_IN_X(name, size, insn)				\
 static inline u##size name(const volatile u##size __iomem *addr)	\
 {									\
 	u##size ret;							\
@@ -122,7 +122,7 @@ static inline u##size name(const volatile u##size __iomem *addr)	\
 	return ret;							\
 }
 
-#define DEF_MMIO_OUT_LE(name, size, insn) 				\
+#define DEF_MMIO_OUT_X(name, size, insn)				\
 static inline void name(volatile u##size __iomem *addr, u##size val)	\
 {									\
 	__asm__ __volatile__("sync;"#insn" %1,0,%2"			\
@@ -130,7 +130,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val)	\
 	IO_SET_SYNC_FLAG();						\
 }
 #else /* newer gcc */
-#define DEF_MMIO_IN_LE(name, size, insn)				\
+#define DEF_MMIO_IN_X(name, size, insn)				\
 static inline u##size name(const volatile u##size __iomem *addr)	\
 {									\
 	u##size ret;							\
@@ -139,7 +139,7 @@ static inline u##size name(const volatile u##size __iomem *addr)	\
 	return ret;							\
 }
 
-#define DEF_MMIO_OUT_LE(name, size, insn) 				\
+#define DEF_MMIO_OUT_X(name, size, insn)				\
 static inline void name(volatile u##size __iomem *addr, u##size val)	\
 {									\
 	__asm__ __volatile__("sync;"#insn" %1,%y0"			\
@@ -148,7 +148,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val)	\
 }
 #endif
 
-#define DEF_MMIO_IN_BE(name, size, insn)				\
+#define DEF_MMIO_IN_D(name, size, insn)				\
 static inline u##size name(const volatile u##size __iomem *addr)	\
 {									\
 	u##size ret;							\
@@ -157,7 +157,7 @@ static inline u##size name(const volatile u##size __iomem *addr)	\
 	return ret;							\
 }
 
-#define DEF_MMIO_OUT_BE(name, size, insn)				\
+#define DEF_MMIO_OUT_D(name, size, insn)				\
 static inline void name(volatile u##size __iomem *addr, u##size val)	\
 {									\
 	__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0"			\
@@ -165,22 +165,37 @@ static inline void name(volatile u##size __iomem *addr, u##size val)	\
 	IO_SET_SYNC_FLAG();						\
 }
 
+DEF_MMIO_IN_D(in_8,     8, lbz);
+DEF_MMIO_OUT_D(out_8,   8, stb);
 
-DEF_MMIO_IN_BE(in_8,     8, lbz);
-DEF_MMIO_IN_BE(in_be16, 16, lhz);
-DEF_MMIO_IN_BE(in_be32, 32, lwz);
-DEF_MMIO_IN_LE(in_le16, 16, lhbrx);
-DEF_MMIO_IN_LE(in_le32, 32, lwbrx);
+#ifdef __BIG_ENDIAN__
+DEF_MMIO_IN_D(in_be16, 16, lhz);
+DEF_MMIO_IN_D(in_be32, 32, lwz);
+DEF_MMIO_IN_X(in_le16, 16, lhbrx);
+DEF_MMIO_IN_X(in_le32, 32, lwbrx);
 
-DEF_MMIO_OUT_BE(out_8,     8, stb);
-DEF_MMIO_OUT_BE(out_be16, 16, sth);
-DEF_MMIO_OUT_BE(out_be32, 32, stw);
-DEF_MMIO_OUT_LE(out_le16, 16, sthbrx);
-DEF_MMIO_OUT_LE(out_le32, 32, stwbrx);
+DEF_MMIO_OUT_D(out_be16, 16, sth);
+DEF_MMIO_OUT_D(out_be32, 32, stw);
+DEF_MMIO_OUT_X(out_le16, 16, sthbrx);
+DEF_MMIO_OUT_X(out_le32, 32, stwbrx);
+#else
+DEF_MMIO_IN_X(in_be16, 16, lhbrx);
+DEF_MMIO_IN_X(in_be32, 32, lwbrx);
+DEF_MMIO_IN_D(in_le16, 16, lhz);
+DEF_MMIO_IN_D(in_le32, 32, lwz);
+
+DEF_MMIO_OUT_X(out_be16, 16, sthbrx);
+DEF_MMIO_OUT_X(out_be32, 32, stwbrx);
+DEF_MMIO_OUT_D(out_le16, 16, sth);
+DEF_MMIO_OUT_D(out_le32, 32, stw);
+
+#endif /* __BIG_ENDIAN */
 
 #ifdef __powerpc64__
-DEF_MMIO_OUT_BE(out_be64, 64, std);
-DEF_MMIO_IN_BE(in_be64, 64, ld);
+
+#ifdef __BIG_ENDIAN__
+DEF_MMIO_OUT_D(out_be64, 64, std);
+DEF_MMIO_IN_D(in_be64, 64, ld);
 
 /* There is no asm instructions for 64 bits reverse loads and stores */
 static inline u64 in_le64(const volatile u64 __iomem *addr)
@@ -192,6 +207,22 @@ static inline void out_le64(volatile u64 __iomem *addr, u64 val)
 {
 	out_be64(addr, swab64(val));
 }
+#else
+DEF_MMIO_OUT_D(out_le64, 64, std);
+DEF_MMIO_IN_D(in_le64, 64, ld);
+
+/* There is no asm instructions for 64 bits reverse loads and stores */
+static inline u64 in_be64(const volatile u64 __iomem *addr)
+{
+	return swab64(in_le64(addr));
+}
+
+static inline void out_be64(volatile u64 __iomem *addr, u64 val)
+{
+	out_le64(addr, swab64(val));
+}
+
+#endif
 #endif /* __powerpc64__ */
 
 /*
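
The rename from _BE/_LE to _D (direct) and _X (indexed, byte-reversing)
macro families reflects that which instruction byte-swaps now depends on
the build's endianness rather than on the accessor's name; drivers keep
calling the same helpers.  A sketch (device and offsets illustrative):

    static u32 sketch_read_status(void __iomem *regs)
    {
        return in_be32(regs + 0x10);    /* lwz on BE builds, lwbrx on LE */
    }

    static void sketch_write_ctrl(void __iomem *regs, u32 val)
    {
        out_le32(regs + 0x14, val);     /* stw on LE builds, stwbrx on BE */
    }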

+ 6 - 6
arch/powerpc/include/asm/lppaca.h

@@ -84,8 +84,8 @@ struct lppaca {
 	 * the processor is yielded (either because of an OS yield or a
 	 * hypervisor preempt).  An even value implies that the processor is
 	 * currently executing.
-	 * NOTE: This value will ALWAYS be zero for dedicated processors and
-	 * will NEVER be zero for shared processors (ie, initialized to a 1).
+	 * NOTE: Even dedicated processor partitions can yield so this
+	 * field cannot be used to determine if we are shared or dedicated.
 	 */
 	volatile __be32 yield_count;
 	volatile __be32 dispersion_count; /* dispatch changed physical cpu */
@@ -106,15 +106,15 @@ extern struct lppaca lppaca[];
 #define lppaca_of(cpu)	(*paca[cpu].lppaca_ptr)
 
 /*
- * Old kernels used a reserved bit in the VPA to determine if it was running
- * in shared processor mode. New kernels look for a non zero yield count
- * but KVM still needs to set the bit to keep the old stuff happy.
+ * We are using a non architected field to determine if a partition is
+ * shared or dedicated. This currently works on both KVM and PHYP, but
+ * we will have to transition to something better.
  */
 #define LPPACA_OLD_SHARED_PROC		2
 
 static inline bool lppaca_shared_proc(struct lppaca *l)
 {
-	return l->yield_count != 0;
+	return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
 }
 
 /*
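
The behavioural point of this change: yield_count can be non-zero on
dedicated-processor partitions too, so shared-processor detection now
reads the old status bit instead.  A sketch of a caller (get_lppaca() is
the existing accessor; the wrapper is illustrative):

    static bool sketch_is_shared_lpar(void)
    {
        return lppaca_shared_proc(get_lppaca());
    }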

+ 16 - 0
arch/powerpc/include/asm/machdep.h

@@ -78,6 +78,18 @@ struct machdep_calls {
 				    long index);
 	void		(*tce_flush)(struct iommu_table *tbl);
 
+	/* _rm versions are for real mode use only */
+	int		(*tce_build_rm)(struct iommu_table *tbl,
+				     long index,
+				     long npages,
+				     unsigned long uaddr,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs);
+	void		(*tce_free_rm)(struct iommu_table *tbl,
+				    long index,
+				    long npages);
+	void		(*tce_flush_rm)(struct iommu_table *tbl);
+
 	void __iomem *	(*ioremap)(phys_addr_t addr, unsigned long size,
 				   unsigned long flags, void *caller);
 	void		(*iounmap)(volatile void __iomem *token);
@@ -263,6 +275,10 @@ struct machdep_calls {
 	ssize_t (*cpu_probe)(const char *, size_t);
 	ssize_t (*cpu_release)(const char *, size_t);
 #endif
+
+#ifdef CONFIG_ARCH_RANDOM
+	int (*get_random_long)(unsigned long *v);
+#endif
 };
 
 extern void e500_idle(void);

+ 2 - 2
arch/powerpc/include/asm/mmu-hash64.h

@@ -135,8 +135,8 @@ extern char initial_stab[];
 #ifndef __ASSEMBLY__
 
 struct hash_pte {
-	unsigned long v;
-	unsigned long r;
+	__be64 v;
+	__be64 r;
 };
 
 extern struct hash_pte *htab_address;
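
The v/r words become __be64 because the hash page table is architected
big-endian and shared with the hypervisor, so a little-endian kernel must
swap on every access.  A sketch of a valid-bit test (HPTE_V_VALID is an
existing flag; the helper name is illustrative):

    static inline bool sketch_hpte_valid(const struct hash_pte *hptep)
    {
        return be64_to_cpu(hptep->v) & HPTE_V_VALID;
    }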

+ 92 - 17
arch/powerpc/include/asm/opal.h

@@ -129,6 +129,9 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_LPC_READ				67
 #define OPAL_LPC_WRITE				68
 #define OPAL_RETURN_CPU				69
+#define OPAL_FLASH_VALIDATE			76
+#define OPAL_FLASH_MANAGE			77
+#define OPAL_FLASH_UPDATE			78
 
 #ifndef __ASSEMBLY__
 
@@ -460,10 +463,12 @@ enum {
 
 enum {
 	OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
+	OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2
 };
 
 enum {
 	OPAL_P7IOC_NUM_PEST_REGS = 128,
+	OPAL_PHB3_NUM_PEST_REGS = 256
 };
 
 struct OpalIoPhbErrorCommon {
@@ -531,28 +536,94 @@ struct OpalIoP7IOCPhbErrorData {
 	uint64_t pestB[OPAL_P7IOC_NUM_PEST_REGS];
 };
 
+struct OpalIoPhb3ErrorData {
+	struct OpalIoPhbErrorCommon common;
+
+	uint32_t brdgCtl;
+
+	/* PHB3 UTL regs */
+	uint32_t portStatusReg;
+	uint32_t rootCmplxStatus;
+	uint32_t busAgentStatus;
+
+	/* PHB3 cfg regs */
+	uint32_t deviceStatus;
+	uint32_t slotStatus;
+	uint32_t linkStatus;
+	uint32_t devCmdStatus;
+	uint32_t devSecStatus;
+
+	/* cfg AER regs */
+	uint32_t rootErrorStatus;
+	uint32_t uncorrErrorStatus;
+	uint32_t corrErrorStatus;
+	uint32_t tlpHdr1;
+	uint32_t tlpHdr2;
+	uint32_t tlpHdr3;
+	uint32_t tlpHdr4;
+	uint32_t sourceId;
+
+	uint32_t rsv3;
+
+	/* Record data about the call to allocate a buffer */
+	uint64_t errorClass;
+	uint64_t correlator;
+
+	uint64_t nFir;			/* 000 */
+	uint64_t nFirMask;		/* 003 */
+	uint64_t nFirWOF;		/* 008 */
+
+	/* PHB3 MMIO Error Regs */
+	uint64_t phbPlssr;		/* 120 */
+	uint64_t phbCsr;		/* 110 */
+	uint64_t lemFir;		/* C00 */
+	uint64_t lemErrorMask;		/* C18 */
+	uint64_t lemWOF;		/* C40 */
+	uint64_t phbErrorStatus;	/* C80 */
+	uint64_t phbFirstErrorStatus;	/* C88 */
+	uint64_t phbErrorLog0;		/* CC0 */
+	uint64_t phbErrorLog1;		/* CC8 */
+	uint64_t mmioErrorStatus;	/* D00 */
+	uint64_t mmioFirstErrorStatus;	/* D08 */
+	uint64_t mmioErrorLog0;		/* D40 */
+	uint64_t mmioErrorLog1;		/* D48 */
+	uint64_t dma0ErrorStatus;	/* D80 */
+	uint64_t dma0FirstErrorStatus;	/* D88 */
+	uint64_t dma0ErrorLog0;		/* DC0 */
+	uint64_t dma0ErrorLog1;		/* DC8 */
+	uint64_t dma1ErrorStatus;	/* E00 */
+	uint64_t dma1FirstErrorStatus;	/* E08 */
+	uint64_t dma1ErrorLog0;		/* E40 */
+	uint64_t dma1ErrorLog1;		/* E48 */
+	uint64_t pestA[OPAL_PHB3_NUM_PEST_REGS];
+	uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS];
+};
+
 typedef struct oppanel_line {
 	const char * 	line;
 	uint64_t 	line_len;
 } oppanel_line_t;
 
+/* /sys/firmware/opal */
+extern struct kobject *opal_kobj;
+
 /* API functions */
-int64_t opal_console_write(int64_t term_number, int64_t *length,
+int64_t opal_console_write(int64_t term_number, __be64 *length,
 			   const uint8_t *buffer);
-int64_t opal_console_read(int64_t term_number, int64_t *length,
+int64_t opal_console_read(int64_t term_number, __be64 *length,
 			  uint8_t *buffer);
 int64_t opal_console_write_buffer_space(int64_t term_number,
-					int64_t *length);
-int64_t opal_rtc_read(uint32_t *year_month_day,
-		      uint64_t *hour_minute_second_millisecond);
+					__be64 *length);
+int64_t opal_rtc_read(__be32 *year_month_day,
+		      __be64 *hour_minute_second_millisecond);
 int64_t opal_rtc_write(uint32_t year_month_day,
 		       uint64_t hour_minute_second_millisecond);
 int64_t opal_cec_power_down(uint64_t request);
 int64_t opal_cec_reboot(void);
 int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
 int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
-int64_t opal_handle_interrupt(uint64_t isn, uint64_t *outstanding_event_mask);
-int64_t opal_poll_events(uint64_t *outstanding_event_mask);
+int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
+int64_t opal_poll_events(__be64 *outstanding_event_mask);
 int64_t opal_pci_set_hub_tce_memory(uint64_t hub_id, uint64_t tce_mem_addr,
 				    uint64_t tce_mem_size);
 int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id, uint64_t tce_mem_addr,
@@ -560,9 +631,9 @@ int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id, uint64_t tce_mem_addr,
 int64_t opal_pci_config_read_byte(uint64_t phb_id, uint64_t bus_dev_func,
 				  uint64_t offset, uint8_t *data);
 int64_t opal_pci_config_read_half_word(uint64_t phb_id, uint64_t bus_dev_func,
-				       uint64_t offset, uint16_t *data);
+				       uint64_t offset, __be16 *data);
 int64_t opal_pci_config_read_word(uint64_t phb_id, uint64_t bus_dev_func,
-				  uint64_t offset, uint32_t *data);
+				  uint64_t offset, __be32 *data);
 int64_t opal_pci_config_write_byte(uint64_t phb_id, uint64_t bus_dev_func,
 				   uint64_t offset, uint8_t data);
 int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
@@ -570,14 +641,14 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
 int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
 				   uint64_t offset, uint32_t data);
 int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
-int64_t opal_get_xive(uint32_t isn, uint16_t *server, uint8_t *priority);
+int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
 int64_t opal_register_exception_handler(uint64_t opal_exception,
 					uint64_t handler_address,
 					uint64_t glue_cache_line);
 int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
 				   uint8_t *freeze_state,
-				   uint16_t *pci_error_type,
-				   uint64_t *phb_status);
+				   __be16 *pci_error_type,
+				   __be64 *phb_status);
 int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
 				  uint64_t eeh_action_token);
 int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
@@ -614,13 +685,13 @@ int64_t opal_pci_msi_eoi(uint64_t phb_id, uint32_t hw_irq);
 int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number,
 			     uint32_t xive_num);
 int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num,
-			     int32_t *interrupt_source_number);
+			     __be32 *interrupt_source_number);
 int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number, uint32_t xive_num,
-			uint8_t msi_range, uint32_t *msi_address,
-			uint32_t *message_data);
+			uint8_t msi_range, __be32 *msi_address,
+			__be32 *message_data);
 int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number,
 			uint32_t xive_num, uint8_t msi_range,
-			uint64_t *msi_address, uint32_t *message_data);
+			__be64 *msi_address, __be32 *message_data);
 int64_t opal_start_cpu(uint64_t thread_number, uint64_t start_address);
 int64_t opal_query_cpu_status(uint64_t thread_number, uint8_t *thread_status);
 int64_t opal_write_oppanel(oppanel_line_t *lines, uint64_t num_lines);
@@ -642,7 +713,7 @@ int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint8_t reinit_scope);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
-int64_t opal_get_epow_status(uint64_t *status);
+int64_t opal_get_epow_status(__be64 *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
 int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
 			    uint16_t *pci_error_type, uint16_t *severity);
@@ -656,6 +727,9 @@ int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type,
 		       uint32_t addr, uint32_t data, uint32_t sz);
 int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type,
 		      uint32_t addr, uint32_t *data, uint32_t sz);
+int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result);
+int64_t opal_manage_flash(uint8_t op);
+int64_t opal_update_flash(uint64_t blk_list);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data);
@@ -684,6 +758,7 @@ extern int opal_set_rtc_time(struct rtc_time *tm);
 extern void opal_get_rtc_time(struct rtc_time *tm);
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
+extern void opal_flash_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
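
The prototype changes all follow one rule: OPAL firmware runs big-endian,
so every out-parameter it fills is now typed __be* and the caller swaps
after the call returns.  A sketch (OPAL_SUCCESS is the existing zero
return code; the handler is illustrative):

    static void sketch_poll_opal(void)
    {
        __be64 events;

        if (opal_poll_events(&events) == OPAL_SUCCESS)
            sketch_handle_events(be64_to_cpu(events)); /* illustrative */
    }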
 

+ 2 - 2
arch/powerpc/include/asm/page.h

@@ -78,7 +78,7 @@ extern unsigned int HPAGE_SHIFT;
  *
  * Also, KERNELBASE >= PAGE_OFFSET and PHYSICAL_START >= MEMORY_START
  *
- * There are two was to determine a physical address from a virtual one:
+ * There are two ways to determine a physical address from a virtual one:
  * va = pa + PAGE_OFFSET - MEMORY_START
  * va = pa + KERNELBASE - PHYSICAL_START
  *
@@ -403,7 +403,7 @@ void arch_free_page(struct page *page, int order);
 
 struct vm_area_struct;
 
-#ifdef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64)
 typedef pte_t *pgtable_t;
 #else
 typedef struct page *pgtable_t;

+ 2 - 0
arch/powerpc/include/asm/pgtable-ppc64.h

@@ -394,6 +394,8 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
 	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
 }
 
+struct page *realmode_pfn_to_page(unsigned long pfn);
+
 static inline char *get_hpte_slot_array(pmd_t *pmdp)
 {
 	/*

+ 7 - 1
arch/powerpc/include/asm/ppc-opcode.h

@@ -143,6 +143,8 @@
 #define PPC_INST_LSWX			0x7c00042a
 #define PPC_INST_LWARX			0x7c000028
 #define PPC_INST_LWSYNC			0x7c2004ac
+#define PPC_INST_SYNC			0x7c0004ac
+#define PPC_INST_SYNC_MASK		0xfc0007fe
 #define PPC_INST_LXVD2X			0x7c000698
 #define PPC_INST_MCRXR			0x7c000400
 #define PPC_INST_MCRXR_MASK		0xfc0007fe
@@ -181,6 +183,7 @@
 #define PPC_INST_TLBIVAX		0x7c000624
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_XXLOR			0xf0000510
+#define PPC_INST_XXSWAPD		0xf0000250
 #define PPC_INST_XVCPSGNDP		0xf0000780
 #define PPC_INST_TRECHKPT		0x7c0007dd
 #define PPC_INST_TRECLAIM		0x7c00075d
@@ -200,6 +203,7 @@
 /* Misc instructions for BPF compiler */
 #define PPC_INST_LD			0xe8000000
 #define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LHBRX			0x7c00062c
 #define PPC_INST_LWZ			0x80000000
 #define PPC_INST_STD			0xf8000000
 #define PPC_INST_STDU			0xf8000001
@@ -218,7 +222,7 @@
 #define PPC_INST_MULLW			0x7c0001d6
 #define PPC_INST_MULHWU			0x7c000016
 #define PPC_INST_MULLI			0x1c000000
-#define PPC_INST_DIVWU			0x7c0003d6
+#define PPC_INST_DIVWU			0x7c000396
 #define PPC_INST_RLWINM			0x54000000
 #define PPC_INST_RLDICR			0x78000004
 #define PPC_INST_SLW			0x7c000030
@@ -344,6 +348,8 @@
 					       VSX_XX1((s), a, b))
 #define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
 					       VSX_XX3((t), a, b))
+#define XXSWAPD(t, a)		stringify_in_c(.long PPC_INST_XXSWAPD | \
+					       VSX_XX3((t), a, a))
 #define XVCPSGNDP(t, a, b)	stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
 					       VSX_XX3((t), (a), (b))))
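The DIVWU fix is worth decoding: X-form arithmetic opcodes put the primary opcode in the top six bits and the extended opcode one bit above the Rc flag. The old value 0x7c0003d6 carries extended opcode 491, which is divw; divwu is extended opcode 459. A quick check, with helper macros assumed for illustration:

	#define OPCD(op)	((uint32_t)(op) << 26)	/* primary opcode */
	#define XO(xo)		((uint32_t)(xo) << 1)	/* extended opcode */

	uint32_t divwu = OPCD(31) | XO(459);	/* 0x7c000396: the new value */
	uint32_t divw  = OPCD(31) | XO(491);	/* 0x7c0003d6: what the old
						   define actually encoded */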
 

+ 46 - 96
arch/powerpc/include/asm/ppc_asm.h

@@ -98,123 +98,51 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
 #define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
 
-#define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define SAVE_FPR(n, base)	stfd	n,8*TS_FPRWIDTH*(n)(base)
 #define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
 #define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
 #define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
 #define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
 #define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
-#define REST_FPR(n, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define REST_FPR(n, base)	lfd	n,8*TS_FPRWIDTH*(n)(base)
 #define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
 #define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
 #define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
 #define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
 #define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
 
-#define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,base,b
+#define SAVE_VR(n,b,base)	li b,16*(n);  stvx n,base,b
 #define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
 #define SAVE_4VRS(n,b,base)	SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
 #define SAVE_8VRS(n,b,base)	SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
 #define SAVE_16VRS(n,b,base)	SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
 #define SAVE_32VRS(n,b,base)	SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
-#define REST_VR(n,b,base)	li b,THREAD_VR0+(16*(n)); lvx n,base,b
+#define REST_VR(n,b,base)	li b,16*(n); lvx n,base,b
 #define REST_2VRS(n,b,base)	REST_VR(n,b,base); REST_VR(n+1,b,base)
 #define REST_4VRS(n,b,base)	REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
 #define REST_8VRS(n,b,base)	REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
 #define REST_16VRS(n,b,base)	REST_8VRS(n,b,base); REST_8VRS(n+8,b,base)
 #define REST_32VRS(n,b,base)	REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
 
-/* Save/restore FPRs, VRs and VSRs from their checkpointed backups in
- * thread_struct:
- */
-#define SAVE_FPR_TRANSACT(n, base)	stfd n,THREAD_TRANSACT_FPR0+	\
-					8*TS_FPRWIDTH*(n)(base)
-#define SAVE_2FPRS_TRANSACT(n, base)	SAVE_FPR_TRANSACT(n, base);	\
-					SAVE_FPR_TRANSACT(n+1, base)
-#define SAVE_4FPRS_TRANSACT(n, base)	SAVE_2FPRS_TRANSACT(n, base);	\
-					SAVE_2FPRS_TRANSACT(n+2, base)
-#define SAVE_8FPRS_TRANSACT(n, base)	SAVE_4FPRS_TRANSACT(n, base);	\
-					SAVE_4FPRS_TRANSACT(n+4, base)
-#define SAVE_16FPRS_TRANSACT(n, base)	SAVE_8FPRS_TRANSACT(n, base);	\
-					SAVE_8FPRS_TRANSACT(n+8, base)
-#define SAVE_32FPRS_TRANSACT(n, base)	SAVE_16FPRS_TRANSACT(n, base);	\
-					SAVE_16FPRS_TRANSACT(n+16, base)
-
-#define REST_FPR_TRANSACT(n, base)	lfd	n,THREAD_TRANSACT_FPR0+	\
-					8*TS_FPRWIDTH*(n)(base)
-#define REST_2FPRS_TRANSACT(n, base)	REST_FPR_TRANSACT(n, base);	\
-					REST_FPR_TRANSACT(n+1, base)
-#define REST_4FPRS_TRANSACT(n, base)	REST_2FPRS_TRANSACT(n, base);	\
-					REST_2FPRS_TRANSACT(n+2, base)
-#define REST_8FPRS_TRANSACT(n, base)	REST_4FPRS_TRANSACT(n, base);	\
-					REST_4FPRS_TRANSACT(n+4, base)
-#define REST_16FPRS_TRANSACT(n, base)	REST_8FPRS_TRANSACT(n, base);	\
-					REST_8FPRS_TRANSACT(n+8, base)
-#define REST_32FPRS_TRANSACT(n, base)	REST_16FPRS_TRANSACT(n, base);	\
-					REST_16FPRS_TRANSACT(n+16, base)
-
-
-#define SAVE_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
-					stvx n,b,base
-#define SAVE_2VRS_TRANSACT(n,b,base)	SAVE_VR_TRANSACT(n,b,base);	\
-					SAVE_VR_TRANSACT(n+1,b,base)
-#define SAVE_4VRS_TRANSACT(n,b,base)	SAVE_2VRS_TRANSACT(n,b,base);	\
-					SAVE_2VRS_TRANSACT(n+2,b,base)
-#define SAVE_8VRS_TRANSACT(n,b,base)	SAVE_4VRS_TRANSACT(n,b,base);	\
-					SAVE_4VRS_TRANSACT(n+4,b,base)
-#define SAVE_16VRS_TRANSACT(n,b,base)	SAVE_8VRS_TRANSACT(n,b,base);	\
-					SAVE_8VRS_TRANSACT(n+8,b,base)
-#define SAVE_32VRS_TRANSACT(n,b,base)	SAVE_16VRS_TRANSACT(n,b,base);	\
-					SAVE_16VRS_TRANSACT(n+16,b,base)
-
-#define REST_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
-					lvx n,b,base
-#define REST_2VRS_TRANSACT(n,b,base)	REST_VR_TRANSACT(n,b,base);	\
-					REST_VR_TRANSACT(n+1,b,base)
-#define REST_4VRS_TRANSACT(n,b,base)	REST_2VRS_TRANSACT(n,b,base);	\
-					REST_2VRS_TRANSACT(n+2,b,base)
-#define REST_8VRS_TRANSACT(n,b,base)	REST_4VRS_TRANSACT(n,b,base);	\
-					REST_4VRS_TRANSACT(n+4,b,base)
-#define REST_16VRS_TRANSACT(n,b,base)	REST_8VRS_TRANSACT(n,b,base);	\
-					REST_8VRS_TRANSACT(n+8,b,base)
-#define REST_32VRS_TRANSACT(n,b,base)	REST_16VRS_TRANSACT(n,b,base);	\
-					REST_16VRS_TRANSACT(n+16,b,base)
-
-
-#define SAVE_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					STXVD2X(n,R##base,R##b)
-#define SAVE_2VSRS_TRANSACT(n,b,base)	SAVE_VSR_TRANSACT(n,b,base);	\
-	                                SAVE_VSR_TRANSACT(n+1,b,base)
-#define SAVE_4VSRS_TRANSACT(n,b,base)	SAVE_2VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_2VSRS_TRANSACT(n+2,b,base)
-#define SAVE_8VSRS_TRANSACT(n,b,base)	SAVE_4VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_4VSRS_TRANSACT(n+4,b,base)
-#define SAVE_16VSRS_TRANSACT(n,b,base)	SAVE_8VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_8VSRS_TRANSACT(n+8,b,base)
-#define SAVE_32VSRS_TRANSACT(n,b,base)	SAVE_16VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_16VSRS_TRANSACT(n+16,b,base)
-
-#define REST_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					LXVD2X(n,R##base,R##b)
-#define REST_2VSRS_TRANSACT(n,b,base)	REST_VSR_TRANSACT(n,b,base);    \
-	                                REST_VSR_TRANSACT(n+1,b,base)
-#define REST_4VSRS_TRANSACT(n,b,base)	REST_2VSRS_TRANSACT(n,b,base);	\
-	                                REST_2VSRS_TRANSACT(n+2,b,base)
-#define REST_8VSRS_TRANSACT(n,b,base)	REST_4VSRS_TRANSACT(n,b,base);	\
-	                                REST_4VSRS_TRANSACT(n+4,b,base)
-#define REST_16VSRS_TRANSACT(n,b,base)	REST_8VSRS_TRANSACT(n,b,base);	\
-	                                REST_8VSRS_TRANSACT(n+8,b,base)
-#define REST_32VSRS_TRANSACT(n,b,base)	REST_16VSRS_TRANSACT(n,b,base);	\
-	                                REST_16VSRS_TRANSACT(n+16,b,base)
+#ifdef __BIG_ENDIAN__
+#define STXVD2X_ROT(n,b,base)		STXVD2X(n,b,base)
+#define LXVD2X_ROT(n,b,base)		LXVD2X(n,b,base)
+#else
+#define STXVD2X_ROT(n,b,base)		XXSWAPD(n,n);		\
+					STXVD2X(n,b,base);	\
+					XXSWAPD(n,n)
 
+#define LXVD2X_ROT(n,b,base)		LXVD2X(n,b,base);	\
+					XXSWAPD(n,n)
+#endif
 /* Save the lower 32 VSRs in the thread VSR region */
-#define SAVE_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n));  STXVD2X(n,R##base,R##b)
+#define SAVE_VSR(n,b,base)	li b,16*(n);  STXVD2X_ROT(n,R##base,R##b)
 #define SAVE_2VSRS(n,b,base)	SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
 #define SAVE_4VSRS(n,b,base)	SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
 #define SAVE_8VSRS(n,b,base)	SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
 #define SAVE_16VSRS(n,b,base)	SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
 #define SAVE_32VSRS(n,b,base)	SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
-#define REST_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n)); LXVD2X(n,R##base,R##b)
+#define REST_VSR(n,b,base)	li b,16*(n); LXVD2X_ROT(n,R##base,R##b)
 #define REST_2VSRS(n,b,base)	REST_VSR(n,b,base); REST_VSR(n+1,b,base)
 #define REST_4VSRS(n,b,base)	REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
 #define REST_8VSRS(n,b,base)	REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
@@ -478,13 +406,6 @@ BEGIN_FTR_SECTION_NESTED(945)						\
 	std	ra,TASKTHREADPPR(rb);					\
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
 
-#define RESTORE_PPR(ra, rb)						\
-BEGIN_FTR_SECTION_NESTED(946)						\
-	ld	ra,PACACURRENT(r13);					\
-	ld	rb,TASKTHREADPPR(ra);					\
-	mtspr	SPRN_PPR,rb;	/* Restore PPR */			\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
-
 #endif
 
 /*
@@ -832,6 +753,35 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
 #define N_SLINE	68
 #define N_SO	100
 
-#endif /*  __ASSEMBLY__ */
+/*
+ * Create an endian fixup trampoline
+ *
+ * This starts with a "tdi 0,0,0x48" instruction which is
+ * essentially a "trap never", and thus akin to a nop.
+ *
+ * The opcode for this instruction read with the wrong endian
+ * however results in a b . + 8
+ *
+ * So essentially we use that trick to execute the following
+ * trampoline in "reverse endian" if we are running with the
+ * MSR_LE bit set the "wrong" way for whatever endianness the
+ * kernel is built for.
+ */
 
+#ifdef CONFIG_PPC_BOOK3E
+#define FIXUP_ENDIAN
+#else
+#define FIXUP_ENDIAN						   \
+	tdi   0,0,0x48;	  /* Reverse endian of b . + 8		*/ \
+	b     $+36;	  /* Skip trampoline if endian is good	*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x1c004a39; /* addi r10,r10,28			*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0xa6035a7d; /* mtsrr0 r10				*/ \
+	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
+	.long 0x2400004c  /* rfid				*/
+#endif /* !CONFIG_PPC_BOOK3E */
+#endif /*  __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PPC_ASM_H */
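The FIXUP_ENDIAN trick can be verified by hand: tdi 0,0,0x48 assembles to 0x08000048, and that word read with reversed bytes is 0x48000008, the encoding of b .+8. A CPU in the matching endian thus falls through the no-op to the b $+36 that skips the trampoline, while a CPU in the opposite endian branches straight into the byte-reversed trampoline, which flips MSR_LE via rfid. Each .long in the trampoline is just the byte-swapped image of the listed instruction (mflr r10 is 0x7d4802a6, stored as 0xa602487d). A sketch of the swap:

	#include <stdint.h>

	uint32_t tdi = 0x08000048;		/* tdi 0,0,0x48: trap never */
	uint32_t rev = __builtin_bswap32(tdi);	/* 0x48000008: b .+8 */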

+ 53 - 38
arch/powerpc/include/asm/processor.h

@@ -14,8 +14,18 @@
 
 #ifdef CONFIG_VSX
 #define TS_FPRWIDTH 2
+
+#ifdef __BIG_ENDIAN__
+#define TS_FPROFFSET 0
+#define TS_VSRLOWOFFSET 1
+#else
+#define TS_FPROFFSET 1
+#define TS_VSRLOWOFFSET 0
+#endif
+
 #else
 #define TS_FPRWIDTH 1
+#define TS_FPROFFSET 0
 #endif
 
 #ifdef CONFIG_PPC64
@@ -142,26 +152,22 @@ typedef struct {
 	unsigned long seg;
 } mm_segment_t;
 
-#define TS_FPROFFSET 0
-#define TS_VSRLOWOFFSET 1
-#define TS_FPR(i) fpr[i][TS_FPROFFSET]
-#define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
+#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
+#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]
 
-struct thread_struct {
-	unsigned long	ksp;		/* Kernel stack pointer */
-#ifdef CONFIG_PPC64
-	unsigned long	ksp_vsid;
-#endif
-	struct pt_regs	*regs;		/* Pointer to saved register state */
-	mm_segment_t	fs;		/* for get_fs() validation */
-#ifdef CONFIG_BOOKE
-	/* BookE base exception scratch space; align on cacheline */
-	unsigned long	normsave[8] ____cacheline_aligned;
-#endif
-#ifdef CONFIG_PPC32
-	void		*pgdir;		/* root of page-table tree */
-	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
-#endif
+/* FP and VSX 0-31 register set */
+struct thread_fp_state {
+	u64	fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
+	u64	fpscr;		/* Floating point status */
+};
+
+/* Complete AltiVec register set including VSCR */
+struct thread_vr_state {
+	vector128	vr[32] __attribute__((aligned(16)));
+	vector128	vscr __attribute__((aligned(16)));
+};
+
+struct debug_reg {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	/*
 	 * The following help to manage the use of Debug Control Registers
@@ -198,13 +204,26 @@ struct thread_struct {
 	unsigned long	dvc2;
 #endif
 #endif
-	/* FP and VSX 0-31 register set */
-	double		fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
-	struct {
+};
 
-		unsigned int pad;
-		unsigned int val;	/* Floating point status */
-	} fpscr;
+struct thread_struct {
+	unsigned long	ksp;		/* Kernel stack pointer */
+#ifdef CONFIG_PPC64
+	unsigned long	ksp_vsid;
+#endif
+	struct pt_regs	*regs;		/* Pointer to saved register state */
+	mm_segment_t	fs;		/* for get_fs() validation */
+#ifdef CONFIG_BOOKE
+	/* BookE base exception scratch space; align on cacheline */
+	unsigned long	normsave[8] ____cacheline_aligned;
+#endif
+#ifdef CONFIG_PPC32
+	void		*pgdir;		/* root of page-table tree */
+	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
+#endif
+	struct debug_reg debug;
+	struct thread_fp_state	fp_state;
+	struct thread_fp_state	*fp_save_area;
 	int		fpexc_mode;	/* floating-point exception mode */
 	unsigned int	align_ctl;	/* alignment handling control */
 #ifdef CONFIG_PPC64
@@ -222,10 +241,8 @@ struct thread_struct {
 	struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
 	unsigned long	trap_nr;	/* last trap # on this thread */
 #ifdef CONFIG_ALTIVEC
-	/* Complete AltiVec register set */
-	vector128	vr[32] __attribute__((aligned(16)));
-	/* AltiVec status */
-	vector128	vscr __attribute__((aligned(16)));
+	struct thread_vr_state vr_state;
+	struct thread_vr_state *vr_save_area;
 	unsigned long	vrsave;
 	int		used_vr;	/* set if process has used altivec */
 #endif /* CONFIG_ALTIVEC */
@@ -262,13 +279,8 @@ struct thread_struct {
 	 * transact_fpr[] is the new set of transactional values.
 	 * VRs work the same way.
 	 */
-	double		transact_fpr[32][TS_FPRWIDTH];
-	struct {
-		unsigned int pad;
-		unsigned int val;	/* Floating point status */
-	} transact_fpscr;
-	vector128	transact_vr[32] __attribute__((aligned(16)));
-	vector128	transact_vscr __attribute__((aligned(16)));
+	struct thread_fp_state transact_fp;
+	struct thread_vr_state transact_vr;
 	unsigned long	transact_vrsave;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -322,8 +334,6 @@ struct thread_struct {
 	.ksp = INIT_SP, \
 	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
 	.fs = KERNEL_DS, \
-	.fpr = {{0}}, \
-	.fpscr = { .val = 0, }, \
 	.fpexc_mode = 0, \
 	.ppr = INIT_PPR, \
 }
@@ -361,6 +371,11 @@ extern int set_endian(struct task_struct *tsk, unsigned int val);
 extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
 extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
 
+extern void load_fp_state(struct thread_fp_state *fp);
+extern void store_fp_state(struct thread_fp_state *fp);
+extern void load_vr_state(struct thread_vr_state *vr);
+extern void store_vr_state(struct thread_vr_state *vr);
+
 static inline unsigned int __unpack_fe01(unsigned long msr_bits)
 {
 	return ((msr_bits & MSR_FE0) >> 10) | ((msr_bits & MSR_FE1) >> 8);
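Pulling the FP/VSX and AltiVec state out into standalone structs is what makes the new load/store helpers possible: they take a pointer to any thread_fp_state or thread_vr_state, so the normal and checkpointed transactional copies share one code path. The endian-dependent TS_FPROFFSET reflects that with VSX each fpr[i] is the two-doubleword image of a VSR, and the doubleword holding the scalar FP value lands at index 0 in big-endian memory order but index 1 in little-endian. A usage sketch for the helpers, assuming the caller satisfies the "FP enabled in the MSR" precondition documented in fpu.S:

	struct thread_fp_state buf;

	preempt_disable();
	enable_kernel_fp();		/* make FP usable in kernel context */
	store_fp_state(&buf);		/* FPRs/VSRs + FPSCR -> buf */
	/* ... */
	load_fp_state(&buf);		/* buf -> FPRs/VSRs + FPSCR */
	preempt_enable();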

+ 0 - 3
arch/powerpc/include/asm/prom.h

@@ -44,9 +44,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
 
 extern void kdump_move_device_tree(void);
 
-/* cache lookup */
-struct device_node *of_find_next_cache_node(struct device_node *np);
-
 #ifdef CONFIG_NUMA
 extern int of_node_to_nid(struct device_node *device);
 #else

+ 6 - 1
arch/powerpc/include/asm/reg.h

@@ -115,7 +115,12 @@
 #define MSR_64BIT	MSR_SF
 
 /* Server variant */
-#define MSR_		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
+#define __MSR		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
+#ifdef __BIG_ENDIAN__
+#define MSR_		__MSR
+#else
+#define MSR_		(__MSR | MSR_LE)
+#endif
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
 #define MSR_USER64	(MSR_USER32 | MSR_64BIT)

+ 4 - 4
arch/powerpc/include/asm/reg_booke.h

@@ -381,7 +381,7 @@
 #define DBCR0_IA34T	0x00004000	/* Instr Addr 3-4 range Toggle */
 #define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
 
-#define dbcr_iac_range(task)	((task)->thread.dbcr0)
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr0)
 #define DBCR_IAC12I	DBCR0_IA12			/* Range Inclusive */
 #define DBCR_IAC12X	(DBCR0_IA12 | DBCR0_IA12X)	/* Range Exclusive */
 #define DBCR_IAC12MODE	(DBCR0_IA12 | DBCR0_IA12X)	/* IAC 1-2 Mode Bits */
@@ -395,7 +395,7 @@
 #define DBCR1_DAC1W	0x20000000	/* DAC1 Write Debug Event */
 #define DBCR1_DAC2W	0x10000000	/* DAC2 Write Debug Event */
 
-#define dbcr_dac(task)	((task)->thread.dbcr1)
+#define dbcr_dac(task)	((task)->thread.debug.dbcr1)
 #define DBCR_DAC1R	DBCR1_DAC1R
 #define DBCR_DAC1W	DBCR1_DAC1W
 #define DBCR_DAC2R	DBCR1_DAC2R
@@ -441,7 +441,7 @@
 #define DBCR0_CRET	0x00000020	/* Critical Return Debug Event */
 #define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
 
-#define dbcr_dac(task)	((task)->thread.dbcr0)
+#define dbcr_dac(task)	((task)->thread.debug.dbcr0)
 #define DBCR_DAC1R	DBCR0_DAC1R
 #define DBCR_DAC1W	DBCR0_DAC1W
 #define DBCR_DAC2R	DBCR0_DAC2R
@@ -475,7 +475,7 @@
 #define DBCR1_IAC34MX	0x000000C0	/* Instr Addr 3-4 range eXclusive */
 #define DBCR1_IAC34AT	0x00000001	/* Instr Addr 3-4 range Toggle */
 
-#define dbcr_iac_range(task)	((task)->thread.dbcr1)
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr1)
 #define DBCR_IAC12I	DBCR1_IAC12M	/* Range Inclusive */
 #define DBCR_IAC12X	DBCR1_IAC12MX	/* Range Exclusive */
 #define DBCR_IAC12MODE	DBCR1_IAC12MX	/* IAC 1-2 Mode Bits */

+ 17 - 6
arch/powerpc/include/asm/scom.h

@@ -54,8 +54,8 @@ struct scom_controller {
 	scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
 	void (*unmap)(scom_map_t map);
 
-	u64 (*read)(scom_map_t map, u32 reg);
-	void (*write)(scom_map_t map, u32 reg, u64 value);
+	int (*read)(scom_map_t map, u64 reg, u64 *value);
+	int (*write)(scom_map_t map, u64 reg, u64 value);
 };
 
 extern const struct scom_controller *scom_controller;
@@ -133,10 +133,18 @@ static inline void scom_unmap(scom_map_t map)
  * scom_read - Read a SCOM register
  * @map: Result of scom_map
  * @reg: Register index within that map
+ * @value: Updated with the value read
+ *
+ * Returns 0 (success) or a negative error code
  */
-static inline u64 scom_read(scom_map_t map, u32 reg)
+static inline int scom_read(scom_map_t map, u64 reg, u64 *value)
 {
-	return scom_controller->read(map, reg);
+	int rc;
+
+	rc = scom_controller->read(map, reg, value);
+	if (rc)
+		*value = 0xfffffffffffffffful;
+	return rc;
 }
 
 /**
@@ -144,12 +152,15 @@ static inline u64 scom_read(scom_map_t map, u32 reg)
  * @map: Result of scom_map
  * @reg: Register index within that map
  * @value: Value to write
+ *
+ * Returns 0 (success) or a negative error code
  */
-static inline void scom_write(scom_map_t map, u32 reg, u64 value)
+static inline int scom_write(scom_map_t map, u64 reg, u64 value)
 {
-	scom_controller->write(map, reg, value);
+	return scom_controller->write(map, reg, value);
 }
 
+
 #endif /* CONFIG_PPC_SCOM */
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
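The SCOM accessors switch from returning raw data to returning an error code, with the value passed back through a pointer; note the inline wrapper also poisons *value with all-ones on failure, so callers that forget to check see an obviously bad value rather than stale stack contents. The new calling convention, sketched:

	u64 val;
	int rc;

	rc = scom_read(map, reg, &val);
	if (rc)
		return rc;	/* *value was set to 0xffffffffffffffff */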

+ 4 - 0
arch/powerpc/include/asm/setup.h

@@ -23,6 +23,10 @@ extern void reloc_got2(unsigned long);
 
 #define PTRRELOC(x)	((typeof(x)) add_reloc_offset((unsigned long)(x)))
 
+void check_for_initrd(void);
+void do_init_bootmem(void);
+void setup_panic(void);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_SETUP_H */

+ 1 - 1
arch/powerpc/include/asm/sfp-machine.h

@@ -125,7 +125,7 @@
 #define FP_EX_DIVZERO         (1 << (31 - 5))
 #define FP_EX_INEXACT         (1 << (31 - 6))
 
-#define __FPU_FPSCR	(current->thread.fpscr.val)
+#define __FPU_FPSCR	(current->thread.fp_state.fpscr)
 
 /* We only actually write to the destination register
  * if exceptions signalled (if any) will not trap.

+ 4 - 0
arch/powerpc/include/asm/string.h

@@ -10,7 +10,9 @@
 #define __HAVE_ARCH_STRNCMP
 #define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
+#ifdef __BIG_ENDIAN__
 #define __HAVE_ARCH_MEMCPY
+#endif
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
@@ -22,7 +24,9 @@ extern int strcmp(const char *,const char *);
 extern int strncmp(const char *, const char *, __kernel_size_t);
 extern char * strcat(char *, const char *);
 extern void * memset(void *,int,__kernel_size_t);
+#ifdef __BIG_ENDIAN__
 extern void * memcpy(void *,const void *,__kernel_size_t);
+#endif
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);

+ 1 - 0
arch/powerpc/include/asm/switch_to.h

@@ -35,6 +35,7 @@ extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
 extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
+extern void switch_booke_debug_regs(struct thread_struct *new_thread);
 
 #ifndef CONFIG_SMP
 extern void discard_lazy_cpu_state(void);

+ 78 - 0
arch/powerpc/include/asm/word-at-a-time.h

@@ -8,6 +8,8 @@
 #include <linux/kernel.h>
 #include <asm/asm-compat.h>
 
+#ifdef __BIG_ENDIAN__
+
 struct word_at_a_time {
 	const unsigned long high_bits, low_bits;
 };
@@ -38,4 +40,80 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
 	return (val + c->high_bits) & ~rhs;
 }
 
+#else
+
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+#ifdef CONFIG_64BIT
+
+/* Alan Modra's little-endian strlen tail for 64-bit */
+#define create_zero_mask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	unsigned long leading_zero_bits;
+	long trailing_zero_bit_mask;
+
+	asm ("addi %1,%2,-1\n\t"
+	     "andc %1,%1,%2\n\t"
+	     "popcntd %0,%1"
+	     : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
+	     : "r" (mask));
+	return leading_zero_bits >> 3;
+}
+
+#else	/* 32-bit case */
+
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	long a = (0x0ff0001+mask) >> 23;
+	/* Fix the 1 for 00 case */
+	return a & mask;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return count_masked_bytes(mask);
+}
+
+#endif
+
+/* Return nonzero if it has a zero */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
+
+#endif
+
 #endif /* _ASM_WORD_AT_A_TIME_H */
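These helpers implement the generic word-at-a-time contract, so the lib/ string routines can use them unchanged. On 64-bit LE, find_zero isolates the bits below the lowest set mask bit with (mask - 1) & ~mask, counts them with popcntd, and shifts right by 3 to turn a bit position into a byte index. The canonical search loop looks roughly like this (load_word() stands in for a hypothetical aligned load):

	const struct word_at_a_time constants = { WORD_AT_A_TIME_CONSTANTS };
	unsigned long len = 0, bits;

	for (;;) {
		unsigned long w = load_word(src + len);

		if (has_zero(w, &bits, &constants)) {
			bits = prep_zero_mask(w, bits, &constants);
			len += find_zero(create_zero_mask(bits));
			break;			/* len = index of first NUL */
		}
		len += sizeof(unsigned long);
	}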

+ 67 - 0
arch/powerpc/include/asm/xor.h

@@ -1 +1,68 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#ifndef _ASM_POWERPC_XOR_H
+#define _ASM_POWERPC_XOR_H
+
+#ifdef CONFIG_ALTIVEC
+
+#include <asm/cputable.h>
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in);
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in);
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in);
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in, unsigned long *v5_in);
+
+static struct xor_block_template xor_block_altivec = {
+	.name = "altivec",
+	.do_2 = xor_altivec_2,
+	.do_3 = xor_altivec_3,
+	.do_4 = xor_altivec_4,
+	.do_5 = xor_altivec_5,
+};
+
+#define XOR_SPEED_ALTIVEC()				\
+	do {						\
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))	\
+			xor_speed(&xor_block_altivec);	\
+	} while (0)
+#else
+#define XOR_SPEED_ALTIVEC()
+#endif
+
+/* Also try the generic routines. */
 #include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+do {							\
+	xor_speed(&xor_block_8regs);			\
+	xor_speed(&xor_block_8regs_p);			\
+	xor_speed(&xor_block_32regs);			\
+	xor_speed(&xor_block_32regs_p);			\
+	XOR_SPEED_ALTIVEC();				\
+} while (0)
+
+#endif /* _ASM_POWERPC_XOR_H */

+ 4 - 0
arch/powerpc/include/uapi/asm/byteorder.h

@@ -7,6 +7,10 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#ifdef __LITTLE_ENDIAN__
+#include <linux/byteorder/little_endian.h>
+#else
 #include <linux/byteorder/big_endian.h>
+#endif
 
 #endif /* _ASM_POWERPC_BYTEORDER_H */

+ 112 - 61
arch/powerpc/kernel/align.c

@@ -54,8 +54,6 @@ struct aligninfo {
 /* DSISR bits reported for a DCBZ instruction: */
 #define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */
 
-#define SWAP(a, b)	(t = (a), (a) = (b), (b) = t)
-
 /*
  * The PowerPC stores certain bits of the instruction that caused the
  * alignment exception in the DSISR register.  This array maps those
@@ -256,11 +254,17 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
  * bottom 4 bytes of each register, and the loads clear the
  * top 4 bytes of the affected register.
  */
+#ifdef __BIG_ENDIAN__
 #ifdef CONFIG_PPC64
 #define REG_BYTE(rp, i)		*((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
 #else
 #define REG_BYTE(rp, i)		*((u8 *)(rp) + (i))
 #endif
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define REG_BYTE(rp, i)		(*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
+#endif
 
 #define SWIZ_PTR(p)		((unsigned char __user *)((p) ^ swiz))
 
@@ -305,6 +309,15 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
 			nb0 = nb + reg * 4 - 128;
 			nb = 128 - reg * 4;
 		}
+#ifdef __LITTLE_ENDIAN__
+		/*
+		 *  String instructions are endian neutral but the code
+		 *  below is not.  Force byte swapping on so that the
+		 *  effects of swizzling are undone in the load/store
+		 *  loops below.
+		 */
+		flags ^= SW;
+#endif
 	} else {
 		/* lmw, stmw */
 		nb = (32 - reg) * 4;
@@ -458,7 +471,7 @@ static struct aligninfo spe_aligninfo[32] = {
 static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 		       unsigned int instr)
 {
-	int t, ret;
+	int ret;
 	union {
 		u64 ll;
 		u32 w[2];
@@ -581,24 +594,18 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 	if (flags & SW) {
 		switch (flags & 0xf0) {
 		case E8:
-			SWAP(data.v[0], data.v[7]);
-			SWAP(data.v[1], data.v[6]);
-			SWAP(data.v[2], data.v[5]);
-			SWAP(data.v[3], data.v[4]);
+			data.ll = swab64(data.ll);
 			break;
 		case E4:
-
-			SWAP(data.v[0], data.v[3]);
-			SWAP(data.v[1], data.v[2]);
-			SWAP(data.v[4], data.v[7]);
-			SWAP(data.v[5], data.v[6]);
+			data.w[0] = swab32(data.w[0]);
+			data.w[1] = swab32(data.w[1]);
 			break;
 		/* It's half-word endian */
 		default:
-			SWAP(data.v[0], data.v[1]);
-			SWAP(data.v[2], data.v[3]);
-			SWAP(data.v[4], data.v[5]);
-			SWAP(data.v[6], data.v[7]);
+			data.h[0] = swab16(data.h[0]);
+			data.h[1] = swab16(data.h[1]);
+			data.h[2] = swab16(data.h[2]);
+			data.h[3] = swab16(data.h[3]);
 			break;
 		}
 	}
@@ -658,14 +665,31 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
 	flush_vsx_to_thread(current);
 
 	if (reg < 32)
-		ptr = (char *) &current->thread.TS_FPR(reg);
+		ptr = (char *) &current->thread.fp_state.fpr[reg][0];
 	else
-		ptr = (char *) &current->thread.vr[reg - 32];
+		ptr = (char *) &current->thread.vr_state.vr[reg - 32];
 
 	lptr = (unsigned long *) ptr;
 
+#ifdef __LITTLE_ENDIAN__
+	if (flags & SW) {
+		elsize = length;
+		sw = length-1;
+	} else {
+		/*
+		 * The elements are BE ordered, even in LE mode, so process
+		 * them in reverse order.
+		 */
+		addr += length - elsize;
+
+		/* 8 byte memory accesses go in the top 8 bytes of the VR */
+		if (length == 8)
+			ptr += 8;
+	}
+#else
 	if (flags & SW)
 		sw = elsize-1;
+#endif
 
 	for (j = 0; j < length; j += elsize) {
 		for (i = 0; i < elsize; ++i) {
@@ -675,19 +699,31 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
 				ret |= __get_user(ptr[i^sw], addr + i);
 		}
 		ptr  += elsize;
+#ifdef __LITTLE_ENDIAN__
+		addr -= elsize;
+#else
 		addr += elsize;
+#endif
 	}
 
+#ifdef __BIG_ENDIAN__
+#define VSX_HI 0
+#define VSX_LO 1
+#else
+#define VSX_HI 1
+#define VSX_LO 0
+#endif
+
 	if (!ret) {
 		if (flags & U)
 			regs->gpr[areg] = regs->dar;
 
 		/* Splat load copies the same data to top and bottom 8 bytes */
 		if (flags & SPLT)
-			lptr[1] = lptr[0];
-		/* For 8 byte loads, zero the top 8 bytes */
+			lptr[VSX_LO] = lptr[VSX_HI];
+		/* For 8 byte loads, zero the low 8 bytes */
 		else if (!(flags & ST) && (8 == length))
-			lptr[1] = 0;
+			lptr[VSX_LO] = 0;
 	} else
 		return -EFAULT;
 
@@ -710,18 +746,28 @@ int fix_alignment(struct pt_regs *regs)
 	unsigned int dsisr;
 	unsigned char __user *addr;
 	unsigned long p, swiz;
-	int ret, t;
-	union {
+	int ret, i;
+	union data {
 		u64 ll;
 		double dd;
 		unsigned char v[8];
 		struct {
+#ifdef __LITTLE_ENDIAN__
+			int	 low32;
+			unsigned hi32;
+#else
 			unsigned hi32;
 			int	 low32;
+#endif
 		} x32;
 		struct {
+#ifdef __LITTLE_ENDIAN__
+			short	      low16;
+			unsigned char hi48[6];
+#else
 			unsigned char hi48[6];
 			short	      low16;
+#endif
 		} x16;
 	} data;
 
@@ -780,8 +826,9 @@ int fix_alignment(struct pt_regs *regs)
 
 	/* Byteswap little endian loads and stores */
 	swiz = 0;
-	if (regs->msr & MSR_LE) {
+	if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
 		flags ^= SW;
+#ifdef __BIG_ENDIAN__
 		/*
 		 * So-called "PowerPC little endian" mode works by
 		 * swizzling addresses rather than by actually doing
@@ -794,6 +841,7 @@ int fix_alignment(struct pt_regs *regs)
 		 */
 		if (cpu_has_feature(CPU_FTR_PPC_LE))
 			swiz = 7;
+#endif
 	}
 
 	/* DAR has the operand effective address */
@@ -818,7 +866,7 @@ int fix_alignment(struct pt_regs *regs)
 			elsize = 8;
 
 		flags = 0;
-		if (regs->msr & MSR_LE)
+		if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
 			flags |= SW;
 		if (instruction & 0x100)
 			flags |= ST;
@@ -878,32 +926,36 @@ int fix_alignment(struct pt_regs *regs)
 	 * get it from register values
 	 */
 	if (!(flags & ST)) {
-		data.ll = 0;
-		ret = 0;
-		p = (unsigned long) addr;
+		unsigned int start = 0;
+
 		switch (nb) {
-		case 8:
-			ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++));
-			ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++));
-			ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++));
-			ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++));
 		case 4:
-			ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++));
-			ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++));
+			start = offsetof(union data, x32.low32);
+			break;
 		case 2:
-			ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++));
-			ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++));
-			if (unlikely(ret))
-				return -EFAULT;
+			start = offsetof(union data, x16.low16);
+			break;
 		}
+
+		data.ll = 0;
+		ret = 0;
+		p = (unsigned long)addr;
+
+		for (i = 0; i < nb; i++)
+			ret |= __get_user_inatomic(data.v[start + i],
+						   SWIZ_PTR(p++));
+
+		if (unlikely(ret))
+			return -EFAULT;
+
 	} else if (flags & F) {
-		data.dd = current->thread.TS_FPR(reg);
+		data.ll = current->thread.TS_FPR(reg);
 		if (flags & S) {
 			/* Single-precision FP store requires conversion... */
 #ifdef CONFIG_PPC_FPU
 			preempt_disable();
 			enable_kernel_fp();
-			cvt_df(&data.dd, (float *)&data.v[4]);
+			cvt_df(&data.dd, (float *)&data.x32.low32);
 			preempt_enable();
 #else
 			return 0;
@@ -915,17 +967,13 @@ int fix_alignment(struct pt_regs *regs)
 	if (flags & SW) {
 		switch (nb) {
 		case 8:
-			SWAP(data.v[0], data.v[7]);
-			SWAP(data.v[1], data.v[6]);
-			SWAP(data.v[2], data.v[5]);
-			SWAP(data.v[3], data.v[4]);
+			data.ll = swab64(data.ll);
 			break;
 		case 4:
-			SWAP(data.v[4], data.v[7]);
-			SWAP(data.v[5], data.v[6]);
+			data.x32.low32 = swab32(data.x32.low32);
 			break;
 		case 2:
-			SWAP(data.v[6], data.v[7]);
+			data.x16.low16 = swab16(data.x16.low16);
 			break;
 		}
 	}
@@ -947,7 +995,7 @@ int fix_alignment(struct pt_regs *regs)
 #ifdef CONFIG_PPC_FPU
 		preempt_disable();
 		enable_kernel_fp();
-		cvt_fd((float *)&data.v[4], &data.dd);
+		cvt_fd((float *)&data.x32.low32, &data.dd);
 		preempt_enable();
 #else
 		return 0;
@@ -957,25 +1005,28 @@ int fix_alignment(struct pt_regs *regs)
 
 	/* Store result to memory or update registers */
 	if (flags & ST) {
-		ret = 0;
-		p = (unsigned long) addr;
+		unsigned int start = 0;
+
 		switch (nb) {
-		case 8:
-			ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++));
-			ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++));
-			ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++));
-			ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++));
 		case 4:
-			ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++));
-			ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++));
+			start = offsetof(union data, x32.low32);
+			break;
 		case 2:
-			ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++));
-			ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++));
+			start = offsetof(union data, x16.low16);
+			break;
 		}
+
+		ret = 0;
+		p = (unsigned long)addr;
+
+		for (i = 0; i < nb; i++)
+			ret |= __put_user_inatomic(data.v[start + i],
+						   SWIZ_PTR(p++));
+
 		if (unlikely(ret))
 			return -EFAULT;
 	} else if (flags & F)
-		current->thread.TS_FPR(reg) = data.dd;
+		current->thread.TS_FPR(reg) = data.ll;
 	else
 		regs->gpr[reg] = data.ll;
 

+ 11 - 18
arch/powerpc/kernel/asm-offsets.c

@@ -90,16 +90,17 @@ int main(void)
 	DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
 #endif
 	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
-	DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
-	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
+	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
+	DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
+	DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
 #ifdef CONFIG_ALTIVEC
-	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
+	DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
+	DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
 	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
-	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
 	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
+	DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
-	DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
 	DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_PPC64
@@ -114,7 +115,7 @@ int main(void)
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));
 #endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
@@ -143,20 +144,12 @@ int main(void)
 	DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
 	DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
 	DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
-	DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
-					 transact_vr[0]));
-	DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct,
-					  transact_vscr));
+	DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
+						 transact_vr));
 	DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
 					    transact_vrsave));
-	DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct,
-					  transact_fpr[0]));
-	DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct thread_struct,
-					   transact_fpscr));
-#ifdef CONFIG_VSX
-	DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct,
-					  transact_fpr[0]));
-#endif
+	DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
+						 transact_fp));
 	/* Local pt_regs on stack for Transactional Memory funcs. */
 	DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
 	       sizeof(struct pt_regs) + 16);

+ 3 - 3
arch/powerpc/kernel/eeh.c

@@ -327,11 +327,11 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
 	/* Isolate the PHB and send event */
 	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
 	eeh_serialize_unlock(flags);
-	eeh_send_failure_event(phb_pe);
 
 	pr_err("EEH: PHB#%x failure detected\n",
 		phb_pe->phb->global_number);
 	dump_stack();
+	eeh_send_failure_event(phb_pe);
 
 	return 1;
 out:
@@ -454,8 +454,6 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 	eeh_serialize_unlock(flags);
 
-	eeh_send_failure_event(pe);
-
 	/* Most EEH events are due to device driver bugs.  Having
 	 * a stack trace will help the device-driver authors figure
 	 * out what happened.  So print that out.
@@ -464,6 +462,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 		pe->addr, pe->phb->global_number);
 	dump_stack();
 
+	eeh_send_failure_event(pe);
+
 	return 1;
 
 dn_unlock:

+ 30 - 20
arch/powerpc/kernel/entry_64.S

@@ -673,9 +673,7 @@ _GLOBAL(ret_from_except_lite)
 
 resume_kernel:
 	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
-	CURRENT_THREAD_INFO(r9, r1)
-	ld	r8,TI_FLAGS(r9)
-	andis.	r8,r8,_TIF_EMULATE_STACK_STORE@h
+	andis.	r8,r4,_TIF_EMULATE_STACK_STORE@h
 	beq+	1f
 
 	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
@@ -820,6 +818,12 @@ fast_exception_return:
 	andi.	r0,r3,MSR_RI
 	beq-	unrecov_restore
 
+	/* Load PPR from thread struct before we clear MSR:RI */
+BEGIN_FTR_SECTION
+	ld	r2,PACACURRENT(r13)
+	ld	r2,TASKTHREADPPR(r2)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
 	/*
 	 * Clear RI before restoring r13.  If we are returning to
 	 * userspace and we take an exception after restoring r13,
@@ -840,8 +844,10 @@ fast_exception_return:
 	 */
 	andi.	r0,r3,MSR_PR
 	beq	1f
+BEGIN_FTR_SECTION
+	mtspr	SPRN_PPR,r2	/* Restore PPR */
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ACCOUNT_CPU_USER_EXIT(r2, r4)
-	RESTORE_PPR(r2, r4)
 	REST_GPR(13, r1)
 1:
 	mtspr	SPRN_SRR1,r3
@@ -1017,7 +1023,7 @@ _GLOBAL(enter_rtas)
 	
         li      r9,1
         rldicr  r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
-	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI
+	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
 	andc	r6,r0,r9
 	sync				/* disable interrupts so SRR0/1 */
 	mtmsrd	r0			/* don't get trashed */
@@ -1032,6 +1038,8 @@ _GLOBAL(enter_rtas)
 	b	.	/* prevent speculative execution */
 
 _STATIC(rtas_return_loc)
+	FIXUP_ENDIAN
+
 	/* relocation is off at this point */
 	GET_PACA(r4)
 	clrldi	r4,r4,2			/* convert to realmode address */
@@ -1103,28 +1111,30 @@ _GLOBAL(enter_prom)
 	std	r10,_CCR(r1)
 	std	r11,_MSR(r1)
 
-	/* Get the PROM entrypoint */
-	mtlr	r4
+	/* Put PROM address in SRR0 */
+	mtsrr0	r4
 
-	/* Switch MSR to 32 bits mode
+	/* Setup our trampoline return addr in LR */
+	bcl	20,31,$+4
+0:	mflr	r4
+	addi	r4,r4,(1f - 0b)
+	mtlr	r4
+
+	/* Prepare a 32-bit mode big endian MSR
 	 */
 #ifdef CONFIG_PPC_BOOK3E
 	rlwinm	r11,r11,0,1,31
-	mtmsr	r11
+	mtsrr1	r11
+	rfi
 #else /* CONFIG_PPC_BOOK3E */
-        mfmsr   r11
-        li      r12,1
-        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-        andc    r11,r11,r12
-        li      r12,1
-        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
-        andc    r11,r11,r12
-        mtmsrd  r11
+	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
+	andc	r11,r11,r12
+	mtsrr1	r11
+	rfid
 #endif /* CONFIG_PPC_BOOK3E */
-        isync
 
-	/* Enter PROM here... */
-	blrl
+1:	/* Return from OF */
+	FIXUP_ENDIAN
 
 	/* Just make sure that r1 top 32 bits didn't get
 	 * corrupt by OF

+ 4 - 2
arch/powerpc/kernel/exceptions-64e.S

@@ -399,7 +399,7 @@ interrupt_end_book3e:
 
 /* Altivec Unavailable Interrupt */
 	START_EXCEPTION(altivec_unavailable);
-	NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL,
+	NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL,
 				PROLOG_ADDITION_NONE)
 	/* we can probably do a shorter exception entry for that one... */
 	EXCEPTION_COMMON(0x200, PACA_EXGEN, INTS_KEEP)
@@ -421,7 +421,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 
 /* AltiVec Assist */
 	START_EXCEPTION(altivec_assist);
-	NORMAL_EXCEPTION_PROLOG(0x220, BOOKE_INTERRUPT_ALTIVEC_ASSIST,
+	NORMAL_EXCEPTION_PROLOG(0x220,
+				BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST,
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x220, PACA_EXGEN, INTS_DISABLE)
 	bl	.save_nvgprs
@@ -607,6 +608,7 @@ kernel_dbg_exc:
 	NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
+	CHECK_NAPPING()
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.performance_monitor_exception
 	b	.ret_from_except_lite

+ 39 - 47
arch/powerpc/kernel/fpu.S

@@ -35,15 +35,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
 2:	REST_32VSRS(n,c,base);						\
 3:
 
-#define __REST_32FPVSRS_TRANSACT(n,c,base)				\
-BEGIN_FTR_SECTION							\
-	b	2f;							\
-END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
-	REST_32FPRS_TRANSACT(n,base);					\
-	b	3f;							\
-2:	REST_32VSRS_TRANSACT(n,c,base);					\
-3:
-
 #define __SAVE_32FPVSRS(n,c,base)					\
 BEGIN_FTR_SECTION							\
 	b	2f;							\
@@ -54,40 +45,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
 3:
 #else
 #define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
-#define __REST_32FPVSRS_TRANSACT(n,b,base)	REST_32FPRS(n, base)
 #define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
 #endif
 #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
-#define REST_32FPVSRS_TRANSACT(n,c,base) \
-	__REST_32FPVSRS_TRANSACT(n,__REG_##c,__REG_##base)
 #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Wrapper to call load_up_fpu from C.
- * void do_load_up_fpu(struct pt_regs *regs);
- */
-_GLOBAL(do_load_up_fpu)
-	mflr	r0
-	std	r0, 16(r1)
-	stdu	r1, -112(r1)
-
-	subi	r6, r3, STACK_FRAME_OVERHEAD
-	/* load_up_fpu expects r12=MSR, r13=PACA, and returns
-	 * with r12 = new MSR.
-	 */
-	ld	r12,_MSR(r6)
-	GET_PACA(r13)
-
-	bl	load_up_fpu
-	std	r12,_MSR(r6)
-
-	ld	r0, 112+16(r1)
-	addi	r1, r1, 112
-	mtlr	r0
-	blr
-
-
 /* void do_load_up_transact_fpu(struct thread_struct *thread)
  *
  * This is similar to load_up_fpu but for the transactional version of the FP
@@ -105,9 +68,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	SYNC
 	MTMSRD(r5)
 
-	lfd	fr0,THREAD_TRANSACT_FPSCR(r3)
+	addi	r7,r3,THREAD_TRANSACT_FPSTATE
+	lfd	fr0,FPSTATE_FPSCR(r7)
 	MTFSF_L(fr0)
-	REST_32FPVSRS_TRANSACT(0, R4, R3)
+	REST_32FPVSRS(0, R4, R7)
 
 	/* FP/VSX off again */
 	MTMSRD(r6)
@@ -116,12 +80,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	blr
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
+/*
+ * Load state from memory into FP registers including FPSCR.
+ * Assumes the caller has enabled FP in the MSR.
+ */
+_GLOBAL(load_fp_state)
+	lfd	fr0,FPSTATE_FPSCR(r3)
+	MTFSF_L(fr0)
+	REST_32FPVSRS(0, R4, R3)
+	blr
+
+/*
+ * Store FP state into memory, including FPSCR
+ * Assumes the caller has enabled FP in the MSR.
+ */
+_GLOBAL(store_fp_state)
+	SAVE_32FPVSRS(0, R4, R3)
+	mffs	fr0
+	stfd	fr0,FPSTATE_FPSCR(r3)
+	blr
+
 /*
  * This task wants to use the FPU now.
  * On UP, disable FP for the task which had the FPU previously,
  * and save its floating-point registers in its thread_struct.
  * Load up this task's FP registers from its thread_struct,
  * enable the FPU for the current task and return to the task.
+ * Note that on 32-bit this can only use registers that will be
+ * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
  */
 _GLOBAL(load_up_fpu)
 	mfmsr	r5
@@ -147,9 +133,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	beq	1f
 	toreal(r4)
 	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
-	SAVE_32FPVSRS(0, R5, R4)
+	addi	r10,r4,THREAD_FPSTATE
+	SAVE_32FPVSRS(0, R5, R10)
 	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r4)
+	stfd	fr0,FPSTATE_FPSCR(r10)
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
@@ -160,7 +147,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif /* CONFIG_SMP */
 	/* enable use of FP after return */
 #ifdef CONFIG_PPC32
-	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
+	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
 	lwz	r4,THREAD_FPEXC_MODE(r5)
 	ori	r9,r9,MSR_FP		/* enable FP for current */
 	or	r9,r9,r4
@@ -172,9 +159,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	or	r12,r12,r4
 	std	r12,_MSR(r1)
 #endif
-	lfd	fr0,THREAD_FPSCR(r5)
+	addi	r10,r5,THREAD_FPSTATE
+	lfd	fr0,FPSTATE_FPSCR(r10)
 	MTFSF_L(fr0)
-	REST_32FPVSRS(0, R4, R5)
+	REST_32FPVSRS(0, R4, R10)
 #ifndef CONFIG_SMP
 	subi	r4,r5,THREAD
 	fromreal(r4)
@@ -206,11 +194,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	PPC_LCMPI	0,r3,0
 	beqlr-				/* if no previous owner, done */
 	addi	r3,r3,THREAD	        /* want THREAD of task */
+	PPC_LL	r6,THREAD_FPSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
-	PPC_LCMPI	0,r5,0
-	SAVE_32FPVSRS(0, R4 ,R3)
+	PPC_LCMPI	0,r6,0
+	bne	2f
+	addi	r6,r3,THREAD_FPSTATE
+2:	PPC_LCMPI	0,r5,0
+	SAVE_32FPVSRS(0, R4, R6)
 	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r3)
+	stfd	fr0,FPSTATE_FPSCR(r6)
 	beq	1f
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 	li	r3,MSR_FP|MSR_FE0|MSR_FE1

+ 4 - 0
arch/powerpc/kernel/ftrace.c

@@ -174,7 +174,11 @@ __ftrace_make_nop(struct module *mod,
 
 	pr_devel(" %08x %08x\n", jmp[0], jmp[1]);
 
+#ifdef __LITTLE_ENDIAN__
+	ptr = ((unsigned long)jmp[1] << 32) + jmp[0];
+#else
 	ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
+#endif
 
 	/* This should match what was called */
 	if (ptr != ppc_function_entry((void *)addr)) {

+ 3 - 0
arch/powerpc/kernel/head_64.S

@@ -68,6 +68,7 @@ _stext:
 _GLOBAL(__start)
 	/* NOP this out unconditionally */
 BEGIN_FTR_SECTION
+	FIXUP_ENDIAN
 	b	.__start_initialization_multiplatform
 END_FTR_SECTION(0, 1)
 
@@ -115,6 +116,7 @@ __run_at_load:
  */
 	.globl	__secondary_hold
 __secondary_hold:
+	FIXUP_ENDIAN
 #ifndef CONFIG_PPC_BOOK3E
 	mfmsr	r24
 	ori	r24,r24,MSR_RI
@@ -205,6 +207,7 @@ _GLOBAL(generic_secondary_thread_init)
  * as SCOM before entry).
  */
 _GLOBAL(generic_secondary_smp_init)
+	FIXUP_ENDIAN
 	mr	r24,r3
 	mr	r25,r4
 

+ 3 - 0
arch/powerpc/kernel/head_8xx.S

@@ -858,6 +858,9 @@ initial_mmu:
 	addis	r11, r11, 0x0080	/* Add 8M */
 	mtspr	SPRN_MD_RPN, r11
 
+	addi	r10, r10, 0x0100
+	mtspr	SPRN_MD_CTR, r10
+
 	addis	r8, r8, 0x0080		/* Add 8M */
 	mtspr	SPRN_MD_EPN, r8
 	mtspr	SPRN_MD_TWC, r9

+ 5 - 5
arch/powerpc/kernel/head_fsl_booke.S

@@ -555,27 +555,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 #ifdef CONFIG_SPE
 	/* SPE Unavailable */
 	START_EXCEPTION(SPEUnavailable)
-	NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
+	NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL)
 	beq	1f
 	bl	load_up_spe
 	b	fast_exception_return
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	EXC_XFER_EE_LITE(0x2010, KernelSPE)
 #else
-	EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
+	EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \
 		  unknown_exception, EXC_XFER_EE)
 #endif /* CONFIG_SPE */
 
 	/* SPE Floating Point Data */
 #ifdef CONFIG_SPE
-	EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \
-		  SPEFloatingPointException, EXC_XFER_EE);
+	EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+		  SPEFloatingPointException, EXC_XFER_EE)
 
 	/* SPE Floating Point Round */
 	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
 		  SPEFloatingPointRoundException, EXC_XFER_EE)
 #else
-	EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \
+	EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
 		  unknown_exception, EXC_XFER_EE)
 	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
 		  unknown_exception, EXC_XFER_EE)

+ 3 - 3
arch/powerpc/kernel/kgdb.c

@@ -151,15 +151,16 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
 	return 1;
 }
 
+static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info);
 static int kgdb_singlestep(struct pt_regs *regs)
 {
 	struct thread_info *thread_info, *exception_thread_info;
-	struct thread_info *backup_current_thread_info;
+	struct thread_info *backup_current_thread_info =
+		&__get_cpu_var(kgdb_thread_info);
 
 	if (user_mode(regs))
 		return 0;
 
-	backup_current_thread_info = kmalloc(sizeof(struct thread_info), GFP_KERNEL);
 	/*
 	 * On Book E and perhaps other processors, singlestep is handled on
 	 * the critical exception stack.  This causes current_thread_info()
@@ -185,7 +186,6 @@ static int kgdb_singlestep(struct pt_regs *regs)
 		/* Restore current_thread_info lastly. */
 		memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info);
 
-	kfree(backup_current_thread_info);
 	return 1;
 }
 

+ 1 - 1
arch/powerpc/kernel/legacy_serial.c

@@ -35,7 +35,7 @@ static struct legacy_serial_info {
 	phys_addr_t			taddr;
 } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
 
-static struct __initdata of_device_id legacy_serial_parents[] = {
+static struct of_device_id legacy_serial_parents[] __initdata = {
 	{.type = "soc",},
 	{.type = "tsi-bridge",},
 	{.type = "opb", },

+ 1 - 1
arch/powerpc/kernel/machine_kexec_64.c

@@ -312,7 +312,7 @@ static union thread_union kexec_stack __init_task_data =
  */
 struct paca_struct kexec_paca;
 
-/* Our assembly helper, in kexec_stub.S */
+/* Our assembly helper, in misc_64.S */
 extern void kexec_sequence(void *newstack, unsigned long start,
 			   void *image, void *control,
 			   void (*clear_all)(void)) __noreturn;

+ 14 - 0
arch/powerpc/kernel/misc_32.S

@@ -658,6 +658,20 @@ _GLOBAL(__lshrdi3)
 	or	r4,r4,r7	# LSW |= t2
 	blr
 
+/*
+ * 64-bit comparison: __cmpdi2(s64 a, s64 b)
+ * Returns 0 if a < b, 1 if a == b, 2 if a > b.
+ */
+_GLOBAL(__cmpdi2)
+	cmpw	r3,r5
+	li	r3,1
+	bne	1f
+	cmplw	r4,r6
+	beqlr
+1:	li	r3,0
+	bltlr
+	li	r3,2
+	blr
 /*
  * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
  * Returns 0 if a < b, 1 if a == b, 2 if a > b.
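__cmpdi2 is a libgcc support routine that GCC emits calls to for signed 64-bit comparisons on 32-bit targets; the assembly above compares the high words signed, then the low words unsigned. Its contract in C terms:

	int __cmpdi2(long long a, long long b)
	{
		if (a < b)
			return 0;
		if (a > b)
			return 2;
		return 1;
	}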

+ 1 - 2
arch/powerpc/kernel/module.c

@@ -25,8 +25,7 @@
 #include <asm/uaccess.h>
 #include <asm/firmware.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 LIST_HEAD(module_bug_list);
 

+ 1 - 2
arch/powerpc/kernel/module_32.c

@@ -26,8 +26,7 @@
 #include <linux/cache.h>
 #include <linux/bug.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP printk

+ 17 - 2
arch/powerpc/kernel/module_64.c

@@ -26,8 +26,7 @@
 #include <asm/firmware.h>
 #include <asm/code-patching.h>
 #include <linux/sort.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 /* FIXME: We don't do .init separately.  To do this, we'd need to have
    a separate r2 value in the init and core section, and stub between
@@ -62,6 +61,16 @@ struct ppc64_stub_entry
    r2) into the stub. */
 static struct ppc64_stub_entry ppc64_stub =
 { .jump = {
+#ifdef __LITTLE_ENDIAN__
+	0x00, 0x00, 0x82, 0x3d, /* addis   r12,r2, <high> */
+	0x00, 0x00, 0x8c, 0x39, /* addi    r12,r12, <low> */
+	/* Save current r2 value in magic place on the stack. */
+	0x28, 0x00, 0x41, 0xf8, /* std     r2,40(r1) */
+	0x20, 0x00, 0x6c, 0xe9, /* ld      r11,32(r12) */
+	0x28, 0x00, 0x4c, 0xe8, /* ld      r2,40(r12) */
+	0xa6, 0x03, 0x69, 0x7d, /* mtctr   r11 */
+	0x20, 0x04, 0x80, 0x4e  /* bctr */
+#else
 	0x3d, 0x82, 0x00, 0x00, /* addis   r12,r2, <high> */
 	0x39, 0x8c, 0x00, 0x00, /* addi    r12,r12, <low> */
 	/* Save current r2 value in magic place on the stack. */
@@ -70,6 +79,7 @@ static struct ppc64_stub_entry ppc64_stub =
 	0xe8, 0x4c, 0x00, 0x28, /* ld      r2,40(r12) */
 	0x7d, 0x69, 0x03, 0xa6, /* mtctr   r11 */
 	0x4e, 0x80, 0x04, 0x20  /* bctr */
+#endif
 } };
 
 /* Count how many different 24-bit relocations (different symbol,
@@ -269,8 +279,13 @@ static inline int create_stub(Elf64_Shdr *sechdrs,
 
 	*entry = ppc64_stub;
 
+#ifdef __LITTLE_ENDIAN__
+	loc1 = (Elf64_Half *)&entry->jump[0];
+	loc2 = (Elf64_Half *)&entry->jump[4];
+#else
 	loc1 = (Elf64_Half *)&entry->jump[2];
 	loc2 = (Elf64_Half *)&entry->jump[6];
+#endif
 
 	/* Stub uses address relative to r2. */
 	reladdr = (unsigned long)entry - my_r2(sechdrs, me);
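Since the stub template is declared as raw bytes, the little-endian build needs the byte-reversed image of every instruction word, and the loc1/loc2 patch offsets move because the 16-bit immediate occupies the low-order bytes of the word: bytes 2-3 on BE, bytes 0-1 on LE. For example:

	uint32_t addis_r12_r2 = 0x3d820000;	/* addis r12,r2,<imm16> */
	unsigned char *b = (unsigned char *)&addis_r12_r2;

	/* BE memory: b[0..3] = 3d 82 00 00, immediate at b[2..3]
	 * LE memory: b[0..3] = 00 00 82 3d, immediate at b[0..1] */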

+ 8 - 2
arch/powerpc/kernel/nvram_64.c

@@ -223,9 +223,13 @@ static int __init nvram_write_header(struct nvram_partition * part)
 {
 	loff_t tmp_index;
 	int rc;
-	
+	struct nvram_header phead;
+
+	memcpy(&phead, &part->header, NVRAM_HEADER_LEN);
+	phead.length = cpu_to_be16(phead.length);
+
 	tmp_index = part->index;
-	rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); 
+	rc = ppc_md.nvram_write((char *)&phead, NVRAM_HEADER_LEN, &tmp_index);
 
 	return rc;
 }
@@ -505,6 +509,8 @@ int __init nvram_scan_partitions(void)
 
 		memcpy(&phead, header, NVRAM_HEADER_LEN);
 
+		phead.length = be16_to_cpu(phead.length);
+
 		err = 0;
 		c_sum = nvram_checksum(&phead);
 		if (c_sum != phead.checksum) {

+ 3 - 3
arch/powerpc/kernel/paca.c

@@ -46,7 +46,7 @@ struct lppaca lppaca[] = {
 static struct lppaca *extra_lppacas;
 static long __initdata lppaca_size;
 
-static void allocate_lppacas(int nr_cpus, unsigned long limit)
+static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
 {
 	if (nr_cpus <= NR_LPPACAS)
 		return;
@@ -57,7 +57,7 @@ static void allocate_lppacas(int nr_cpus, unsigned long limit)
 						 PAGE_SIZE, limit));
 }
 
-static struct lppaca *new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu)
 {
 	struct lppaca *lp;
 
@@ -70,7 +70,7 @@ static struct lppaca *new_lppaca(int cpu)
 	return lp;
 }
 
-static void free_lppacas(void)
+static void __init free_lppacas(void)
 {
 	long new_size = 0, nr;
 

+ 2 - 2
arch/powerpc/kernel/pci_of_scan.c

@@ -302,7 +302,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
 			    struct device_node *dn)
 {
 	struct pci_dev *dev = NULL;
-	const u32 *reg;
+	const __be32 *reg;
 	int reglen, devfn;
 
 	pr_debug("  * %s\n", dn->full_name);
@@ -312,7 +312,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
 	reg = of_get_property(dn, "reg", &reglen);
 	if (reg == NULL || reglen < 20)
 		return NULL;
-	devfn = (reg[0] >> 8) & 0xff;
+	devfn = (of_read_number(reg, 1) >> 8) & 0xff;
 
 	/* Check if the PCI device is already there */
 	dev = pci_get_slot(bus, devfn);
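Device-tree property data is always big-endian, so dereferencing reg[0] directly returned a byte-swapped value on LE kernels; of_read_number() performs the conversion. The general pattern, sketched:

	const __be32 *reg = of_get_property(dn, "reg", &reglen);

	if (reg && reglen >= 4) {
		u32 cell = of_read_number(reg, 1);	/* be32 -> host order */
		int devfn = (cell >> 8) & 0xff;
	}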

+ 10 - 0
arch/powerpc/kernel/ppc_ksyms.c

@@ -79,10 +79,12 @@ EXPORT_SYMBOL(strlen);
 EXPORT_SYMBOL(strcmp);
 EXPORT_SYMBOL(strncmp);
 
+#ifndef CONFIG_GENERIC_CSUM
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_generic);
 EXPORT_SYMBOL(ip_fast_csum);
 EXPORT_SYMBOL(csum_tcpudp_magic);
+#endif
 
 EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
@@ -98,9 +100,13 @@ EXPORT_SYMBOL(start_thread);
 
 #ifdef CONFIG_PPC_FPU
 EXPORT_SYMBOL(giveup_fpu);
+EXPORT_SYMBOL(load_fp_state);
+EXPORT_SYMBOL(store_fp_state);
 #endif
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL(giveup_altivec);
+EXPORT_SYMBOL(load_vr_state);
+EXPORT_SYMBOL(store_vr_state);
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
 EXPORT_SYMBOL(giveup_vsx);
@@ -143,10 +149,14 @@ EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__lshrdi3);
 int __ucmpdi2(unsigned long long, unsigned long long);
 EXPORT_SYMBOL(__ucmpdi2);
+int __cmpdi2(long long, long long);
+EXPORT_SYMBOL(__cmpdi2);
 #endif
 long long __bswapdi2(long long);
 EXPORT_SYMBOL(__bswapdi2);
+#ifdef __BIG_ENDIAN__
 EXPORT_SYMBOL(memcpy);
+#endif
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
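
The pattern in these hunks is uniform: an EXPORT_SYMBOL() must track the condition under which the symbol is actually compiled, or modules are offered a name no object file defines and the build breaks at link time. With CONFIG_GENERIC_CSUM the asm checksum routines give way to generic C ones, and the optimized memcpy is, at this point in the series, built for big-endian only; sketched:

    /* Keep each export under the same condition as its definition. */
    #ifndef CONFIG_GENERIC_CSUM
    EXPORT_SYMBOL(csum_partial);        /* asm version is built */
    #endif

    #ifdef __BIG_ENDIAN__
    EXPORT_SYMBOL(memcpy);              /* asm memcpy not yet ported to LE */
    #endif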

+ 35 - 32
arch/powerpc/kernel/process.c

@@ -314,28 +314,28 @@ static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
  */
 static void set_debug_reg_defaults(struct thread_struct *thread)
 {
-	thread->iac1 = thread->iac2 = 0;
+	thread->debug.iac1 = thread->debug.iac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-	thread->iac3 = thread->iac4 = 0;
+	thread->debug.iac3 = thread->debug.iac4 = 0;
 #endif
-	thread->dac1 = thread->dac2 = 0;
+	thread->debug.dac1 = thread->debug.dac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-	thread->dvc1 = thread->dvc2 = 0;
+	thread->debug.dvc1 = thread->debug.dvc2 = 0;
 #endif
-	thread->dbcr0 = 0;
+	thread->debug.dbcr0 = 0;
 #ifdef CONFIG_BOOKE
 	/*
 	 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
 	 */
-	thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |	\
+	thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
 			DBCR1_IAC3US | DBCR1_IAC4US;
 	/*
 	 * Force Data Address Compare User/Supervisor bits to be User-only
 	 * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
 	 */
-	thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+	thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
 #else
-	thread->dbcr1 = 0;
+	thread->debug.dbcr1 = 0;
 #endif
 }
 
@@ -348,22 +348,22 @@ static void prime_debug_regs(struct thread_struct *thread)
 	 */
 	mtmsr(mfmsr() & ~MSR_DE);
 
-	mtspr(SPRN_IAC1, thread->iac1);
-	mtspr(SPRN_IAC2, thread->iac2);
+	mtspr(SPRN_IAC1, thread->debug.iac1);
+	mtspr(SPRN_IAC2, thread->debug.iac2);
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-	mtspr(SPRN_IAC3, thread->iac3);
-	mtspr(SPRN_IAC4, thread->iac4);
+	mtspr(SPRN_IAC3, thread->debug.iac3);
+	mtspr(SPRN_IAC4, thread->debug.iac4);
 #endif
-	mtspr(SPRN_DAC1, thread->dac1);
-	mtspr(SPRN_DAC2, thread->dac2);
+	mtspr(SPRN_DAC1, thread->debug.dac1);
+	mtspr(SPRN_DAC2, thread->debug.dac2);
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-	mtspr(SPRN_DVC1, thread->dvc1);
-	mtspr(SPRN_DVC2, thread->dvc2);
+	mtspr(SPRN_DVC1, thread->debug.dvc1);
+	mtspr(SPRN_DVC2, thread->debug.dvc2);
 #endif
-	mtspr(SPRN_DBCR0, thread->dbcr0);
-	mtspr(SPRN_DBCR1, thread->dbcr1);
+	mtspr(SPRN_DBCR0, thread->debug.dbcr0);
+	mtspr(SPRN_DBCR1, thread->debug.dbcr1);
 #ifdef CONFIG_BOOKE
-	mtspr(SPRN_DBCR2, thread->dbcr2);
+	mtspr(SPRN_DBCR2, thread->debug.dbcr2);
 #endif
 }
 /*
@@ -371,12 +371,13 @@ static void prime_debug_regs(struct thread_struct *thread)
  * debug registers, set the debug registers from the values
  * stored in the new thread.
  */
-static void switch_booke_debug_regs(struct thread_struct *new_thread)
+void switch_booke_debug_regs(struct thread_struct *new_thread)
 {
-	if ((current->thread.dbcr0 & DBCR0_IDM)
-		|| (new_thread->dbcr0 & DBCR0_IDM))
+	if ((current->thread.debug.dbcr0 & DBCR0_IDM)
+		|| (new_thread->debug.dbcr0 & DBCR0_IDM))
 			prime_debug_regs(new_thread);
 }
+EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
 static void set_debug_reg_defaults(struct thread_struct *thread)
@@ -596,12 +597,13 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	struct task_struct *new)
 {
 	struct thread_struct *new_thread, *old_thread;
-	unsigned long flags;
 	struct task_struct *last;
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct ppc64_tlb_batch *batch;
 #endif
 
+	WARN_ON(!irqs_disabled());
+
 	/* Back up the TAR across context switches.
 	 * Note that the TAR is not available for use in the kernel.  (To
 	 * provide this, the TAR should be backed up/restored on exception
@@ -721,8 +723,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
-	local_irq_save(flags);
-
 	/*
 	 * We can't take a PMU exception inside _switch() since there is a
 	 * window where the kernel stack SLB and the kernel stack are out
@@ -742,8 +742,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
-	local_irq_restore(flags);
-
 	return last;
 }
 
@@ -1008,6 +1006,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	p->thread.ptrace_bps[0] = NULL;
 #endif
 
+	p->thread.fp_save_area = NULL;
+#ifdef CONFIG_ALTIVEC
+	p->thread.vr_save_area = NULL;
+#endif
+
 #ifdef CONFIG_PPC_STD_MMU_64
 	if (mmu_has_feature(MMU_FTR_SLB)) {
 		unsigned long sp_vsid;
@@ -1113,12 +1116,12 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 #ifdef CONFIG_VSX
 	current->thread.used_vsr = 0;
 #endif
-	memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
-	current->thread.fpscr.val = 0;
+	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
+	current->thread.fp_save_area = NULL;
 #ifdef CONFIG_ALTIVEC
-	memset(current->thread.vr, 0, sizeof(current->thread.vr));
-	memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
-	current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
+	memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
+	current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
+	current->thread.vr_save_area = NULL;
 	current->thread.vrsave = 0;
 	current->thread.used_vr = 0;
 #endif /* CONFIG_ALTIVEC */
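
The mechanical thread.X → thread.debug.X renames reflect the BookE debug registers being gathered into one embedded struct, so the whole block can be copied or handed around as a unit (the new switch_booke_debug_regs() export points the same way). An illustrative shape, using only the field names visible in the hunks above:

    /* Illustrative grouping of the state now reached via thread->debug. */
    struct debug_reg_sketch {
        unsigned long dbcr0, dbcr1, dbcr2;      /* debug control */
        unsigned long iac1, iac2, iac3, iac4;   /* instruction address compare */
        unsigned long dac1, dac2;               /* data address compare */
        unsigned long dvc1, dvc2;               /* data value compare */
        unsigned long dbsr;                     /* debug status */
    };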

+ 0 - 31
arch/powerpc/kernel/prom.c

@@ -760,37 +760,6 @@ void __init early_init_devtree(void *params)
  *
  *******/
 
-/**
- *	of_find_next_cache_node - Find a node's subsidiary cache
- *	@np:	node of type "cpu" or "cache"
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.  Caller should hold a reference
- *	to np.
- */
-struct device_node *of_find_next_cache_node(struct device_node *np)
-{
-	struct device_node *child;
-	const phandle *handle;
-
-	handle = of_get_property(np, "l2-cache", NULL);
-	if (!handle)
-		handle = of_get_property(np, "next-level-cache", NULL);
-
-	if (handle)
-		return of_find_node_by_phandle(*handle);
-
-	/* OF on pmac has nodes instead of properties named "l2-cache"
-	 * beneath CPU nodes.
-	 */
-	if (!strcmp(np->type, "cpu"))
-		for_each_child_of_node(np, child)
-			if (!strcmp(child->type, "cache"))
-				return child;
-
-	return NULL;
-}
-
 /**
  * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device
  * @np: device node of the device
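
The helper is deleted here only because it moves to common OF code (the "of:" commit earlier in the series); its contract is unchanged: the returned node's refcount is raised, so each level must be dropped with of_node_put(). A typical descent through a CPU's cache hierarchy, sketched against the real API:

    #include <linux/of.h>

    /* Walk from a CPU node down its cache hierarchy, releasing each
     * level's reference as we move to the next. */
    static void walk_cache_levels(struct device_node *cpu)
    {
        struct device_node *cache = of_find_next_cache_node(cpu);

        while (cache) {
            struct device_node *next = of_find_next_cache_node(cache);

            /* inspect "d-cache-size" and friends here */
            of_node_put(cache);
            cache = next;
        }
    }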

+ 22 - 6
arch/powerpc/kernel/prom_init.c

@@ -858,7 +858,8 @@ static void __init prom_send_capabilities(void)
 {
 	ihandle root;
 	prom_arg_t ret;
-	__be32 *cores;
+	u32 cores;
+	unsigned char *ptcores;
 
 	root = call_prom("open", 1, 1, ADDR("/"));
 	if (root != 0) {
@@ -868,15 +869,30 @@ static void __init prom_send_capabilities(void)
 		 * (we assume this is the same for all cores) and use it to
 		 * divide NR_CPUS.
 		 */
-		cores = (__be32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET];
-		if (be32_to_cpup(cores) != NR_CPUS) {
+
+		/* The core value may start at an odd address. If such a word
+		 * access is made at a cache line boundary, this leads to an
+		 * exception which may not be handled at this time.
+		 * Forcing a per byte access to avoid exception.
+		 */
+		ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET];
+		cores = 0;
+		cores |= ptcores[0] << 24;
+		cores |= ptcores[1] << 16;
+		cores |= ptcores[2] << 8;
+		cores |= ptcores[3];
+		if (cores != NR_CPUS) {
 			prom_printf("WARNING ! "
 				    "ibm_architecture_vec structure inconsistent: %lu!\n",
-				    be32_to_cpup(cores));
+				    cores);
 		} else {
-			*cores = cpu_to_be32(DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()));
+			cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
 			prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
-				    be32_to_cpup(cores), NR_CPUS);
+				    cores, NR_CPUS);
+			ptcores[0] = (cores >> 24) & 0xff;
+			ptcores[1] = (cores >> 16) & 0xff;
+			ptcores[2] = (cores >> 8) & 0xff;
+			ptcores[3] = cores & 0xff;
 		}
 
 		/* try calling the ibm,client-architecture-support method */
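
The comment in the hunk states the constraint: the core-count field may start at an odd address, and an unaligned word access that straddles a cache line can fault before the kernel is ready to handle it, so the value is assembled and written back one byte at a time. The same pattern as a standalone sketch:

    #include <stdint.h>

    /* Big-endian 32-bit accessors that touch memory one byte at a time,
     * so they are safe at any alignment. */
    static uint32_t get_be32_unaligned(const uint8_t *p)
    {
        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
    }

    static void put_be32_unaligned(uint8_t *p, uint32_t v)
    {
        p[0] = v >> 24;
        p[1] = v >> 16;
        p[2] = v >> 8;
        p[3] = v;
    }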

+ 105 - 104
arch/powerpc/kernel/ptrace.c

@@ -362,7 +362,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 		   void *kbuf, void __user *ubuf)
 {
 #ifdef CONFIG_VSX
-	double buf[33];
+	u64 buf[33];
 	int i;
 #endif
 	flush_fp_to_thread(target);
@@ -371,15 +371,15 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 	/* copy to local buffer then write that out */
 	for (i = 0; i < 32 ; i++)
 		buf[i] = target->thread.TS_FPR(i);
-	memcpy(&buf[32], &target->thread.fpscr, sizeof(double));
+	buf[32] = target->thread.fp_state.fpscr;
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
 
 #else
-	BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
-		     offsetof(struct thread_struct, TS_FPR(32)));
+	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+		     offsetof(struct thread_fp_state, fpr[32][0]));
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.fpr, 0, -1);
+				   &target->thread.fp_state, 0, -1);
 #endif
 }
 
@@ -388,7 +388,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		   const void *kbuf, const void __user *ubuf)
 {
 #ifdef CONFIG_VSX
-	double buf[33];
+	u64 buf[33];
 	int i;
 #endif
 	flush_fp_to_thread(target);
@@ -400,14 +400,14 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		return i;
 	for (i = 0; i < 32 ; i++)
 		target->thread.TS_FPR(i) = buf[i];
-	memcpy(&target->thread.fpscr, &buf[32], sizeof(double));
+	target->thread.fp_state.fpscr = buf[32];
 	return 0;
 #else
-	BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
-		     offsetof(struct thread_struct, TS_FPR(32)));
+	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+		     offsetof(struct thread_fp_state, fpr[32][0]));
 
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.fpr, 0, -1);
+				  &target->thread.fp_state, 0, -1);
 #endif
 }
 
@@ -440,11 +440,11 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
 
 	flush_altivec_to_thread(target);
 
-	BUILD_BUG_ON(offsetof(struct thread_struct, vscr) !=
-		     offsetof(struct thread_struct, vr[32]));
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
 
 	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.vr, 0,
+				  &target->thread.vr_state, 0,
 				  33 * sizeof(vector128));
 	if (!ret) {
 		/*
@@ -471,11 +471,12 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
 
 	flush_altivec_to_thread(target);
 
-	BUILD_BUG_ON(offsetof(struct thread_struct, vscr) !=
-		     offsetof(struct thread_struct, vr[32]));
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.vr, 0, 33 * sizeof(vector128));
+				 &target->thread.vr_state, 0,
+				 33 * sizeof(vector128));
 	if (!ret && count > 0) {
 		/*
 		 * We use only the first word of vrsave.
@@ -514,13 +515,13 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   void *kbuf, void __user *ubuf)
 {
-	double buf[32];
+	u64 buf[32];
 	int ret, i;
 
 	flush_vsx_to_thread(target);
 
 	for (i = 0; i < 32 ; i++)
-		buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET];
+		buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
 	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				  buf, 0, 32 * sizeof(double));
 
@@ -531,7 +532,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   const void *kbuf, const void __user *ubuf)
 {
-	double buf[32];
+	u64 buf[32];
 	int ret,i;
 
 	flush_vsx_to_thread(target);
@@ -539,7 +540,7 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				 buf, 0, 32 * sizeof(double));
 	for (i = 0; i < 32 ; i++)
-		target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+		target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
 
 
 	return ret;
@@ -657,7 +658,7 @@ static const struct user_regset native_regsets[] = {
 #endif
 #ifdef CONFIG_SPE
 	[REGSET_SPE] = {
-		.n = 35,
+		.core_note_type = NT_PPC_SPE, .n = 35,
 		.size = sizeof(u32), .align = sizeof(u32),
 		.active = evr_active, .get = evr_get, .set = evr_set
 	},
@@ -854,8 +855,8 @@ void user_enable_single_step(struct task_struct *task)
 
 	if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		task->thread.dbcr0 &= ~DBCR0_BT;
-		task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+		task->thread.debug.dbcr0 &= ~DBCR0_BT;
+		task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
 		regs->msr |= MSR_DE;
 #else
 		regs->msr &= ~MSR_BE;
@@ -871,8 +872,8 @@ void user_enable_block_step(struct task_struct *task)
 
 	if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		task->thread.dbcr0 &= ~DBCR0_IC;
-		task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT;
+		task->thread.debug.dbcr0 &= ~DBCR0_IC;
+		task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
 		regs->msr |= MSR_DE;
 #else
 		regs->msr &= ~MSR_SE;
@@ -894,16 +895,16 @@ void user_disable_single_step(struct task_struct *task)
 		 * And, after doing so, if all debug flags are off, turn
 		 * off DBCR0(IDM) and MSR(DE) .... Torez
 		 */
-		task->thread.dbcr0 &= ~DBCR0_IC;
+		task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT);
 		/*
 		 * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
 		 */
-		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-					task->thread.dbcr1)) {
+		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+					task->thread.debug.dbcr1)) {
 			/*
 			 * All debug events were off.....
 			 */
-			task->thread.dbcr0 &= ~DBCR0_IDM;
+			task->thread.debug.dbcr0 &= ~DBCR0_IDM;
 			regs->msr &= ~MSR_DE;
 		}
 #else
@@ -1022,14 +1023,14 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	 */
 
 	/* DAC's hold the whole address without any mode flags */
-	task->thread.dac1 = data & ~0x3UL;
+	task->thread.debug.dac1 = data & ~0x3UL;
 
-	if (task->thread.dac1 == 0) {
+	if (task->thread.debug.dac1 == 0) {
 		dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-		if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-					task->thread.dbcr1)) {
+		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+					task->thread.debug.dbcr1)) {
 			task->thread.regs->msr &= ~MSR_DE;
-			task->thread.dbcr0 &= ~DBCR0_IDM;
+			task->thread.debug.dbcr0 &= ~DBCR0_IDM;
 		}
 		return 0;
 	}
@@ -1041,7 +1042,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 
 	/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
 	   register */
-	task->thread.dbcr0 |= DBCR0_IDM;
+	task->thread.debug.dbcr0 |= DBCR0_IDM;
 
 	/* Check for write and read flags and set DBCR0
 	   accordingly */
@@ -1071,10 +1072,10 @@ static long set_instruction_bp(struct task_struct *child,
 			      struct ppc_hw_breakpoint *bp_info)
 {
 	int slot;
-	int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0);
-	int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0);
-	int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0);
-	int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0);
+	int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+	int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+	int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+	int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
 
 	if (dbcr_iac_range(child) & DBCR_IAC12MODE)
 		slot2_in_use = 1;
@@ -1093,9 +1094,9 @@ static long set_instruction_bp(struct task_struct *child,
 		/* We need a pair of IAC regsisters */
 		if ((!slot1_in_use) && (!slot2_in_use)) {
 			slot = 1;
-			child->thread.iac1 = bp_info->addr;
-			child->thread.iac2 = bp_info->addr2;
-			child->thread.dbcr0 |= DBCR0_IAC1;
+			child->thread.debug.iac1 = bp_info->addr;
+			child->thread.debug.iac2 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC1;
 			if (bp_info->addr_mode ==
 					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
 				dbcr_iac_range(child) |= DBCR_IAC12X;
@@ -1104,9 +1105,9 @@ static long set_instruction_bp(struct task_struct *child,
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 		} else if ((!slot3_in_use) && (!slot4_in_use)) {
 			slot = 3;
-			child->thread.iac3 = bp_info->addr;
-			child->thread.iac4 = bp_info->addr2;
-			child->thread.dbcr0 |= DBCR0_IAC3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.iac4 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
 			if (bp_info->addr_mode ==
 					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
 				dbcr_iac_range(child) |= DBCR_IAC34X;
@@ -1126,30 +1127,30 @@ static long set_instruction_bp(struct task_struct *child,
 			 */
 			if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
 				slot = 1;
-				child->thread.iac1 = bp_info->addr;
-				child->thread.dbcr0 |= DBCR0_IAC1;
+				child->thread.debug.iac1 = bp_info->addr;
+				child->thread.debug.dbcr0 |= DBCR0_IAC1;
 				goto out;
 			}
 		}
 		if (!slot2_in_use) {
 			slot = 2;
-			child->thread.iac2 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC2;
+			child->thread.debug.iac2 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC2;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 		} else if (!slot3_in_use) {
 			slot = 3;
-			child->thread.iac3 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
 		} else if (!slot4_in_use) {
 			slot = 4;
-			child->thread.iac4 = bp_info->addr;
-			child->thread.dbcr0 |= DBCR0_IAC4;
+			child->thread.debug.iac4 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC4;
 #endif
 		} else
 			return -ENOSPC;
 	}
 out:
-	child->thread.dbcr0 |= DBCR0_IDM;
+	child->thread.debug.dbcr0 |= DBCR0_IDM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return slot;
@@ -1159,49 +1160,49 @@ static int del_instruction_bp(struct task_struct *child, int slot)
 {
 	switch (slot) {
 	case 1:
-		if ((child->thread.dbcr0 & DBCR0_IAC1) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
 			/* address range - clear slots 1 & 2 */
-			child->thread.iac2 = 0;
+			child->thread.debug.iac2 = 0;
 			dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
 		}
-		child->thread.iac1 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC1;
+		child->thread.debug.iac1 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
 		break;
 	case 2:
-		if ((child->thread.dbcr0 & DBCR0_IAC2) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC12MODE)
 			/* used in a range */
 			return -EINVAL;
-		child->thread.iac2 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC2;
+		child->thread.debug.iac2 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
 		break;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
 	case 3:
-		if ((child->thread.dbcr0 & DBCR0_IAC3) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
 			/* address range - clear slots 3 & 4 */
-			child->thread.iac4 = 0;
+			child->thread.debug.iac4 = 0;
 			dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
 		}
-		child->thread.iac3 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC3;
+		child->thread.debug.iac3 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
 		break;
 	case 4:
-		if ((child->thread.dbcr0 & DBCR0_IAC4) == 0)
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
 			return -ENOENT;
 
 		if (dbcr_iac_range(child) & DBCR_IAC34MODE)
 			/* Used in a range */
 			return -EINVAL;
-		child->thread.iac4 = 0;
-		child->thread.dbcr0 &= ~DBCR0_IAC4;
+		child->thread.debug.iac4 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
 		break;
 #endif
 	default:
@@ -1231,18 +1232,18 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
 			dbcr_dac(child) |= DBCR_DAC1R;
 		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 			dbcr_dac(child) |= DBCR_DAC1W;
-		child->thread.dac1 = (unsigned long)bp_info->addr;
+		child->thread.debug.dac1 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 		if (byte_enable) {
-			child->thread.dvc1 =
+			child->thread.debug.dvc1 =
 				(unsigned long)bp_info->condition_value;
-			child->thread.dbcr2 |=
+			child->thread.debug.dbcr2 |=
 				((byte_enable << DBCR2_DVC1BE_SHIFT) |
 				 (condition_mode << DBCR2_DVC1M_SHIFT));
 		}
 #endif
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-	} else if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
+	} else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
 		/* Both dac1 and dac2 are part of a range */
 		return -ENOSPC;
 #endif
@@ -1252,19 +1253,19 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
 			dbcr_dac(child) |= DBCR_DAC2R;
 		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 			dbcr_dac(child) |= DBCR_DAC2W;
-		child->thread.dac2 = (unsigned long)bp_info->addr;
+		child->thread.debug.dac2 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
 		if (byte_enable) {
-			child->thread.dvc2 =
+			child->thread.debug.dvc2 =
 				(unsigned long)bp_info->condition_value;
-			child->thread.dbcr2 |=
+			child->thread.debug.dbcr2 |=
 				((byte_enable << DBCR2_DVC2BE_SHIFT) |
 				 (condition_mode << DBCR2_DVC2M_SHIFT));
 		}
 #endif
 	} else
 		return -ENOSPC;
-	child->thread.dbcr0 |= DBCR0_IDM;
+	child->thread.debug.dbcr0 |= DBCR0_IDM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return slot + 4;
@@ -1276,32 +1277,32 @@ static int del_dac(struct task_struct *child, int slot)
 		if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
 			return -ENOENT;
 
-		child->thread.dac1 = 0;
+		child->thread.debug.dac1 = 0;
 		dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
-			child->thread.dac2 = 0;
-			child->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+			child->thread.debug.dac2 = 0;
+			child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
 		}
-		child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-		child->thread.dvc1 = 0;
+		child->thread.debug.dvc1 = 0;
 #endif
 	} else if (slot == 2) {
 		if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
 			return -ENOENT;
 
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		if (child->thread.dbcr2 & DBCR2_DAC12MODE)
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
 			/* Part of a range */
 			return -EINVAL;
-		child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-		child->thread.dvc2 = 0;
+		child->thread.debug.dvc2 = 0;
 #endif
-		child->thread.dac2 = 0;
+		child->thread.debug.dac2 = 0;
 		dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
 	} else
 		return -EINVAL;
@@ -1343,22 +1344,22 @@ static int set_dac_range(struct task_struct *child,
 			return -EIO;
 	}
 
-	if (child->thread.dbcr0 &
+	if (child->thread.debug.dbcr0 &
 	    (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
 		return -ENOSPC;
 
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
-		child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
-		child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
-	child->thread.dac1 = bp_info->addr;
-	child->thread.dac2 = bp_info->addr2;
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+	child->thread.debug.dac1 = bp_info->addr;
+	child->thread.debug.dac2 = bp_info->addr2;
 	if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
-		child->thread.dbcr2  |= DBCR2_DAC12M;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12M;
 	else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
-		child->thread.dbcr2  |= DBCR2_DAC12MX;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MX;
 	else	/* PPC_BREAKPOINT_MODE_MASK */
-		child->thread.dbcr2  |= DBCR2_DAC12MM;
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MM;
 	child->thread.regs->msr |= MSR_DE;
 
 	return 5;
@@ -1489,9 +1490,9 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)
 		rc = del_dac(child, (int)data - 4);
 
 	if (!rc) {
-		if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0,
-					child->thread.dbcr1)) {
-			child->thread.dbcr0 &= ~DBCR0_IDM;
+		if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+					child->thread.debug.dbcr1)) {
+			child->thread.debug.dbcr0 &= ~DBCR0_IDM;
 			child->thread.regs->msr &= ~MSR_DE;
 		}
 	}
@@ -1554,10 +1555,10 @@ long arch_ptrace(struct task_struct *child, long request,
 
 			flush_fp_to_thread(child);
 			if (fpidx < (PT_FPSCR - PT_FPR0))
-				tmp = ((unsigned long *)child->thread.fpr)
-					[fpidx * TS_FPRWIDTH];
+				memcpy(&tmp, &child->thread.fp_state.fpr,
+				       sizeof(long));
 			else
-				tmp = child->thread.fpscr.val;
+				tmp = child->thread.fp_state.fpscr;
 		}
 		ret = put_user(tmp, datalp);
 		break;
@@ -1587,10 +1588,10 @@ long arch_ptrace(struct task_struct *child, long request,
 
 			flush_fp_to_thread(child);
 			if (fpidx < (PT_FPSCR - PT_FPR0))
-				((unsigned long *)child->thread.fpr)
-					[fpidx * TS_FPRWIDTH] = data;
+				memcpy(&child->thread.fp_state.fpr, &data,
+				       sizeof(long));
 			else
-				child->thread.fpscr.val = data;
+				child->thread.fp_state.fpscr = data;
 			ret = 0;
 		}
 		break;
@@ -1669,7 +1670,7 @@ long arch_ptrace(struct task_struct *child, long request,
 		if (addr > 0)
 			break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		ret = put_user(child->thread.dac1, datalp);
+		ret = put_user(child->thread.debug.dac1, datalp);
 #else
 		dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
 			     (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
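
Alongside the fp_state/debug renames, every staging buffer in this file changes from double[] to u64[]: declaring the buffer double invites the compiler to move values through FP loads and stores, which the kernel cannot count on (the FPU may be disabled at that point), while u64 guarantees plain integer copies of what is pure bit shuffling. Sketched:

    #include <stdint.h>
    #include <string.h>

    /* Stage FPR images as raw integers: a double[] buffer could tempt the
     * compiler into FP loads/stores, while uint64_t keeps every copy an
     * integer move and the bit patterns (NaNs included) untouched. */
    static void stage_fprs(uint64_t dst[33],
                           const uint64_t fpr[32], uint64_t fpscr)
    {
        memcpy(dst, fpr, 32 * sizeof(uint64_t));
        dst[32] = fpscr;
    }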

+ 5 - 8
arch/powerpc/kernel/ptrace32.c

@@ -43,7 +43,6 @@
 #define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)
 #define FPRHALF(i) (((i) - PT_FPR0) & 1)
 #define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i)
-#define FPRINDEX_3264(i) (TS_FPRWIDTH * ((i) - PT_FPR0))
 
 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
@@ -105,7 +104,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			 * to be an array of unsigned int (32 bits) - the
 			 * index passed in is based on this assumption.
 			 */
-			tmp = ((unsigned int *)child->thread.fpr)
+			tmp = ((unsigned int *)child->thread.fp_state.fpr)
 				[FPRINDEX(index)];
 		}
 		ret = put_user((unsigned int)tmp, (u32 __user *)data);
@@ -147,8 +146,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (numReg >= PT_FPR0) {
 			flush_fp_to_thread(child);
 			/* get 64 bit FPR */
-			tmp = ((u64 *)child->thread.fpr)
-				[FPRINDEX_3264(numReg)];
+			tmp = child->thread.fp_state.fpr[numReg - PT_FPR0][0];
 		} else { /* register within PT_REGS struct */
 			unsigned long tmp2;
 			ret = ptrace_get_reg(child, numReg, &tmp2);
@@ -207,7 +205,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			 * to be an array of unsigned int (32 bits) - the
 			 * index passed in is based on this assumption.
 			 */
-			((unsigned int *)child->thread.fpr)
+			((unsigned int *)child->thread.fp_state.fpr)
 				[FPRINDEX(index)] = data;
 			ret = 0;
 		}
@@ -251,8 +249,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			u64 *tmp;
 			flush_fp_to_thread(child);
 			/* get 64 bit FPR ... */
-			tmp = &(((u64 *)child->thread.fpr)
-				[FPRINDEX_3264(numReg)]);
+			tmp = &child->thread.fp_state.fpr[numReg - PT_FPR0][0];
 			/* ... write the 32 bit part we want */
 			((u32 *)tmp)[index % 2] = data;
 			ret = 0;
@@ -269,7 +266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if (addr > 0)
 			break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-		ret = put_user(child->thread.dac1, (u32 __user *)data);
+		ret = put_user(child->thread.debug.dac1, (u32 __user *)data);
 #else
 		dabr_fake = (
 			(child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |

+ 3 - 3
arch/powerpc/kernel/rtas_pci.c

@@ -223,7 +223,7 @@ unsigned long get_phb_buid(struct device_node *phb)
 static int phb_set_bus_ranges(struct device_node *dev,
 			      struct pci_controller *phb)
 {
-	const int *bus_range;
+	const __be32 *bus_range;
 	unsigned int len;
 
 	bus_range = of_get_property(dev, "bus-range", &len);
@@ -231,8 +231,8 @@ static int phb_set_bus_ranges(struct device_node *dev,
 		return 1;
  	}
 
-	phb->first_busno =  bus_range[0];
-	phb->last_busno  =  bus_range[1];
+	phb->first_busno = be32_to_cpu(bus_range[0]);
+	phb->last_busno  = be32_to_cpu(bus_range[1]);
 
 	return 0;
 }

+ 0 - 2
arch/powerpc/kernel/setup-common.c

@@ -62,8 +62,6 @@
 #include <mm/mmu_decl.h>
 #include <asm/fadump.h>
 
-#include "setup.h"
-
 #ifdef DEBUG
 #include <asm/udbg.h>
 #define DBG(fmt...) udbg_printf(fmt)

+ 0 - 9
arch/powerpc/kernel/setup.h

@@ -1,9 +0,0 @@
-#ifndef _POWERPC_KERNEL_SETUP_H
-#define _POWERPC_KERNEL_SETUP_H
-
-void check_for_initrd(void);
-void do_init_bootmem(void);
-void setup_panic(void);
-extern int do_early_xmon;
-
-#endif /* _POWERPC_KERNEL_SETUP_H */

+ 0 - 2
arch/powerpc/kernel/setup_32.c

@@ -40,8 +40,6 @@
 #include <asm/mmu_context.h>
 #include <asm/epapr_hcalls.h>
 
-#include "setup.h"
-
 #define DBG(fmt...)
 
 extern void bootx_init(unsigned long r4, unsigned long phys);

+ 0 - 2
arch/powerpc/kernel/setup_64.c

@@ -68,8 +68,6 @@
 #include <asm/hugetlb.h>
 #include <asm/epapr_hcalls.h>
 
-#include "setup.h"
-
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
 #else

+ 42 - 39
arch/powerpc/kernel/signal_32.c

@@ -265,27 +265,27 @@ struct rt_sigframe {
 unsigned long copy_fpr_to_user(void __user *to,
 			       struct task_struct *task)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		buf[i] = task->thread.TS_FPR(i);
-	memcpy(&buf[i], &task->thread.fpscr, sizeof(double));
+	buf[i] = task->thread.fp_state.fpscr;
 	return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
 }
 
 unsigned long copy_fpr_from_user(struct task_struct *task,
 				 void __user *from)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		task->thread.TS_FPR(i) = buf[i];
-	memcpy(&task->thread.fpscr, &buf[i], sizeof(double));
+	task->thread.fp_state.fpscr = buf[i];
 
 	return 0;
 }
@@ -293,25 +293,25 @@ unsigned long copy_fpr_from_user(struct task_struct *task,
 unsigned long copy_vsx_to_user(void __user *to,
 			       struct task_struct *task)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < ELF_NVSRHALFREG; i++)
-		buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET];
+		buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
 	return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
 }
 
 unsigned long copy_vsx_from_user(struct task_struct *task,
 				 void __user *from)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < ELF_NVSRHALFREG ; i++)
-		task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+		task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
 	return 0;
 }
 
@@ -319,27 +319,27 @@ unsigned long copy_vsx_from_user(struct task_struct *task,
 unsigned long copy_transact_fpr_to_user(void __user *to,
 				  struct task_struct *task)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		buf[i] = task->thread.TS_TRANS_FPR(i);
-	memcpy(&buf[i], &task->thread.transact_fpscr, sizeof(double));
+	buf[i] = task->thread.transact_fp.fpscr;
 	return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
 }
 
 unsigned long copy_transact_fpr_from_user(struct task_struct *task,
 					  void __user *from)
 {
-	double buf[ELF_NFPREG];
+	u64 buf[ELF_NFPREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
 		task->thread.TS_TRANS_FPR(i) = buf[i];
-	memcpy(&task->thread.transact_fpscr, &buf[i], sizeof(double));
+	task->thread.transact_fp.fpscr = buf[i];
 
 	return 0;
 }
@@ -347,25 +347,25 @@ unsigned long copy_transact_fpr_from_user(struct task_struct *task,
 unsigned long copy_transact_vsx_to_user(void __user *to,
 				  struct task_struct *task)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	/* save FPR copy to local buffer then write to the thread_struct */
 	for (i = 0; i < ELF_NVSRHALFREG; i++)
-		buf[i] = task->thread.transact_fpr[i][TS_VSRLOWOFFSET];
+		buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET];
 	return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
 }
 
 unsigned long copy_transact_vsx_from_user(struct task_struct *task,
 					  void __user *from)
 {
-	double buf[ELF_NVSRHALFREG];
+	u64 buf[ELF_NVSRHALFREG];
 	int i;
 
 	if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
 		return 1;
 	for (i = 0; i < ELF_NVSRHALFREG ; i++)
-		task->thread.transact_fpr[i][TS_VSRLOWOFFSET] = buf[i];
+		task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i];
 	return 0;
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -373,14 +373,14 @@ unsigned long copy_transact_vsx_from_user(struct task_struct *task,
 inline unsigned long copy_fpr_to_user(void __user *to,
 				      struct task_struct *task)
 {
-	return __copy_to_user(to, task->thread.fpr,
+	return __copy_to_user(to, task->thread.fp_state.fpr,
 			      ELF_NFPREG * sizeof(double));
 }
 
 inline unsigned long copy_fpr_from_user(struct task_struct *task,
 					void __user *from)
 {
-	return __copy_from_user(task->thread.fpr, from,
+	return __copy_from_user(task->thread.fp_state.fpr, from,
 			      ELF_NFPREG * sizeof(double));
 }
 
@@ -388,14 +388,14 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task,
 inline unsigned long copy_transact_fpr_to_user(void __user *to,
 					 struct task_struct *task)
 {
-	return __copy_to_user(to, task->thread.transact_fpr,
+	return __copy_to_user(to, task->thread.transact_fp.fpr,
 			      ELF_NFPREG * sizeof(double));
 }
 
 inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
 						 void __user *from)
 {
-	return __copy_from_user(task->thread.transact_fpr, from,
+	return __copy_from_user(task->thread.transact_fp.fpr, from,
 				ELF_NFPREG * sizeof(double));
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -423,7 +423,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
 	/* save altivec registers */
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
-		if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
+		if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
 				   ELF_NVRREG * sizeof(vector128)))
 			return 1;
 		/* set MSR_VEC in the saved MSR value to indicate that
@@ -534,17 +534,17 @@ static int save_tm_user_regs(struct pt_regs *regs,
 	/* save altivec registers */
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
-		if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
+		if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
 				   ELF_NVRREG * sizeof(vector128)))
 			return 1;
 		if (msr & MSR_VEC) {
 			if (__copy_to_user(&tm_frame->mc_vregs,
-					   current->thread.transact_vr,
+					   &current->thread.transact_vr,
 					   ELF_NVRREG * sizeof(vector128)))
 				return 1;
 		} else {
 			if (__copy_to_user(&tm_frame->mc_vregs,
-					   current->thread.vr,
+					   &current->thread.vr_state,
 					   ELF_NVRREG * sizeof(vector128)))
 				return 1;
 		}
@@ -692,11 +692,12 @@ static long restore_user_regs(struct pt_regs *regs,
 	regs->msr &= ~MSR_VEC;
 	if (msr & MSR_VEC) {
 		/* restore altivec registers from the stack */
-		if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
+		if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
 				     sizeof(sr->mc_vregs)))
 			return 1;
 	} else if (current->thread.used_vr)
-		memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128));
+		memset(&current->thread.vr_state, 0,
+		       ELF_NVRREG * sizeof(vector128));
 
 	/* Always get VRSAVE back */
 	if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
@@ -722,7 +723,7 @@ static long restore_user_regs(struct pt_regs *regs,
 			return 1;
 	} else if (current->thread.used_vsr)
 		for (i = 0; i < 32 ; i++)
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
 #endif /* CONFIG_VSX */
 	/*
 	 * force the process to reload the FP registers from
@@ -798,15 +799,16 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	regs->msr &= ~MSR_VEC;
 	if (msr & MSR_VEC) {
 		/* restore altivec registers from the stack */
-		if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
+		if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
 				     sizeof(sr->mc_vregs)) ||
-		    __copy_from_user(current->thread.transact_vr,
+		    __copy_from_user(&current->thread.transact_vr,
 				     &tm_sr->mc_vregs,
 				     sizeof(sr->mc_vregs)))
 			return 1;
 	} else if (current->thread.used_vr) {
-		memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128));
-		memset(current->thread.transact_vr, 0,
+		memset(&current->thread.vr_state, 0,
+		       ELF_NVRREG * sizeof(vector128));
+		memset(&current->thread.transact_vr, 0,
 		       ELF_NVRREG * sizeof(vector128));
 	}
 
@@ -838,8 +840,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 			return 1;
 	} else if (current->thread.used_vsr)
 		for (i = 0; i < 32 ; i++) {
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
-			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
 		}
 #endif /* CONFIG_VSX */
 
@@ -1030,7 +1032,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 		if (__put_user(0, &rt_sf->uc.uc_link))
 			goto badframe;
 
-	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
+	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 
 	/* create a stack frame for the caller of the handler */
 	newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
@@ -1045,8 +1047,9 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	regs->gpr[5] = (unsigned long) &rt_sf->uc;
 	regs->gpr[6] = (unsigned long) rt_sf;
 	regs->nip = (unsigned long) ka->sa.sa_handler;
-	/* enter the signal handler in big-endian mode */
+	/* enter the signal handler in native-endian mode */
 	regs->msr &= ~MSR_LE;
+	regs->msr |= (MSR_KERNEL & MSR_LE);
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext
 	 * just indicates to userland that we were doing a transaction, but we
@@ -1309,7 +1312,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 	unsigned char tmp;
 	unsigned long new_msr = regs->msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	unsigned long new_dbcr0 = current->thread.dbcr0;
+	unsigned long new_dbcr0 = current->thread.debug.dbcr0;
 #endif
 
 	for (i=0; i<ndbg; i++) {
@@ -1324,7 +1327,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 			} else {
 				new_dbcr0 &= ~DBCR0_IC;
 				if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
-						current->thread.dbcr1)) {
+						current->thread.debug.dbcr1)) {
 					new_msr &= ~MSR_DE;
 					new_dbcr0 &= ~DBCR0_IDM;
 				}
@@ -1359,7 +1362,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 	   the user is really doing something wrong. */
 	regs->msr = new_msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	current->thread.dbcr0 = new_dbcr0;
+	current->thread.debug.dbcr0 = new_dbcr0;
 #endif
 
 	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
@@ -1462,7 +1465,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 
 	regs->link = tramp;
 
-	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
+	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 
 	/* create a stack frame for the caller of the handler */
 	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
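
Both signal paths stop forcing handlers into big-endian mode: after clearing MSR_LE, OR-ing in MSR_KERNEL & MSR_LE hands the handler whichever endianness the kernel itself runs in. The bit dance in isolation, with placeholder constants (the real values live in the arch headers):

    #include <stdint.h>

    #define MSR_LE_BIT  0x1ULL                  /* placeholder position */
    #ifdef __LITTLE_ENDIAN__
    #define MSR_KERNEL_SKETCH (0x8000000000001000ULL | MSR_LE_BIT)
    #else
    #define MSR_KERNEL_SKETCH  0x8000000000001000ULL
    #endif

    /* Enter the handler in the kernel's native endianness. */
    static inline uint64_t handler_msr(uint64_t msr)
    {
        msr &= ~MSR_LE_BIT;                       /* drop the old setting */
        msr |= (MSR_KERNEL_SKETCH & MSR_LE_BIT);  /* inherit the kernel's */
        return msr;
    }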

+ 17 - 15
arch/powerpc/kernel/signal_64.c

@@ -103,7 +103,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-		err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128));
+		err |= __copy_to_user(v_regs, &current->thread.vr_state,
+				      33 * sizeof(vector128));
 		/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
 		 * contains valid data.
 		 */
@@ -195,18 +196,18 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 	if (current->thread.used_vr) {
 		flush_altivec_to_thread(current);
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-		err |= __copy_to_user(v_regs, current->thread.vr,
+		err |= __copy_to_user(v_regs, &current->thread.vr_state,
 				      33 * sizeof(vector128));
 		/* If VEC was enabled there are transactional VRs valid too,
 		 * else they're a copy of the checkpointed VRs.
 		 */
 		if (msr & MSR_VEC)
 			err |= __copy_to_user(tm_v_regs,
-					      current->thread.transact_vr,
+					      &current->thread.transact_vr,
 					      33 * sizeof(vector128));
 		else
 			err |= __copy_to_user(tm_v_regs,
-					      current->thread.vr,
+					      &current->thread.vr_state,
 					      33 * sizeof(vector128));
 
 		/* set MSR_VEC in the MSR value in the frame to indicate
@@ -349,10 +350,10 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != NULL && (msr & MSR_VEC) != 0)
-		err |= __copy_from_user(current->thread.vr, v_regs,
+		err |= __copy_from_user(&current->thread.vr_state, v_regs,
 					33 * sizeof(vector128));
 	else if (current->thread.used_vr)
-		memset(current->thread.vr, 0, 33 * sizeof(vector128));
+		memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
 	/* Always get VRSAVE back */
 	if (v_regs != NULL)
 		err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
@@ -374,7 +375,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 		err |= copy_vsx_from_user(current, v_regs);
 	else
 		for (i = 0; i < 32 ; i++)
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
 #endif
 	return err;
 }
@@ -468,14 +469,14 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) {
-		err |= __copy_from_user(current->thread.vr, v_regs,
+		err |= __copy_from_user(&current->thread.vr_state, v_regs,
 					33 * sizeof(vector128));
-		err |= __copy_from_user(current->thread.transact_vr, tm_v_regs,
+		err |= __copy_from_user(&current->thread.transact_vr, tm_v_regs,
 					33 * sizeof(vector128));
 	}
 	else if (current->thread.used_vr) {
-		memset(current->thread.vr, 0, 33 * sizeof(vector128));
-		memset(current->thread.transact_vr, 0, 33 * sizeof(vector128));
+		memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
+		memset(&current->thread.transact_vr, 0, 33 * sizeof(vector128));
 	}
 	/* Always get VRSAVE back */
 	if (v_regs != NULL && tm_v_regs != NULL) {
@@ -507,8 +508,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 		err |= copy_transact_vsx_from_user(current, tm_v_regs);
 	} else {
 		for (i = 0; i < 32 ; i++) {
-			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
-			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
 		}
 	}
 #endif
@@ -747,7 +748,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
 		goto badframe;
 
 	/* Make sure signal handler doesn't get spurious FP exceptions */
-	current->thread.fpscr.val = 0;
+	current->thread.fp_state.fpscr = 0;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext
 	 * just indicates to userland that we were doing a transaction, but we
@@ -773,8 +774,9 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
 
 	/* Set up "regs" so we "return" to the signal handler. */
 	err |= get_user(regs->nip, &funct_desc_ptr->entry);
-	/* enter the signal handler in big-endian mode */
+	/* enter the signal handler in native-endian mode */
 	regs->msr &= ~MSR_LE;
+	regs->msr |= (MSR_KERNEL & MSR_LE);
 	regs->gpr[1] = newsp;
 	err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
 	regs->gpr[3] = signr;

+ 4 - 0
arch/powerpc/kernel/swsusp_asm64.S

@@ -114,7 +114,9 @@ _GLOBAL(swsusp_arch_suspend)
 	SAVE_SPECIAL(MSR)
 	SAVE_SPECIAL(XER)
 #ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FW_FTR_SECTION
 	SAVE_SPECIAL(SDR1)
+END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
 #else
 	SAVE_SPR(TCR)
 
@@ -231,7 +233,9 @@ nothing_to_copy:
 	/* can't use RESTORE_SPECIAL(MSR) */
 	ld	r0, SL_MSR(r11)
 	mtmsrd	r0, 0
+BEGIN_FW_FTR_SECTION
 	RESTORE_SPECIAL(SDR1)
+END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
 #else
 	/* Restore SPRG1, be used to save paca */
 	ld	r0, SL_SPRG1(r11)

+ 26 - 23
arch/powerpc/kernel/tm.S

@@ -12,16 +12,15 @@
 #include <asm/reg.h>
 
 #ifdef CONFIG_VSX
-/* See fpu.S, this is very similar but to save/restore checkpointed FPRs/VSRs */
-#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base)	\
+/* See fpu.S, this is borrowed from there */
+#define __SAVE_32FPRS_VSRS(n,c,base)		\
 BEGIN_FTR_SECTION				\
 	b	2f;				\
 END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\
-	SAVE_32FPRS_TRANSACT(n,base);		\
+	SAVE_32FPRS(n,base);			\
 	b	3f;				\
-2:	SAVE_32VSRS_TRANSACT(n,c,base);		\
+2:	SAVE_32VSRS(n,c,base);			\
 3:
-/* ...and this is just plain borrowed from there. */
 #define __REST_32FPRS_VSRS(n,c,base)		\
 BEGIN_FTR_SECTION				\
 	b	2f;				\
@@ -31,11 +30,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\
 2:	REST_32VSRS(n,c,base);			\
 3:
 #else
-#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) SAVE_32FPRS_TRANSACT(n, base)
-#define __REST_32FPRS_VSRS(n,c,base)	      REST_32FPRS(n, base)
+#define __SAVE_32FPRS_VSRS(n,c,base)	SAVE_32FPRS(n, base)
+#define __REST_32FPRS_VSRS(n,c,base)	REST_32FPRS(n, base)
 #endif
-#define SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \
-	__SAVE_32FPRS_VSRS_TRANSACT(n,__REG_##c,__REG_##base)
+#define SAVE_32FPRS_VSRS(n,c,base) \
+	__SAVE_32FPRS_VSRS(n,__REG_##c,__REG_##base)
 #define REST_32FPRS_VSRS(n,c,base) \
 	__REST_32FPRS_VSRS(n,__REG_##c,__REG_##base)
 
@@ -107,7 +106,7 @@ DSCR_DEFAULT:
 _GLOBAL(tm_reclaim)
 	mfcr	r6
 	mflr	r0
-	std	r6, 8(r1)
+	stw	r6, 8(r1)
 	std	r0, 16(r1)
 	std	r2, 40(r1)
 	stdu	r1, -TM_FRAME_SIZE(r1)
@@ -157,10 +156,11 @@ _GLOBAL(tm_reclaim)
 	andis.		r0, r4, MSR_VEC@h
 	beq	dont_backup_vec
 
-	SAVE_32VRS_TRANSACT(0, r6, r3)	/* r6 scratch, r3 thread */
+	addi	r7, r3, THREAD_TRANSACT_VRSTATE
+	SAVE_32VRS(0, r6, r7)	/* r6 scratch, r7 transact vr state */
 	mfvscr	vr0
-	li	r6, THREAD_TRANSACT_VSCR
-	stvx	vr0, r3, r6
+	li	r6, VRSTATE_VSCR
+	stvx	vr0, r7, r6
 dont_backup_vec:
 	mfspr	r0, SPRN_VRSAVE
 	std	r0, THREAD_TRANSACT_VRSAVE(r3)
@@ -168,10 +168,11 @@ dont_backup_vec:
 	andi.	r0, r4, MSR_FP
 	beq	dont_backup_fp
 
-	SAVE_32FPRS_VSRS_TRANSACT(0, R6, R3)	/* r6 scratch, r3 thread */
+	addi	r7, r3, THREAD_TRANSACT_FPSTATE
+	SAVE_32FPRS_VSRS(0, R6, R7)	/* r6 scratch, r7 transact fp state */
 
 	mffs    fr0
-	stfd    fr0,THREAD_TRANSACT_FPSCR(r3)
+	stfd    fr0,FPSTATE_FPSCR(r7)
 
 dont_backup_fp:
 	/* The moment we treclaim, ALL of our GPRs will switch
@@ -284,7 +285,7 @@ dont_backup_fp:
 	REST_NVGPRS(r1)
 
 	addi    r1, r1, TM_FRAME_SIZE
-	ld	r4, 8(r1)
+	lwz	r4, 8(r1)
 	ld	r0, 16(r1)
 	mtcr	r4
 	mtlr	r0
@@ -309,7 +310,7 @@ dont_backup_fp:
 _GLOBAL(tm_recheckpoint)
 	mfcr	r5
 	mflr	r0
-	std	r5, 8(r1)
+	stw	r5, 8(r1)
 	std	r0, 16(r1)
 	std	r2, 40(r1)
 	stdu	r1, -TM_FRAME_SIZE(r1)
@@ -358,10 +359,11 @@ _GLOBAL(tm_recheckpoint)
 	andis.	r0, r4, MSR_VEC@h
 	beq	dont_restore_vec
 
-	li	r5, THREAD_VSCR
-	lvx	vr0, r3, r5
+	addi	r8, r3, THREAD_VRSTATE
+	li	r5, VRSTATE_VSCR
+	lvx	vr0, r8, r5
 	mtvscr	vr0
-	REST_32VRS(0, r5, r3)			/* r5 scratch, r3 THREAD ptr */
+	REST_32VRS(0, r5, r8)			/* r5 scratch, r8 ptr */
 dont_restore_vec:
 	ld	r5, THREAD_VRSAVE(r3)
 	mtspr	SPRN_VRSAVE, r5
@@ -370,9 +372,10 @@ dont_restore_vec:
 	andi.	r0, r4, MSR_FP
 	beq	dont_restore_fp
 
-	lfd	fr0, THREAD_FPSCR(r3)
+	addi	r8, r3, THREAD_FPSTATE
+	lfd	fr0, FPSTATE_FPSCR(r8)
 	MTFSF_L(fr0)
-	REST_32FPRS_VSRS(0, R4, R3)
+	REST_32FPRS_VSRS(0, R4, R8)
 
 dont_restore_fp:
 	mtmsr	r6				/* FP/Vec off again! */
@@ -441,7 +444,7 @@ restore_gprs:
 	REST_NVGPRS(r1)
 
 	addi    r1, r1, TM_FRAME_SIZE
-	ld	r4, 8(r1)
+	lwz	r4, 8(r1)
 	ld	r0, 16(r1)
 	mtcr	r4
 	mtlr	r0

+ 31 - 25
arch/powerpc/kernel/traps.c

@@ -351,8 +351,8 @@ static inline int check_io_access(struct pt_regs *regs)
 #define REASON_TRAP		ESR_PTR
 
 /* single-step stuff */
-#define single_stepping(regs)	(current->thread.dbcr0 & DBCR0_IC)
-#define clear_single_step(regs)	(current->thread.dbcr0 &= ~DBCR0_IC)
+#define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
+#define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)
 
 #else
 /* On non-4xx, the reason for the machine check or program
@@ -816,7 +816,7 @@ static void parse_fpe(struct pt_regs *regs)
 
 	flush_fp_to_thread(current);
 
-	code = __parse_fpscr(current->thread.fpscr.val);
+	code = __parse_fpscr(current->thread.fp_state.fpscr);
 
 	_exception(SIGFPE, regs, code, regs->nip);
 }
@@ -1018,6 +1018,13 @@ static int emulate_instruction(struct pt_regs *regs)
 		return emulate_isel(regs, instword);
 	}
 
+	/* Emulate sync instruction variants */
+	if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
+		PPC_WARN_EMULATED(sync, regs);
+		asm volatile("sync");
+		return 0;
+	}
+
 #ifdef CONFIG_PPC64
 	/* Emulate the mfspr rD, DSCR. */
 	if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
@@ -1069,7 +1076,7 @@ static int emulate_math(struct pt_regs *regs)
 		return 0;
 	case 1: {
 			int code = 0;
-			code = __parse_fpscr(current->thread.fpscr.val);
+			code = __parse_fpscr(current->thread.fp_state.fpscr);
 			_exception(SIGFPE, regs, code, regs->nip);
 			return 0;
 		}
@@ -1371,8 +1378,6 @@ void facility_unavailable_exception(struct pt_regs *regs)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 
-extern void do_load_up_fpu(struct pt_regs *regs);
-
 void fp_unavailable_tm(struct pt_regs *regs)
 {
 	/* Note:  This does not handle any kind of FP laziness. */
@@ -1403,8 +1408,6 @@ void fp_unavailable_tm(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_ALTIVEC
-extern void do_load_up_altivec(struct pt_regs *regs);
-
 void altivec_unavailable_tm(struct pt_regs *regs)
 {
 	/* See the comments in fp_unavailable_tm().  This function operates
@@ -1465,7 +1468,8 @@ void SoftwareEmulation(struct pt_regs *regs)
 
 	if (!user_mode(regs)) {
 		debugger(regs);
-		die("Kernel Mode Software FPU Emulation", regs, SIGFPE);
+		die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
+			regs, SIGFPE);
 	}
 
 	if (!emulate_math(regs))
@@ -1486,7 +1490,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 	if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
 		dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-		current->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+		current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
 #endif
 		do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT,
 			     5);
@@ -1497,24 +1501,24 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 			     6);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC1) {
-		current->thread.dbcr0 &= ~DBCR0_IAC1;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
 		dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
 		do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT,
 			     1);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC2) {
-		current->thread.dbcr0 &= ~DBCR0_IAC2;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
 		do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT,
 			     2);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC3) {
-		current->thread.dbcr0 &= ~DBCR0_IAC3;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
 		dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
 		do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT,
 			     3);
 		changed |= 0x01;
 	}  else if (debug_status & DBSR_IAC4) {
-		current->thread.dbcr0 &= ~DBCR0_IAC4;
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
 		do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT,
 			     4);
 		changed |= 0x01;
@@ -1524,19 +1528,20 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 	 * Check all other debug flags and see if that bit needs to be turned
 	 * back on or not.
 	 */
-	if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1))
+	if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+	    current->thread.debug.dbcr1))
 		regs->msr |= MSR_DE;
 	else
 		/* Make sure the IDM flag is off */
-		current->thread.dbcr0 &= ~DBCR0_IDM;
+		current->thread.debug.dbcr0 &= ~DBCR0_IDM;
 
 	if (changed & 0x01)
-		mtspr(SPRN_DBCR0, current->thread.dbcr0);
+		mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
 }
 
 void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 {
-	current->thread.dbsr = debug_status;
+	current->thread.debug.dbsr = debug_status;
 
 	/* Hack alert: On BookE, Branch Taken stops on the branch itself, while
 	 * on server, it stops on the target of the branch. In order to simulate
@@ -1553,8 +1558,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 
 		/* Do the single step trick only when coming from userspace */
 		if (user_mode(regs)) {
-			current->thread.dbcr0 &= ~DBCR0_BT;
-			current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+			current->thread.debug.dbcr0 &= ~DBCR0_BT;
+			current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
 			regs->msr |= MSR_DE;
 			return;
 		}
@@ -1582,13 +1587,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 			return;
 
 		if (user_mode(regs)) {
-			current->thread.dbcr0 &= ~DBCR0_IC;
-			if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0,
-					       current->thread.dbcr1))
+			current->thread.debug.dbcr0 &= ~DBCR0_IC;
+			if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+					       current->thread.debug.dbcr1))
 				regs->msr |= MSR_DE;
 			else
 				/* Make sure the IDM bit is off */
-				current->thread.dbcr0 &= ~DBCR0_IDM;
+				current->thread.debug.dbcr0 &= ~DBCR0_IDM;
 		}
 
 		_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
@@ -1634,7 +1639,7 @@ void altivec_assist_exception(struct pt_regs *regs)
 		/* XXX quick hack for now: set the non-Java bit in the VSCR */
 		printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
 				   "in %s at %lx\n", current->comm, regs->nip);
-		current->thread.vscr.u[3] |= 0x10000;
+		current->thread.vr_state.vscr.u[3] |= 0x10000;
 	}
 }
 #endif /* CONFIG_ALTIVEC */
@@ -1815,6 +1820,7 @@ struct ppc_emulated ppc_emulated = {
 	WARN_EMULATED_SETUP(popcntb),
 	WARN_EMULATED_SETUP(spe),
 	WARN_EMULATED_SETUP(string),
+	WARN_EMULATED_SETUP(sync),
 	WARN_EMULATED_SETUP(unaligned),
 #ifdef CONFIG_MATH_EMULATION
 	WARN_EMULATED_SETUP(math),
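
The new sync case follows the file's decode idiom: mask off the variant bits, compare what remains, account the hit via PPC_WARN_EMULATED, then perform the operation natively. The mask/value technique in isolation (the encodings below are illustrative, not lifted from the headers):

    #include <stdint.h>

    struct insn_pattern {
        uint32_t mask;      /* bits that identify the family */
        uint32_t value;     /* what those bits must equal */
    };

    /* Illustrative: opcode 31, extended opcode 598 ("sync"), with the
     * variant fields (e.g. the L field selecting lwsync) masked out. */
    static const struct insn_pattern sync_family = {
        .mask  = 0xfc0007fe,
        .value = 0x7c0004ac,
    };

    static int is_sync_variant(uint32_t instword)
    {
        return (instword & sync_family.mask) == sync_family.value;
    }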

+ 1 - 2
arch/powerpc/kernel/vdso.c

@@ -34,8 +34,7 @@
 #include <asm/firmware.h>
 #include <asm/vdso.h>
 #include <asm/vdso_datapage.h>
-
-#include "setup.h"
+#include <asm/setup.h>
 
 #undef DEBUG
 

+ 4 - 0
arch/powerpc/kernel/vdso32/vdso32.lds.S

@@ -4,7 +4,11 @@
  */
 #include <asm/vdso.h>
 
+#ifdef __LITTLE_ENDIAN__
+OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle")
+#else
 OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+#endif
 OUTPUT_ARCH(powerpc:common)
 ENTRY(_start)
 

+ 4 - 0
arch/powerpc/kernel/vdso64/vdso64.lds.S

@@ -4,7 +4,11 @@
  */
 #include <asm/vdso.h>
 
+#ifdef __LITTLE_ENDIAN__
+OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle")
+#else
 OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
+#endif
 OUTPUT_ARCH(powerpc:common64)
 ENTRY(_start)
 

+ 3 - 3
arch/powerpc/kernel/vecemu.c

@@ -271,7 +271,7 @@ int emulate_altivec(struct pt_regs *regs)
 	vb = (instr >> 11) & 0x1f;
 	vc = (instr >> 6) & 0x1f;
 
-	vrs = current->thread.vr;
+	vrs = current->thread.vr_state.vr;
 	switch (instr & 0x3f) {
 	case 10:
 		switch (vc) {
@@ -320,12 +320,12 @@ int emulate_altivec(struct pt_regs *regs)
 		case 14:	/* vctuxs */
 			for (i = 0; i < 4; ++i)
 				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
-						&current->thread.vscr.u[3]);
+					&current->thread.vr_state.vscr.u[3]);
 			break;
 		case 15:	/* vctsxs */
 			for (i = 0; i < 4; ++i)
 				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
-						&current->thread.vscr.u[3]);
+					&current->thread.vr_state.vscr.u[3]);
 			break;
 		default:
 			return -EINVAL;

+ 44 - 36
arch/powerpc/kernel/vector.S

@@ -8,29 +8,6 @@
 #include <asm/ptrace.h>
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Wrapper to call load_up_altivec from C.
- * void do_load_up_altivec(struct pt_regs *regs);
- */
-_GLOBAL(do_load_up_altivec)
-	mflr	r0
-	std	r0, 16(r1)
-	stdu	r1, -112(r1)
-
-	subi	r6, r3, STACK_FRAME_OVERHEAD
-	/* load_up_altivec expects r12=MSR, r13=PACA, and returns
-	 * with r12 = new MSR.
-	 */
-	ld	r12,_MSR(r6)
-	GET_PACA(r13)
-	bl	load_up_altivec
-	std	r12,_MSR(r6)
-
-	ld	r0, 112+16(r1)
-	addi	r1, r1, 112
-	mtlr	r0
-	blr
-
 /* void do_load_up_transact_altivec(struct thread_struct *thread)
  *
  * This is similar to load_up_altivec but for the transactional version of the
@@ -46,10 +23,11 @@ _GLOBAL(do_load_up_transact_altivec)
 	li	r4,1
 	stw	r4,THREAD_USED_VR(r3)
 
-	li	r10,THREAD_TRANSACT_VSCR
+	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
 	lvx	vr0,r10,r3
 	mtvscr	vr0
-	REST_32VRS_TRANSACT(0,r4,r3)
+	addi	r10,r3,THREAD_TRANSACT_VRSTATE
+	REST_32VRS(0,r4,r10)
 
 	/* Disable VEC again. */
 	MTMSRD(r6)
@@ -59,12 +37,36 @@ _GLOBAL(do_load_up_transact_altivec)
 #endif
 
 /*
- * load_up_altivec(unused, unused, tsk)
+ * Load state from memory into VMX registers including VSCR.
+ * Assumes the caller has enabled VMX in the MSR.
+ */
+_GLOBAL(load_vr_state)
+	li	r4,VRSTATE_VSCR
+	lvx	vr0,r4,r3
+	mtvscr	vr0
+	REST_32VRS(0,r4,r3)
+	blr
+
+/*
+ * Store VMX state into memory, including VSCR.
+ * Assumes the caller has enabled VMX in the MSR.
+ */
+_GLOBAL(store_vr_state)
+	SAVE_32VRS(0, r4, r3)
+	mfvscr	vr0
+	li	r4, VRSTATE_VSCR
+	stvx	vr0, r4, r3
+	blr
+
+/*
  * Disable VMX for the task which had it previously,
  * and save its vector registers in its thread_struct.
  * Enables the VMX for use in the kernel on return.
  * On SMP we know the VMX is free, since we give it up every
  * switch (ie, no lazy save of the vector registers).
+ *
+ * Note that on 32-bit this can only use registers that will be
+ * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
  */
 _GLOBAL(load_up_altivec)
 	mfmsr	r5			/* grab the current MSR */
@@ -90,10 +92,11 @@ _GLOBAL(load_up_altivec)
 	/* Save VMX state to last_task_used_altivec's THREAD struct */
 	toreal(r4)
 	addi	r4,r4,THREAD
-	SAVE_32VRS(0,r5,r4)
+	addi	r6,r4,THREAD_VRSTATE
+	SAVE_32VRS(0,r5,r6)
 	mfvscr	vr0
-	li	r10,THREAD_VSCR
-	stvx	vr0,r10,r4
+	li	r10,VRSTATE_VSCR
+	stvx	vr0,r10,r6
 	/* Disable VMX for last_task_used_altivec */
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
@@ -125,12 +128,13 @@ _GLOBAL(load_up_altivec)
 	oris	r12,r12,MSR_VEC@h
 	std	r12,_MSR(r1)
 #endif
+	addi	r6,r5,THREAD_VRSTATE
 	li	r4,1
-	li	r10,THREAD_VSCR
+	li	r10,VRSTATE_VSCR
 	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r5
+	lvx	vr0,r10,r6
 	mtvscr	vr0
-	REST_32VRS(0,r4,r5)
+	REST_32VRS(0,r4,r6)
 #ifndef CONFIG_SMP
 	/* Update last_task_used_altivec to 'current' */
 	subi	r4,r5,THREAD		/* Back to 'current' */
@@ -165,12 +169,16 @@ _GLOBAL(giveup_altivec)
 	PPC_LCMPI	0,r3,0
 	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
+	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
-	PPC_LCMPI	0,r5,0
-	SAVE_32VRS(0,r4,r3)
+	PPC_LCMPI	0,r7,0
+	bne	2f
+	addi	r7,r3,THREAD_VRSTATE
+2:	PPC_LCMPI	0,r5,0
+	SAVE_32VRS(0,r4,r7)
 	mfvscr	vr0
-	li	r4,THREAD_VSCR
-	stvx	vr0,r4,r3
+	li	r4,VRSTATE_VSCR
+	stvx	vr0,r4,r7
 	beq	1f
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 #ifdef CONFIG_VSX

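The new load_vr_state/store_vr_state helpers and the VRSTATE_VSCR offset assume the register block layout of struct thread_vr_state: 32 quadword vector registers followed by a quadword slot holding the VSCR. A byte-level sketch of that layout (an assumption for illustration, not the kernel's vector128 typedef):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct vr_state_model {
	uint8_t vr[32][16];	/* what SAVE_32VRS/REST_32VRS walk */
	uint8_t vscr[16];	/* lvx/stvx at offset VRSTATE_VSCR */
};

int main(void)
{
	/* the asm-offsets constant VRSTATE_VSCR corresponds to this */
	printf("vscr offset = %zu\n", offsetof(struct vr_state_model, vscr));
	return 0;		/* prints 512 */
}
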
+ 2 - 2
arch/powerpc/kernel/vio.c

@@ -1537,12 +1537,12 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 
 	dn = dev->of_node;
 	if (!dn) {
-		strcat(buf, "\n");
+		strcpy(buf, "\n");
 		return strlen(buf);
 	}
 	cp = of_get_property(dn, "compatible", NULL);
 	if (!cp) {
-		strcat(buf, "\n");
+		strcpy(buf, "\n");
 		return strlen(buf);
 	}
 
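The strcat() to strcpy() change matters because a sysfs show() buffer is not guaranteed to be zero-initialized, so appending lands after whatever bytes are already there. A user-space illustration (assuming "stale" stands in for leftover buffer contents):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[32] = "stale";		/* pretend leftover data */

	strcat(buf, "\n");		/* -> "stale\n", 6 bytes reported */
	printf("strcat: %zu\n", strlen(buf));

	strcpy(buf, "\n");		/* -> "\n", deterministic 1 byte */
	printf("strcpy: %zu\n", strlen(buf));
	return 0;
}
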

+ 1 - 0
arch/powerpc/kvm/Kconfig

@@ -6,6 +6,7 @@ source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
+	depends on !CPU_LITTLE_ENDIAN
 	---help---
 	  Say Y here to get to see options for using your Linux host to run
 	  other operating systems inside virtual machines (guests).

+ 15 - 21
arch/powerpc/kvm/book3s_pr.c

@@ -444,7 +444,7 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 #ifdef CONFIG_VSX
 	u64 *vcpu_vsx = vcpu->arch.vsr;
 #endif
-	u64 *thread_fpr = (u64*)t->fpr;
+	u64 *thread_fpr = &t->fp_state.fpr[0][0];
 	int i;
 
 	/*
@@ -466,14 +466,14 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 		/*
 		 * Note that on CPUs with VSX, giveup_fpu stores
 		 * both the traditional FP registers and the added VSX
-		 * registers into thread.fpr[].
+		 * registers into thread.fp_state.fpr[].
 		 */
 		if (current->thread.regs->msr & MSR_FP)
 			giveup_fpu(current);
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
 
-		vcpu->arch.fpscr = t->fpscr.val;
+		vcpu->arch.fpscr = t->fp_state.fpscr;
 
 #ifdef CONFIG_VSX
 		if (cpu_has_feature(CPU_FTR_VSX))
@@ -486,8 +486,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 	if (msr & MSR_VEC) {
 		if (current->thread.regs->msr & MSR_VEC)
 			giveup_altivec(current);
-		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
-		vcpu->arch.vscr = t->vscr;
+		memcpy(vcpu->arch.vr, t->vr_state.vr, sizeof(vcpu->arch.vr));
+		vcpu->arch.vscr = t->vr_state.vscr;
 	}
 #endif
 
@@ -539,7 +539,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #ifdef CONFIG_VSX
 	u64 *vcpu_vsx = vcpu->arch.vsr;
 #endif
-	u64 *thread_fpr = (u64*)t->fpr;
+	u64 *thread_fpr = &t->fp_state.fpr[0][0];
 	int i;
 
 	/* When we have paired singles, we emulate in software */
@@ -584,15 +584,15 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
 			thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
 #endif
-		t->fpscr.val = vcpu->arch.fpscr;
+		t->fp_state.fpscr = vcpu->arch.fpscr;
 		t->fpexc_mode = 0;
 		kvmppc_load_up_fpu();
 	}
 
 	if (msr & MSR_VEC) {
 #ifdef CONFIG_ALTIVEC
-		memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
-		t->vscr = vcpu->arch.vscr;
+		memcpy(t->vr_state.vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
+		t->vr_state.vscr = vcpu->arch.vscr;
 		t->vrsave = -1;
 		kvmppc_load_up_altivec();
 #endif
@@ -1116,12 +1116,10 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
-	double fpr[32][TS_FPRWIDTH];
-	unsigned int fpscr;
+	struct thread_fp_state fp;
 	int fpexc_mode;
 #ifdef CONFIG_ALTIVEC
-	vector128 vr[32];
-	vector128 vscr;
+	struct thread_vr_state vr;
 	unsigned long uninitialized_var(vrsave);
 	int used_vr;
 #endif
@@ -1153,8 +1151,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* Save FPU state in stack */
 	if (current->thread.regs->msr & MSR_FP)
 		giveup_fpu(current);
-	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
-	fpscr = current->thread.fpscr.val;
+	fp = current->thread.fp_state;
 	fpexc_mode = current->thread.fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
@@ -1163,8 +1160,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (used_vr) {
 		if (current->thread.regs->msr & MSR_VEC)
 			giveup_altivec(current);
-		memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
-		vscr = current->thread.vscr;
+		vr = current->thread.vr_state;
 		vrsave = current->thread.vrsave;
 	}
 #endif
@@ -1196,15 +1192,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	current->thread.regs->msr = ext_msr;
 
 	/* Restore FPU/VSX state from stack */
-	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
-	current->thread.fpscr.val = fpscr;
+	current->thread.fp_state = fp;
 	current->thread.fpexc_mode = fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
 	/* Restore Altivec state from stack */
 	if (used_vr && current->thread.used_vr) {
-		memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
-		current->thread.vscr = vscr;
+		current->thread.vr_state = vr;
 		current->thread.vrsave = vrsave;
 	}
 	current->thread.used_vr = used_vr;

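Note how &t->fp_state.fpr[0][0] replaces the old (u64 *)t->fpr cast: fpr is now a two-dimensional u64 array inside thread_fp_state, and the address of its first element is already a flat u64 view. A compact model (assuming an inner width of 2, as with VSX's TS_FPRWIDTH; the struct here is a stand-in):

#include <stdio.h>
#include <stdint.h>

struct fp_state_model {
	uint64_t fpr[32][2];	/* rows are contiguous in memory */
	uint64_t fpscr;
};

int main(void)
{
	struct fp_state_model fp = { .fpr[1][0] = 42 };
	uint64_t *flat = &fp.fpr[0][0];		/* no cast needed */

	printf("%llu\n", (unsigned long long)flat[2]);	/* fpr[1][0] -> 42 */
	return 0;
}
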
+ 9 - 10
arch/powerpc/kvm/booke.c

@@ -656,9 +656,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret, s;
 #ifdef CONFIG_PPC_FPU
-	unsigned int fpscr;
+	struct thread_fp_state fp;
 	int fpexc_mode;
-	u64 fpr[32];
 #endif
 
 	if (!vcpu->arch.sane) {
@@ -677,13 +676,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #ifdef CONFIG_PPC_FPU
 	/* Save userspace FPU state in stack */
 	enable_kernel_fp();
-	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
-	fpscr = current->thread.fpscr.val;
+	fp = current->thread.fp_state;
 	fpexc_mode = current->thread.fpexc_mode;
 
 	/* Restore guest FPU state to thread */
-	memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
-	current->thread.fpscr.val = vcpu->arch.fpscr;
+	memcpy(current->thread.fp_state.fpr, vcpu->arch.fpr,
+	       sizeof(vcpu->arch.fpr));
+	current->thread.fp_state.fpscr = vcpu->arch.fpscr;
 
 	/*
 	 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
@@ -709,12 +708,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	vcpu->fpu_active = 0;
 
 	/* Save guest FPU state from thread */
-	memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
-	vcpu->arch.fpscr = current->thread.fpscr.val;
+	memcpy(vcpu->arch.fpr, current->thread.fp_state.fpr,
+	       sizeof(vcpu->arch.fpr));
+	vcpu->arch.fpscr = current->thread.fp_state.fpscr;
 
 	/* Restore userspace FPU state from stack */
-	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
-	current->thread.fpscr.val = fpscr;
+	current->thread.fp_state = fp;
 	current->thread.fpexc_mode = fpexc_mode;
 #endif
 

+ 16 - 5
arch/powerpc/lib/Makefile

@@ -10,15 +10,23 @@ CFLAGS_REMOVE_code-patching.o = -pg
 CFLAGS_REMOVE_feature-fixups.o = -pg
 
 obj-y			:= string.o alloc.o \
-			   checksum_$(CONFIG_WORD_SIZE).o crtsavres.o
+			   crtsavres.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o mem_64.o string.o \
-			   checksum_wrappers_64.o hweight_64.o \
-			   copyuser_power7.o string_64.o copypage_power7.o \
-			   memcpy_power7.o
+			   usercopy_64.o mem_64.o string.o \
+			   hweight_64.o \
+			   copyuser_power7.o string_64.o copypage_power7.o
+ifeq ($(CONFIG_GENERIC_CSUM),)
+obj-y			+= checksum_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o
+endif
+
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
+obj-$(CONFIG_PPC64)		+= memcpy_power7.o memcpy_64.o 
+endif
+
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
@@ -31,3 +39,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
 obj-y			+= code-patching.o
 obj-y			+= feature-fixups.o
 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+
+obj-$(CONFIG_ALTIVEC)	+= xor_vmx.o
+CFLAGS_xor_vmx.o += -maltivec -mabi=altivec

+ 31 - 23
arch/powerpc/lib/copyuser_power7.S

@@ -19,6 +19,14 @@
  */
 #include <asm/ppc_asm.h>
 
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
 	.macro err1
 100:
 	.section __ex_table,"a"
@@ -552,13 +560,13 @@ err3;	stw	r7,4(r3)
 	li	r10,32
 	li	r11,48
 
-	lvsl	vr16,0,r4	/* Setup permute control vector */
+	LVS(vr16,0,r4)		/* Setup permute control vector */
 err3;	lvx	vr0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 err3;	stvx	vr8,r0,r3
 	addi	r3,r3,16
@@ -566,9 +574,9 @@ err3;	stvx	vr8,r0,r3
 
 5:	bf	cr7*4+2,6f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 err3;	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -576,13 +584,13 @@ err3;	stvx	vr9,r3,r9
 
 6:	bf	cr7*4+1,7f
 err3;	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 err3;	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 err3;	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 err3;	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -611,21 +619,21 @@ err3;	stvx	vr11,r3,r11
 	.align	5
 8:
 err4;	lvx	vr7,r0,r4
-	vperm	vr8,vr0,vr7,vr16
+	VPERM(vr8,vr0,vr7,vr16)
 err4;	lvx	vr6,r4,r9
-	vperm	vr9,vr7,vr6,vr16
+	VPERM(vr9,vr7,vr6,vr16)
 err4;	lvx	vr5,r4,r10
-	vperm	vr10,vr6,vr5,vr16
+	VPERM(vr10,vr6,vr5,vr16)
 err4;	lvx	vr4,r4,r11
-	vperm	vr11,vr5,vr4,vr16
+	VPERM(vr11,vr5,vr4,vr16)
 err4;	lvx	vr3,r4,r12
-	vperm	vr12,vr4,vr3,vr16
+	VPERM(vr12,vr4,vr3,vr16)
 err4;	lvx	vr2,r4,r14
-	vperm	vr13,vr3,vr2,vr16
+	VPERM(vr13,vr3,vr2,vr16)
 err4;	lvx	vr1,r4,r15
-	vperm	vr14,vr2,vr1,vr16
+	VPERM(vr14,vr2,vr1,vr16)
 err4;	lvx	vr0,r4,r16
-	vperm	vr15,vr1,vr0,vr16
+	VPERM(vr15,vr1,vr0,vr16)
 	addi	r4,r4,128
 err4;	stvx	vr8,r0,r3
 err4;	stvx	vr9,r3,r9
@@ -649,13 +657,13 @@ err4;	stvx	vr15,r3,r16
 
 	bf	cr7*4+1,9f
 err3;	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 err3;	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 err3;	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 err3;	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -665,9 +673,9 @@ err3;	stvx	vr11,r3,r11
 
 9:	bf	cr7*4+2,10f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 err3;	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -675,7 +683,7 @@ err3;	stvx	vr9,r3,r9
 
 10:	bf	cr7*4+3,11f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 err3;	stvx	vr8,r0,r3
 	addi	r3,r3,16

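The LVS/VPERM macros keep one code path for both endians: lvsl (big-endian) or lvsr (little-endian, with vperm's source operands swapped) builds a byte-select control so that two aligned quadword loads yield one unaligned quadword. A scalar model of the big-endian case (a sketch for illustration only, not the full vector semantics):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Model of lvsl + two lvx + vperm: extract 16 unaligned bytes from the
 * two aligned quadwords that cover them. */
static void unaligned_load_16(const uint8_t *src, uint8_t out[16])
{
	const uint8_t *base = (const uint8_t *)((uintptr_t)src & ~(uintptr_t)15);
	unsigned sh = (uintptr_t)src & 15;	/* what lvsl encodes */
	uint8_t lo[16], hi[16];

	memcpy(lo, base, 16);			/* lvx vr0,0,r4 */
	memcpy(hi, base + 16, 16);		/* lvx vr1,r0,r4 (next quadword) */
	for (int i = 0; i < 16; i++)		/* VPERM(vr8,vr0,vr1,vr16) */
		out[i] = (sh + i < 16) ? lo[sh + i] : hi[sh + i - 16];
}

int main(void)
{
	_Alignas(16) uint8_t buf[48];
	uint8_t out[16];

	for (int i = 0; i < 48; i++)
		buf[i] = (uint8_t)i;
	unaligned_load_16(buf + 5, out);
	printf("%d ... %d\n", out[0], out[15]);	/* 5 ... 20 */
	return 0;
}
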
+ 32 - 23
arch/powerpc/lib/memcpy_power7.S

@@ -20,6 +20,15 @@
 #include <asm/ppc_asm.h>
 
 _GLOBAL(memcpy_power7)
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
 #ifdef CONFIG_ALTIVEC
 	cmpldi	r5,16
 	cmpldi	cr1,r5,4096
@@ -485,13 +494,13 @@ _GLOBAL(memcpy_power7)
 	li	r10,32
 	li	r11,48
 
-	lvsl	vr16,0,r4	/* Setup permute control vector */
+	LVS(vr16,0,r4)		/* Setup permute control vector */
 	lvx	vr0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 	stvx	vr8,r0,r3
 	addi	r3,r3,16
@@ -499,9 +508,9 @@ _GLOBAL(memcpy_power7)
 
 5:	bf	cr7*4+2,6f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -509,13 +518,13 @@ _GLOBAL(memcpy_power7)
 
 6:	bf	cr7*4+1,7f
 	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -544,21 +553,21 @@ _GLOBAL(memcpy_power7)
 	.align	5
 8:
 	lvx	vr7,r0,r4
-	vperm	vr8,vr0,vr7,vr16
+	VPERM(vr8,vr0,vr7,vr16)
 	lvx	vr6,r4,r9
-	vperm	vr9,vr7,vr6,vr16
+	VPERM(vr9,vr7,vr6,vr16)
 	lvx	vr5,r4,r10
-	vperm	vr10,vr6,vr5,vr16
+	VPERM(vr10,vr6,vr5,vr16)
 	lvx	vr4,r4,r11
-	vperm	vr11,vr5,vr4,vr16
+	VPERM(vr11,vr5,vr4,vr16)
 	lvx	vr3,r4,r12
-	vperm	vr12,vr4,vr3,vr16
+	VPERM(vr12,vr4,vr3,vr16)
 	lvx	vr2,r4,r14
-	vperm	vr13,vr3,vr2,vr16
+	VPERM(vr13,vr3,vr2,vr16)
 	lvx	vr1,r4,r15
-	vperm	vr14,vr2,vr1,vr16
+	VPERM(vr14,vr2,vr1,vr16)
 	lvx	vr0,r4,r16
-	vperm	vr15,vr1,vr0,vr16
+	VPERM(vr15,vr1,vr0,vr16)
 	addi	r4,r4,128
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -582,13 +591,13 @@ _GLOBAL(memcpy_power7)
 
 	bf	cr7*4+1,9f
 	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -598,9 +607,9 @@ _GLOBAL(memcpy_power7)
 
 9:	bf	cr7*4+2,10f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -608,7 +617,7 @@ _GLOBAL(memcpy_power7)
 
 10:	bf	cr7*4+3,11f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 	stvx	vr8,r0,r3
 	addi	r3,r3,16

+ 85 - 12
arch/powerpc/lib/sstep.c

@@ -212,11 +212,19 @@ static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,
 {
 	int err;
 	unsigned long x, b, c;
+#ifdef __LITTLE_ENDIAN__
+	int len = nb; /* save a copy of the length for byte reversal */
+#endif
 
 	/* unaligned, do this in pieces */
 	x = 0;
 	for (; nb > 0; nb -= c) {
+#ifdef __LITTLE_ENDIAN__
+		c = 1;
+#endif
+#ifdef __BIG_ENDIAN__
 		c = max_align(ea);
+#endif
 		if (c > nb)
 			c = max_align(nb);
 		err = read_mem_aligned(&b, ea, c);
@@ -225,7 +233,24 @@ static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,
 		x = (x << (8 * c)) + b;
 		ea += c;
 	}
+#ifdef __LITTLE_ENDIAN__
+	switch (len) {
+	case 2:
+		*dest = byterev_2(x);
+		break;
+	case 4:
+		*dest = byterev_4(x);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		*dest = byterev_8(x);
+		break;
+#endif
+	}
+#endif
+#ifdef __BIG_ENDIAN__
 	*dest = x;
+#endif
 	return 0;
 }
 
@@ -273,9 +298,29 @@ static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea,
 	int err;
 	unsigned long c;
 
+#ifdef __LITTLE_ENDIAN__
+	switch (nb) {
+	case 2:
+		val = byterev_2(val);
+		break;
+	case 4:
+		val = byterev_4(val);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		val = byterev_8(val);
+		break;
+#endif
+	}
+#endif
 	/* unaligned or little-endian, do this in pieces */
 	for (; nb > 0; nb -= c) {
+#ifdef __LITTLE_ENDIAN__
+		c = 1;
+#endif
+#ifdef __BIG_ENDIAN__
 		c = max_align(ea);
+#endif
 		if (c > nb)
 			c = max_align(nb);
 		err = write_mem_aligned(val >> (nb - c) * 8, ea, c);
@@ -310,22 +355,36 @@ static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
 				struct pt_regs *regs)
 {
 	int err;
-	unsigned long val[sizeof(double) / sizeof(long)];
+	union {
+		double dbl;
+		unsigned long ul[2];
+		struct {
+#ifdef __BIG_ENDIAN__
+			unsigned _pad_;
+			unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+			unsigned word;
+			unsigned _pad_;
+#endif
+		} single;
+	} data;
 	unsigned long ptr;
 
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	if ((ea & 3) == 0)
 		return (*func)(rn, ea);
-	ptr = (unsigned long) &val[0];
+	ptr = (unsigned long) &data.ul;
 	if (sizeof(unsigned long) == 8 || nb == 4) {
-		err = read_mem_unaligned(&val[0], ea, nb, regs);
-		ptr += sizeof(unsigned long) - nb;
+		err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
+		if (nb == 4)
+			ptr = (unsigned long)&(data.single.word);
 	} else {
 		/* reading a double on 32-bit */
-		err = read_mem_unaligned(&val[0], ea, 4, regs);
+		err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
 		if (!err)
-			err = read_mem_unaligned(&val[1], ea + 4, 4, regs);
+			err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
 	}
 	if (err)
 		return err;
@@ -337,28 +396,42 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
 				 struct pt_regs *regs)
 {
 	int err;
-	unsigned long val[sizeof(double) / sizeof(long)];
+	union {
+		double dbl;
+		unsigned long ul[2];
+		struct {
+#ifdef __BIG_ENDIAN__
+			unsigned _pad_;
+			unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+			unsigned word;
+			unsigned _pad_;
+#endif
+		} single;
+	} data;
 	unsigned long ptr;
 
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	if ((ea & 3) == 0)
 		return (*func)(rn, ea);
-	ptr = (unsigned long) &val[0];
+	ptr = (unsigned long) &data.ul[0];
 	if (sizeof(unsigned long) == 8 || nb == 4) {
-		ptr += sizeof(unsigned long) - nb;
+		if (nb == 4)
+			ptr = (unsigned long)&(data.single.word);
 		err = (*func)(rn, ptr);
 		if (err)
 			return err;
-		err = write_mem_unaligned(val[0], ea, nb, regs);
+		err = write_mem_unaligned(data.ul[0], ea, nb, regs);
 	} else {
 		/* writing a double on 32-bit */
 		err = (*func)(rn, ptr);
 		if (err)
 			return err;
-		err = write_mem_unaligned(val[0], ea, 4, regs);
+		err = write_mem_unaligned(data.ul[0], ea, 4, regs);
 		if (!err)
-			err = write_mem_unaligned(val[1], ea + 4, 4, regs);
+			err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
 	}
 	return err;
 }

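On the little-endian path, read_mem_unaligned assembles the value byte-by-byte most-significant-first and then reverses it, and write_mem_unaligned reverses before splitting. A user-space sketch of the helpers this relies on (written to match sstep.c's existing byterev_2/byterev_4 from memory, not quoted from the patch):

#include <stdio.h>

static unsigned long byterev_2(unsigned long x)
{
	return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
}

static unsigned long byterev_4(unsigned long x)
{
	return ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) |
	       ((x & 0xff00) << 8) | ((x & 0xff) << 24);
}

int main(void)
{
	printf("%#lx\n", byterev_2(0x1122ul));		/* 0x2211 */
	printf("%#lx\n", byterev_4(0x11223344ul));	/* 0x44332211 */
	return 0;
}
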
+ 177 - 0
arch/powerpc/lib/xor_vmx.c

@@ -0,0 +1,177 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <altivec.h>
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+
+typedef vector signed char unative_t;
+
+#define DEFINE(V)				\
+	unative_t *V = (unative_t *)V##_in;	\
+	unative_t V##_0, V##_1, V##_2, V##_3
+
+#define LOAD(V)			\
+	do {			\
+		V##_0 = V[0];	\
+		V##_1 = V[1];	\
+		V##_2 = V[2];	\
+		V##_3 = V[3];	\
+	} while (0)
+
+#define STORE(V)		\
+	do {			\
+		V[0] = V##_0;	\
+		V[1] = V##_1;	\
+		V[2] = V##_2;	\
+		V[3] = V##_3;	\
+	} while (0)
+
+#define XOR(V1, V2)					\
+	do {						\
+		V1##_0 = vec_xor(V1##_0, V2##_0);	\
+		V1##_1 = vec_xor(V1##_1, V2##_1);	\
+		V1##_2 = vec_xor(V1##_2, V2##_2);	\
+		V1##_3 = vec_xor(V1##_3, V2##_3);	\
+	} while (0)
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		XOR(v1, v2);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		XOR(v1, v2);
+		XOR(v1, v3);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	DEFINE(v4);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		LOAD(v4);
+		XOR(v1, v2);
+		XOR(v3, v4);
+		XOR(v1, v3);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+		v4 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+		   unsigned long *v2_in, unsigned long *v3_in,
+		   unsigned long *v4_in, unsigned long *v5_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	DEFINE(v4);
+	DEFINE(v5);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+	preempt_disable();
+	enable_kernel_altivec();
+
+	do {
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		LOAD(v4);
+		LOAD(v5);
+		XOR(v1, v2);
+		XOR(v3, v4);
+		XOR(v1, v5);
+		XOR(v1, v3);
+		STORE(v1);
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+		v4 += 4;
+		v5 += 4;
+	} while (--lines > 0);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);

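Each xor_altivec_N iteration XORs four 16-byte vectors per source, i.e. 64 bytes, inside a preempt_disable()/enable_kernel_altivec() window, and the do/while runs at least once, so callers are expected to pass a nonzero multiple of 64 bytes. A plain-C reference model of xor_altivec_2 (illustrative only, without the Altivec registers and preemption handling):

#include <stddef.h>

static void xor_scalar_2(unsigned long bytes, unsigned long *v1,
			 const unsigned long *v2)
{
	size_t n = bytes / sizeof(unsigned long);

	for (size_t i = 0; i < n; i++)
		v1[i] ^= v2[i];		/* same result, one word at a time */
}
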
+ 26 - 20
arch/powerpc/mm/hash_native_64.c

@@ -35,7 +35,11 @@
 #define DBG_LOW(fmt...)
 #endif
 
+#ifdef __BIG_ENDIAN__
 #define HPTE_LOCK_BIT 3
+#else
+#define HPTE_LOCK_BIT (56+3)
+#endif
 
 DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
@@ -172,7 +176,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
 
 static inline void native_lock_hpte(struct hash_pte *hptep)
 {
-	unsigned long *word = &hptep->v;
+	unsigned long *word = (unsigned long *)&hptep->v;
 
 	while (1) {
 		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
@@ -184,7 +188,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
 
 static inline void native_unlock_hpte(struct hash_pte *hptep)
 {
-	unsigned long *word = &hptep->v;
+	unsigned long *word = (unsigned long *)&hptep->v;
 
 	clear_bit_unlock(HPTE_LOCK_BIT, word);
 }
@@ -204,10 +208,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 	}
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
-		if (! (hptep->v & HPTE_V_VALID)) {
+		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
 			/* retry with lock held */
 			native_lock_hpte(hptep);
-			if (! (hptep->v & HPTE_V_VALID))
+			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
 				break;
 			native_unlock_hpte(hptep);
 		}
@@ -226,14 +230,14 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 			i, hpte_v, hpte_r);
 	}
 
-	hptep->r = hpte_r;
+	hptep->r = cpu_to_be64(hpte_r);
 	/* Guarantee the second dword is visible before the valid bit */
 	eieio();
 	/*
 	 * Now set the first dword including the valid bit
 	 * NOTE: this also unlocks the hpte
 	 */
-	hptep->v = hpte_v;
+	hptep->v = cpu_to_be64(hpte_v);
 
 	__asm__ __volatile__ ("ptesync" : : : "memory");
 
@@ -254,12 +258,12 @@ static long native_hpte_remove(unsigned long hpte_group)
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
 		hptep = htab_address + hpte_group + slot_offset;
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
 			/* retry with lock held */
 			native_lock_hpte(hptep);
-			hpte_v = hptep->v;
+			hpte_v = be64_to_cpu(hptep->v);
 			if ((hpte_v & HPTE_V_VALID)
 			    && !(hpte_v & HPTE_V_BOLTED))
 				break;
@@ -294,7 +298,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 
 	native_lock_hpte(hptep);
 
-	hpte_v = hptep->v;
+	hpte_v = be64_to_cpu(hptep->v);
 	/*
 	 * We need to invalidate the TLB always because hpte_remove doesn't do
 	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -308,8 +312,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	} else {
 		DBG_LOW(" -> hit\n");
 		/* Update the HPTE */
-		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
-			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
+		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
+			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
 	}
 	native_unlock_hpte(hptep);
 
@@ -334,7 +338,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
 		hptep = htab_address + slot;
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
 			/* HPTE matches */
@@ -369,8 +373,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 	hptep = htab_address + slot;
 
 	/* Update the HPTE */
-	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
-		(newpp & (HPTE_R_PP | HPTE_R_N));
+	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
+			~(HPTE_R_PP | HPTE_R_N)) |
+		(newpp & (HPTE_R_PP | HPTE_R_N)));
 	/*
 	 * Ensure it is out of the tlb too. Bolted entries base and
 	 * actual page size will be same.
@@ -392,7 +397,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 
 	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
 	native_lock_hpte(hptep);
-	hpte_v = hptep->v;
+	hpte_v = be64_to_cpu(hptep->v);
 
 	/*
 	 * We need to invalidate the TLB always because hpte_remove doesn't do
@@ -458,7 +463,7 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
 		hptep = htab_address + slot;
 		want_v = hpte_encode_avpn(vpn, psize, ssize);
 		native_lock_hpte(hptep);
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		/* Even if we miss, we need to invalidate the TLB */
 		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
@@ -519,11 +524,12 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			int *psize, int *apsize, int *ssize, unsigned long *vpn)
 {
 	unsigned long avpn, pteg, vpi;
-	unsigned long hpte_v = hpte->v;
+	unsigned long hpte_v = be64_to_cpu(hpte->v);
+	unsigned long hpte_r = be64_to_cpu(hpte->r);
 	unsigned long vsid, seg_off;
 	int size, a_size, shift;
 	/* Look at the 8 bit LP value */
-	unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
 
 	if (!(hpte_v & HPTE_V_LARGE)) {
 		size   = MMU_PAGE_4K;
@@ -612,7 +618,7 @@ static void native_hpte_clear(void)
 		 * running,  right?  and for crash dump, we probably
 		 * don't want to wait for a maybe bad cpu.
 		 */
-		hpte_v = hptep->v;
+		hpte_v = be64_to_cpu(hptep->v);
 
 		/*
 		 * Call __tlbie() here rather than tlbie() since we
@@ -664,7 +670,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 			hptep = htab_address + slot;
 			want_v = hpte_encode_avpn(vpn, psize, ssize);
 			native_lock_hpte(hptep);
-			hpte_v = hptep->v;
+			hpte_v = be64_to_cpu(hptep->v);
 			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
 			    !(hpte_v & HPTE_V_VALID))
 				native_unlock_hpte(hptep);

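The (56+3) definition of HPTE_LOCK_BIT falls out of Linux bit-op numbering: test_and_set_bit() counts from the LSB of a native-endian long, and the HPTE dword is stored big-endian, so BE bit 3 lands in the top byte when the same memory is read little-endian. A quick demonstration using GCC-style builtins to model the byte-swapped view:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t be_view = 1ull << 3;		/* lock bit as a BE CPU sees it */
	uint64_t le_view = __builtin_bswap64(be_view);	/* same memory, LE read */

	printf("LE bit number: %d\n", __builtin_ctzll(le_view));  /* 59 = 56+3 */
	return 0;
}
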
+ 18 - 20
arch/powerpc/mm/hash_utils_64.c

@@ -251,19 +251,18 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 void *data)
 {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	u32 *prop;
+	__be32 *prop;
 	unsigned long size = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,processor-segment-sizes",
-					  &size);
+	prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &size);
 	if (prop == NULL)
 		return 0;
 	for (; size >= 4; size -= 4, ++prop) {
-		if (prop[0] == 40) {
+		if (be32_to_cpu(prop[0]) == 40) {
 			DBG("1T segment support detected\n");
 			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
 			return 1;
@@ -307,23 +306,22 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 					  void *data)
 {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	u32 *prop;
+	__be32 *prop;
 	unsigned long size = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	prop = (u32 *)of_get_flat_dt_prop(node,
-					  "ibm,segment-page-sizes", &size);
+	prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
 	if (prop != NULL) {
 		pr_info("Page sizes from device-tree:\n");
 		size /= 4;
 		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
 		while(size > 0) {
-			unsigned int base_shift = prop[0];
-			unsigned int slbenc = prop[1];
-			unsigned int lpnum = prop[2];
+			unsigned int base_shift = be32_to_cpu(prop[0]);
+			unsigned int slbenc = be32_to_cpu(prop[1]);
+			unsigned int lpnum = be32_to_cpu(prop[2]);
 			struct mmu_psize_def *def;
 			int idx, base_idx;
 
@@ -356,8 +354,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 				def->tlbiel = 0;
 
 			while (size > 0 && lpnum) {
-				unsigned int shift = prop[0];
-				int penc  = prop[1];
+				unsigned int shift = be32_to_cpu(prop[0]);
+				int penc  = be32_to_cpu(prop[1]);
 
 				prop += 2; size -= 2;
 				lpnum--;
@@ -390,8 +388,8 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 					const char *uname, int depth,
 					void *data) {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	unsigned long *addr_prop;
-	u32 *page_count_prop;
+	__be64 *addr_prop;
+	__be32 *page_count_prop;
 	unsigned int expected_pages;
 	long unsigned int phys_addr;
 	long unsigned int block_size;
@@ -405,12 +403,12 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 	page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
 	if (page_count_prop == NULL)
 		return 0;
-	expected_pages = (1 << page_count_prop[0]);
+	expected_pages = (1 << be32_to_cpu(page_count_prop[0]));
 	addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
 	if (addr_prop == NULL)
 		return 0;
-	phys_addr = addr_prop[0];
-	block_size = addr_prop[1];
+	phys_addr = be64_to_cpu(addr_prop[0]);
+	block_size = be64_to_cpu(addr_prop[1]);
 	if (block_size != (16 * GB))
 		return 0;
 	printk(KERN_INFO "Huge page(16GB) memory: "
@@ -534,16 +532,16 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
 				       void *data)
 {
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	u32 *prop;
+	__be32 *prop;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+	prop = of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
 	if (prop != NULL) {
 		/* pft_size[0] is the NUMA CEC cookie */
-		ppc64_pft_size = prop[1];
+		ppc64_pft_size = be32_to_cpu(prop[1]);
 		return 1;
 	}
 	return 0;

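Flattened device-tree cells are big-endian by specification, so the endian-clean pattern above types the property pointer __be32 and converts each cell with be32_to_cpu() on use. A self-contained model (assuming user-space stand-ins for the kernel's __be32 type and be32_to_cpu(), and a GCC-style byte-order macro):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

typedef uint32_t be32;		/* stand-in for the kernel's __be32 */

static uint32_t model_be32_to_cpu(be32 x)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return __builtin_bswap32(x);
#else
	return x;
#endif
}

int main(void)
{
	uint8_t raw[4] = { 0x00, 0x00, 0x00, 0x28 };	/* cell value 40, BE */
	be32 cell;

	memcpy(&cell, raw, sizeof(cell));
	if (model_be32_to_cpu(cell) == 40)
		printf("1T segment support detected\n");
	return 0;
}
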
+ 5 - 0
arch/powerpc/mm/init_32.c

@@ -213,7 +213,12 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	 */
 	BUG_ON(first_memblock_base != 0);
 
+#ifdef CONFIG_PIN_TLB
+	/* 8xx can only access 24MB at the moment */
+	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
+#else
 	/* 8xx can only access 8MB at the moment */
 	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
+#endif
 }
 #endif /* CONFIG_8xx */

Some files were not shown because too many files changed in this diff