
Merge branch 'cleanups/dma' into next/cleanup

Separate patches from Marek Szyprowski <m.szyprowski@samsung.com>:

Commit e9da6e9905e639b0 ("ARM: dma-mapping: remove custom consistent dma
region") replaced the custom consistent-memory handling, so setting the
consistent DMA memory size is no longer required. This patch series
cleans up sub-architecture platform code to remove all calls to the
obsolete init_consistent_dma_size() function and finally removes the
init_consistent_dma_size() stub itself.
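
For context, a minimal sketch (not from the series; the board file and names
are hypothetical) of what this means for a platform's map_io() callback —
the old call simply disappears, and a platform that still needs unusually
large atomic coherent allocations can size the pool with
init_dma_coherent_pool_size() instead, which the new code keeps:

	static void __init board_map_io(void)
	{
		iotable_init(board_io_desc, ARRAY_SIZE(board_io_desc));

		/* init_consistent_dma_size(SZ_4M); -- obsolete, removed */

		/* Only if atomic coherent allocations exceed the default
		 * pool size (assumed to be 256KB in this kernel generation):
		 */
		init_dma_coherent_pool_size(SZ_4M);
	}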

* cleanups/dma:
  ARM: at91: remove obsoleted init_consistent_dma_size()
  ARM: u300: remove obsoleted init_consistent_dma_size()
  ARM: dma-mapping: remove init_consistent_dma_size() stub
  ARM: shmobile: remove obsoleted init_consistent_dma_size()
  ARM: davinci: remove obsoleted init_consistent_dma_size()
  ARM: samsung: remove obsoleted init_consistent_dma_size()

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
commit b5932cc839 — Arnd Bergmann, 12 years ago
100 changed files with 618 additions and 441 deletions
  1. + 6 - 6  Documentation/arm64/memory.txt
  2. + 19 - 0  Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt
  3. + 1 - 1  Documentation/hwmon/fam15h_power
  4. + 3 - 2  MAINTAINERS
  5. + 1 - 1  Makefile
  6. + 0 - 7  arch/arm/include/asm/dma-mapping.h
  7. + 2 - 2  arch/arm/include/asm/io.h
  8. + 0 - 2  arch/arm/include/asm/sched_clock.h
  9. + 6 - 6  arch/arm/include/asm/vfpmacros.h
  10. + 2 - 1  arch/arm/include/uapi/asm/hwcap.h
  11. + 4 - 14  arch/arm/kernel/sched_clock.c
  12. + 0 - 1  arch/arm/mach-at91/at91sam9g45.c
  13. + 0 - 2  arch/arm/mach-davinci/common.c
  14. + 0 - 1  arch/arm/mach-s3c64xx/common.c
  15. + 0 - 2  arch/arm/mach-s5p64x0/common.c
  16. + 0 - 2  arch/arm/mach-s5pv210/common.c
  17. + 0 - 6  arch/arm/mach-shmobile/setup-r8a7740.c
  18. + 0 - 6  arch/arm/mach-shmobile/setup-sh7372.c
  19. + 0 - 2  arch/arm/mach-u300/core.c
  20. + 1 - 1  arch/arm/mm/alignment.c
  21. + 6 - 3  arch/arm/vfp/vfpmodule.c
  22. + 11 - 0  arch/arm/xen/enlighten.c
  23. + 5 - 9  arch/arm/xen/hypercall.S
  24. + 1 - 0  arch/arm64/Kconfig
  25. + 1 - 4  arch/arm64/include/asm/elf.h
  26. + 2 - 3  arch/arm64/include/asm/fpsimd.h
  27. + 4 - 4  arch/arm64/include/asm/io.h
  28. + 2 - 0  arch/arm64/include/asm/processor.h
  29. + 0 - 1  arch/arm64/include/asm/unistd.h
  30. + 2 - 8  arch/arm64/kernel/perf_event.c
  31. + 0 - 18  arch/arm64/kernel/process.c
  32. + 1 - 2  arch/arm64/kernel/smp.c
  33. + 1 - 1  arch/arm64/mm/init.c
  34. + 1 - 0  arch/frv/Kconfig
  35. + 6 - 4  arch/frv/boot/Makefile
  36. + 0 - 1  arch/frv/include/asm/unistd.h
  37. + 3 - 25  arch/frv/kernel/entry.S
  38. + 3 - 2  arch/frv/kernel/process.c
  39. + 1 - 0  arch/frv/mb93090-mb00/pci-dma-nommu.c
  40. + 2 - 1  arch/h8300/include/asm/cache.h
  41. + 2 - 0  arch/s390/include/asm/cio.h
  42. + 22 - 13  arch/s390/include/asm/pgtable.h
  43. + 7 - 1  arch/s390/kernel/sclp.S
  44. + 1 - 1  arch/s390/lib/uaccess_pt.c
  45. + 1 - 1  arch/s390/mm/gup.c
  46. + 1 - 0  arch/sparc/Kconfig
  47. + 8 - 8  arch/sparc/crypto/Makefile
  48. + 2 - 0  arch/sparc/crypto/aes_glue.c
  49. + 2 - 0  arch/sparc/crypto/camellia_glue.c
  50. + 2 - 0  arch/sparc/crypto/crc32c_glue.c
  51. + 2 - 0  arch/sparc/crypto/des_glue.c
  52. + 2 - 0  arch/sparc/crypto/md5_glue.c
  53. + 2 - 0  arch/sparc/crypto/sha1_glue.c
  54. + 2 - 0  arch/sparc/crypto/sha256_glue.c
  55. + 2 - 0  arch/sparc/crypto/sha512_glue.c
  56. + 3 - 1  arch/sparc/include/asm/atomic_64.h
  57. + 59 - 10  arch/sparc/include/asm/backoff.h
  58. + 3 - 2  arch/sparc/include/asm/compat.h
  59. + 16 - 1  arch/sparc/include/asm/processor_64.h
  60. + 5 - 0  arch/sparc/include/asm/prom.h
  61. + 5 - 0  arch/sparc/include/asm/thread_info_64.h
  62. + 16 - 8  arch/sparc/include/asm/ttable.h
  63. + 6 - 1  arch/sparc/include/uapi/asm/unistd.h
  64. + 7 - 0  arch/sparc/kernel/entry.h
  65. + 4 - 2  arch/sparc/kernel/leon_kernel.c
  66. + 16 - 6  arch/sparc/kernel/perf_event.c
  67. + 23 - 19  arch/sparc/kernel/process_64.c
  68. + 2 - 2  arch/sparc/kernel/ptrace_64.c
  69. + 21 - 0  arch/sparc/kernel/setup_64.c
  70. + 5 - 0  arch/sparc/kernel/sys_sparc_64.c
  71. + 1 - 0  arch/sparc/kernel/systbls_32.S
  72. + 2 - 0  arch/sparc/kernel/systbls_64.S
  73. + 23 - 13  arch/sparc/kernel/unaligned_64.c
  74. + 14 - 9  arch/sparc/kernel/visemul.c
  75. + 5 - 0  arch/sparc/kernel/vmlinux.lds.S
  76. + 2 - 0  arch/sparc/kernel/winfixup.S
  77. + 15 - 1  arch/sparc/lib/atomic_64.S
  78. + 1 - 0  arch/sparc/lib/ksyms.c
  79. + 1 - 1  arch/sparc/math-emu/math_64.c
  80. + 7 - 14  arch/x86/include/asm/xen/hypercall.h
  81. + 0 - 1  arch/x86/include/asm/xen/hypervisor.h
  82. + 34 - 26  arch/x86/kvm/x86.c
  83. + 20 - 1  arch/x86/xen/mmu.c
  84. + 2 - 0  arch/xtensa/Kconfig
  85. + 4 - 0  arch/xtensa/include/asm/io.h
  86. + 1 - 3  arch/xtensa/include/asm/processor.h
  87. + 1 - 1  arch/xtensa/include/asm/syscall.h
  88. + 5 - 10  arch/xtensa/include/asm/unistd.h
  89. + 4 - 12  arch/xtensa/include/uapi/asm/unistd.h
  90. + 13 - 44  arch/xtensa/kernel/entry.S
  91. + 71 - 57  arch/xtensa/kernel/process.c
  92. + 3 - 4  arch/xtensa/kernel/syscall.c
  93. + 0 - 1  arch/xtensa/kernel/xtensa_ksyms.c
  94. + 1 - 1  block/Kconfig
  95. + 10 - 0  block/blk-cgroup.c
  96. + 2 - 1  block/blk-core.c
  97. + 8 - 3  crypto/cryptd.c
  98. + 7 - 4  drivers/acpi/video.c
  99. + 7 - 0  drivers/base/platform.c
  100. + 8 - 7  drivers/block/Kconfig

+ 6 - 6
Documentation/arm64/memory.txt

@@ -27,17 +27,17 @@ Start			End			Size		Use
 -----------------------------------------------------------------------
 0000000000000000	0000007fffffffff	 512GB		user
 
-ffffff8000000000	ffffffbbfffcffff	~240GB		vmalloc
+ffffff8000000000	ffffffbbfffeffff	~240GB		vmalloc
 
-ffffffbbfffd0000	ffffffbcfffdffff	  64KB		[guard page]
+ffffffbbffff0000	ffffffbbffffffff	  64KB		[guard page]
 
-ffffffbbfffe0000	ffffffbcfffeffff	  64KB		PCI I/O space
+ffffffbc00000000	ffffffbdffffffff	   8GB		vmemmap
 
-ffffffbbffff0000	ffffffbcffffffff	  64KB		[guard page]
+ffffffbe00000000	ffffffbffbbfffff	  ~8GB		[guard, future vmmemap]
 
-ffffffbc00000000	ffffffbdffffffff	   8GB		vmemmap
+ffffffbffbe00000	ffffffbffbe0ffff	  64KB		PCI I/O space
 
-ffffffbe00000000	ffffffbffbffffff	  ~8GB		[guard, future vmmemap]
+ffffffbbffff0000	ffffffbcffffffff	  ~2MB		[guard]
 
 ffffffbffc000000	ffffffbfffffffff	  64MB		modules
 

+ 19 - 0
Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt

@@ -0,0 +1,19 @@
+* EETI eGalax Multiple Touch Controller
+
+Required properties:
+- compatible: must be "eeti,egalax_ts"
+- reg: i2c slave address
+- interrupt-parent: the phandle for the interrupt controller
+- interrupts: touch controller interrupt
+- wakeup-gpios: the gpio pin to be used for waking up the controller
+  as well as uased as irq pin
+
+Example:
+
+	egalax_ts@04 {
+		compatible = "eeti,egalax_ts";
+		reg = <0x04>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <9 2>;
+		wakeup-gpios = <&gpio1 9 0>;
+	};

+ 1 - 1
Documentation/hwmon/fam15h_power

@@ -10,7 +10,7 @@ Supported chips:
   BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors
     (not yet published)
 
-Author: Andreas Herrmann <andreas.herrmann3@amd.com>
+Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
 
 Description
 -----------

+ 3 - 2
MAINTAINERS

@@ -503,7 +503,7 @@ F:	include/linux/altera_uart.h
 F:	include/linux/altera_jtaguart.h
 
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
-M:	Andreas Herrmann <andreas.herrmann3@amd.com>
+M:	Andreas Herrmann <herrmann.der.user@googlemail.com>
 L:	lm-sensors@lm-sensors.org
 S:	Maintained
 F:	Documentation/hwmon/fam15h_power
@@ -2506,6 +2506,7 @@ M:	Joonyoung Shim <jy0922.shim@samsung.com>
 M:	Seung-Woo Kim <sw0312.kim@samsung.com>
 M:	Kyungmin Park <kyungmin.park@samsung.com>
 L:	dri-devel@lists.freedesktop.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos.git
 S:	Supported
 F:	drivers/gpu/drm/exynos
 F:	include/drm/exynos*
@@ -5646,7 +5647,7 @@ S:	Maintained
 F:	drivers/pinctrl/spear/
 
 PKTCDVD DRIVER
-M:	Peter Osterlund <petero2@telia.com>
+M:	Jiri Kosina <jkosina@suse.cz>
 S:	Maintained
 F:	drivers/block/pktcdvd.c
 F:	include/linux/pktcdvd.h

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc5
 NAME = Terrified Chipmunk
 
 # *DOCUMENTATION*

+ 0 - 7
arch/arm/include/asm/dma-mapping.h

@@ -210,13 +210,6 @@ static inline void dma_free_writecombine(struct device *dev, size_t size,
  */
 extern void __init init_dma_coherent_pool_size(unsigned long size);
 
-/*
- * This can be called during boot to increase the size of the consistent
- * DMA region above it's default value of 2MB. It must be called before the
- * memory allocator is initialised, i.e. before any core_initcall.
- */
-static inline void init_consistent_dma_size(unsigned long size) { }
-
 /*
  * For SA-1111, IXP425, and ADI systems  the dma-mapping functions are "magic"
  * and utilize bounce buffers as needed to work around limited DMA windows.

+ 2 - 2
arch/arm/include/asm/io.h

@@ -64,7 +64,7 @@ extern void __raw_readsl(const void __iomem *addr, void *data, int longlen);
 static inline void __raw_writew(u16 val, volatile void __iomem *addr)
 {
 	asm volatile("strh %1, %0"
-		     : "+Qo" (*(volatile u16 __force *)addr)
+		     : "+Q" (*(volatile u16 __force *)addr)
 		     : "r" (val));
 }
 
@@ -72,7 +72,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
 {
 	u16 val;
 	asm volatile("ldrh %1, %0"
-		     : "+Qo" (*(volatile u16 __force *)addr),
+		     : "+Q" (*(volatile u16 __force *)addr),
 		       "=r" (val));
 	return val;
 }

+ 0 - 2
arch/arm/include/asm/sched_clock.h

@@ -10,7 +10,5 @@
 
 extern void sched_clock_postinit(void);
 extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
-extern void setup_sched_clock_needs_suspend(u32 (*read)(void), int bits,
-		unsigned long rate);
 
 #endif

+ 6 - 6
arch/arm/include/asm/vfpmacros.h

@@ -27,9 +27,9 @@
 #if __LINUX_ARM_ARCH__ <= 6
 	ldr	\tmp, =elf_hwcap		    @ may not have MVFR regs
 	ldr	\tmp, [\tmp, #0]
-	tst	\tmp, #HWCAP_VFPv3D16
-	ldceql	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
-	addne	\base, \base, #32*4		    @ step over unused register space
+	tst	\tmp, #HWCAP_VFPD32
+	ldcnel	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
+	addeq	\base, \base, #32*4		    @ step over unused register space
 #else
 	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
 	and	\tmp, \tmp, #MVFR0_A_SIMD_MASK	    @ A_SIMD field
@@ -51,9 +51,9 @@
 #if __LINUX_ARM_ARCH__ <= 6
 	ldr	\tmp, =elf_hwcap		    @ may not have MVFR regs
 	ldr	\tmp, [\tmp, #0]
-	tst	\tmp, #HWCAP_VFPv3D16
-	stceql	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
-	addne	\base, \base, #32*4		    @ step over unused register space
+	tst	\tmp, #HWCAP_VFPD32
+	stcnel	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
+	addeq	\base, \base, #32*4		    @ step over unused register space
 #else
 	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
 	and	\tmp, \tmp, #MVFR0_A_SIMD_MASK	    @ A_SIMD field

+ 2 - 1
arch/arm/include/uapi/asm/hwcap.h

@@ -18,11 +18,12 @@
 #define HWCAP_THUMBEE	(1 << 11)
 #define HWCAP_NEON	(1 << 12)
 #define HWCAP_VFPv3	(1 << 13)
-#define HWCAP_VFPv3D16	(1 << 14)
+#define HWCAP_VFPv3D16	(1 << 14)	/* also set for VFPv4-D16 */
 #define HWCAP_TLS	(1 << 15)
 #define HWCAP_VFPv4	(1 << 16)
 #define HWCAP_IDIVA	(1 << 17)
 #define HWCAP_IDIVT	(1 << 18)
+#define HWCAP_VFPD32	(1 << 19)	/* set if VFP has 32 regs (not 16) */
 #define HWCAP_IDIV	(HWCAP_IDIVA | HWCAP_IDIVT)
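
A hedged userspace sketch of how the new bit could be consumed (getauxval()
requires glibc >= 2.16; the fallback define mirrors the value added above):

	#include <stdio.h>
	#include <sys/auxv.h>

	#ifndef HWCAP_VFPD32
	#define HWCAP_VFPD32	(1 << 19)	/* value from the hunk above */
	#endif

	int main(void)
	{
		unsigned long hwcap = getauxval(AT_HWCAP);

		/* HWCAP_VFPv3D16 used to imply only 16 double registers. */
		if (hwcap & HWCAP_VFPD32)
			printf("VFP has 32 double-precision registers\n");
		else
			printf("VFP has 16 double-precision registers\n");
		return 0;
	}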
 
 

+ 4 - 14
arch/arm/kernel/sched_clock.c

@@ -107,13 +107,6 @@ static void sched_clock_poll(unsigned long wrap_ticks)
 	update_sched_clock();
 }
 
-void __init setup_sched_clock_needs_suspend(u32 (*read)(void), int bits,
-		unsigned long rate)
-{
-	setup_sched_clock(read, bits, rate);
-	cd.needs_suspend = true;
-}
-
 void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 {
 	unsigned long r, w;
@@ -189,18 +182,15 @@ void __init sched_clock_postinit(void)
 static int sched_clock_suspend(void)
 {
 	sched_clock_poll(sched_clock_timer.data);
-	if (cd.needs_suspend)
-		cd.suspended = true;
+	cd.suspended = true;
 	return 0;
 }
 
 static void sched_clock_resume(void)
 {
-	if (cd.needs_suspend) {
-		cd.epoch_cyc = read_sched_clock();
-		cd.epoch_cyc_copy = cd.epoch_cyc;
-		cd.suspended = false;
-	}
+	cd.epoch_cyc = read_sched_clock();
+	cd.epoch_cyc_copy = cd.epoch_cyc;
+	cd.suspended = false;
 }
 
 static struct syscore_ops sched_clock_ops = {

+ 0 - 1
arch/arm/mach-at91/at91sam9g45.c

@@ -343,7 +343,6 @@ static struct at91_gpio_bank at91sam9g45_gpio[] __initdata = {
 static void __init at91sam9g45_map_io(void)
 {
 	at91_init_sram(0, AT91SAM9G45_SRAM_BASE, AT91SAM9G45_SRAM_SIZE);
-	init_consistent_dma_size(SZ_4M);
 }
 
 static void __init at91sam9g45_ioremap_registers(void)

+ 0 - 2
arch/arm/mach-davinci/common.c

@@ -87,8 +87,6 @@ void __init davinci_common_init(struct davinci_soc_info *soc_info)
 		iotable_init(davinci_soc_info.io_desc,
 				davinci_soc_info.io_desc_num);
 
-	init_consistent_dma_size(14 << 20);
-
 	/*
 	 * Normally devicemaps_init() would flush caches and tlb after
 	 * mdesc->map_io(), but we must also do it here because of the CPU

+ 0 - 1
arch/arm/mach-s3c64xx/common.c

@@ -155,7 +155,6 @@ void __init s3c64xx_init_io(struct map_desc *mach_desc, int size)
 	/* initialise the io descriptors we need for initialisation */
 	iotable_init(s3c_iodesc, ARRAY_SIZE(s3c_iodesc));
 	iotable_init(mach_desc, size);
-	init_consistent_dma_size(SZ_8M);
 
 	/* detect cpu id */
 	s3c64xx_init_cpu();

+ 0 - 2
arch/arm/mach-s5p64x0/common.c

@@ -187,7 +187,6 @@ void __init s5p6440_map_io(void)
 	s5p6440_default_sdhci2();
 
 	iotable_init(s5p6440_iodesc, ARRAY_SIZE(s5p6440_iodesc));
-	init_consistent_dma_size(SZ_8M);
 }
 
 void __init s5p6450_map_io(void)
@@ -202,7 +201,6 @@ void __init s5p6450_map_io(void)
 	s5p6450_default_sdhci2();
 
 	iotable_init(s5p6450_iodesc, ARRAY_SIZE(s5p6450_iodesc));
-	init_consistent_dma_size(SZ_8M);
 }
 
 /*

+ 0 - 2
arch/arm/mach-s5pv210/common.c

@@ -169,8 +169,6 @@ void __init s5pv210_init_io(struct map_desc *mach_desc, int size)
 
 void __init s5pv210_map_io(void)
 {
-	init_consistent_dma_size(14 << 20);
-
 	/* initialise device information early */
 	s5pv210_default_sdhci0();
 	s5pv210_default_sdhci1();

+ 0 - 6
arch/arm/mach-shmobile/setup-r8a7740.c

@@ -66,12 +66,6 @@ static struct map_desc r8a7740_io_desc[] __initdata = {
 void __init r8a7740_map_io(void)
 {
 	iotable_init(r8a7740_io_desc, ARRAY_SIZE(r8a7740_io_desc));
-
-	/*
-	 * DMA memory at 0xff200000 - 0xffdfffff. The default 2MB size isn't
-	 * enough to allocate the frame buffer memory.
-	 */
-	init_consistent_dma_size(12 << 20);
 }
 
 /* SCIFA0 */

+ 0 - 6
arch/arm/mach-shmobile/setup-sh7372.c

@@ -58,12 +58,6 @@ static struct map_desc sh7372_io_desc[] __initdata = {
 void __init sh7372_map_io(void)
 {
 	iotable_init(sh7372_io_desc, ARRAY_SIZE(sh7372_io_desc));
-
-	/*
-	 * DMA memory at 0xff200000 - 0xffdfffff. The default 2MB size isn't
-	 * enough to allocate the frame buffer memory.
-	 */
-	init_consistent_dma_size(12 << 20);
 }
 
 /* SCIFA0 */

+ 0 - 2
arch/arm/mach-u300/core.c

@@ -82,8 +82,6 @@ static struct map_desc u300_io_desc[] __initdata = {
 static void __init u300_map_io(void)
 {
 	iotable_init(u300_io_desc, ARRAY_SIZE(u300_io_desc));
-	/* We enable a real big DMA buffer if need be. */
-	init_consistent_dma_size(SZ_4M);
 }
 
 /*

+ 1 - 1
arch/arm/mm/alignment.c

@@ -745,7 +745,7 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
 static int
 do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
-	union offset_union offset;
+	union offset_union uninitialized_var(offset);
 	unsigned long instr = 0, instrptr;
 	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
 	unsigned int type;

+ 6 - 3
arch/arm/vfp/vfpmodule.c

@@ -701,11 +701,14 @@ static int __init vfp_init(void)
 			elf_hwcap |= HWCAP_VFPv3;
 
 			/*
-			 * Check for VFPv3 D16. CPUs in this configuration
-			 * only have 16 x 64bit registers.
+			 * Check for VFPv3 D16 and VFPv4 D16.  CPUs in
+			 * this configuration only have 16 x 64bit
+			 * registers.
 			 */
 			if (((fmrx(MVFR0) & MVFR0_A_SIMD_MASK)) == 1)
-				elf_hwcap |= HWCAP_VFPv3D16;
+				elf_hwcap |= HWCAP_VFPv3D16; /* also v4-D16 */
+			else
+				elf_hwcap |= HWCAP_VFPD32;
 		}
 #endif
 		/*

+ 11 - 0
arch/arm/xen/enlighten.c

@@ -166,3 +166,14 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
 	*pages = NULL;
 }
 EXPORT_SYMBOL_GPL(free_xenballooned_pages);
+
+/* In the hypervisor.S file. */
+EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version);
+EXPORT_SYMBOL_GPL(HYPERVISOR_console_io);
+EXPORT_SYMBOL_GPL(HYPERVISOR_sched_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
+EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
+EXPORT_SYMBOL_GPL(privcmd_call);

+ 5 - 9
arch/arm/xen/hypercall.S

@@ -48,20 +48,16 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/opcodes-virt.h>
 #include <xen/interface/xen.h>
 
 
-/* HVC 0xEA1 */
-#ifdef CONFIG_THUMB2_KERNEL
-#define xen_hvc .word 0xf7e08ea1
-#else
-#define xen_hvc .word 0xe140ea71
-#endif
+#define XEN_IMM 0xEA1
 
 #define HYPERCALL_SIMPLE(hypercall)		\
 ENTRY(HYPERVISOR_##hypercall)			\
 	mov r12, #__HYPERVISOR_##hypercall;	\
-	xen_hvc;							\
+	__HVC(XEN_IMM);						\
 	mov pc, lr;							\
 ENDPROC(HYPERVISOR_##hypercall)
 
@@ -76,7 +72,7 @@ ENTRY(HYPERVISOR_##hypercall)			\
 	stmdb sp!, {r4}						\
 	ldr r4, [sp, #4]					\
 	mov r12, #__HYPERVISOR_##hypercall;	\
-	xen_hvc								\
+	__HVC(XEN_IMM);						\
 	ldm sp!, {r4}						\
 	mov pc, lr							\
 ENDPROC(HYPERVISOR_##hypercall)
@@ -100,7 +96,7 @@ ENTRY(privcmd_call)
 	mov r2, r3
 	ldr r3, [sp, #8]
 	ldr r4, [sp, #4]
-	xen_hvc
+	__HVC(XEN_IMM)
 	ldm sp!, {r4}
 	mov pc, lr
 ENDPROC(privcmd_call);

+ 1 - 0
arch/arm64/Kconfig

@@ -1,6 +1,7 @@
 config ARM64
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_HARDIRQS_NO_DEPRECATED
 	select GENERIC_IOMAP

+ 1 - 4
arch/arm64/include/asm/elf.h

@@ -25,12 +25,10 @@
 #include <asm/user.h>
 
 typedef unsigned long elf_greg_t;
-typedef unsigned long elf_freg_t[3];
 
 #define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_fp elf_fpregset_t;
+typedef struct user_fpsimd_state elf_fpregset_t;
 
 #define EM_AARCH64		183
 
@@ -87,7 +85,6 @@ typedef struct user_fp elf_fpregset_t;
 #define R_AARCH64_MOVW_PREL_G2_NC	292
 #define R_AARCH64_MOVW_PREL_G3		293
 
-
 /*
  * These are used to set parameters in the core dumps.
  */

+ 2 - 3
arch/arm64/include/asm/fpsimd.h

@@ -25,9 +25,8 @@
  *  - FPSR and FPCR
  *  - 32 128-bit data registers
  *
- * Note that user_fp forms a prefix of this structure, which is relied
- * upon in the ptrace FP/SIMD accessors. struct user_fpsimd_state must
- * form a prefix of struct fpsimd_state.
+ * Note that user_fpsimd forms a prefix of this structure, which is
+ * relied upon in the ptrace FP/SIMD accessors.
  */
 struct fpsimd_state {
 	union {

+ 4 - 4
arch/arm64/include/asm/io.h

@@ -114,7 +114,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  *  I/O port access primitives.
  */
 #define IO_SPACE_LIMIT		0xffff
-#define PCI_IOBASE		((void __iomem *)0xffffffbbfffe0000UL)
+#define PCI_IOBASE		((void __iomem *)(MODULES_VADDR - SZ_2M))
 
 static inline u8 inb(unsigned long addr)
 {
@@ -225,9 +225,9 @@ extern void __iounmap(volatile void __iomem *addr);
 #define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_XN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
 
-#define ioremap(addr, size)		__ioremap((addr), (size), PROT_DEVICE_nGnRE)
-#define ioremap_nocache(addr, size)	__ioremap((addr), (size), PROT_DEVICE_nGnRE)
-#define ioremap_wc(addr, size)		__ioremap((addr), (size), PROT_NORMAL_NC)
+#define ioremap(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
+#define ioremap_nocache(addr, size)	__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
+#define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
 #define iounmap				__iounmap
 
 #define ARCH_HAS_IOREMAP_WC

+ 2 - 0
arch/arm64/include/asm/processor.h

@@ -43,6 +43,8 @@
 #else
 #define STACK_TOP		STACK_TOP_MAX
 #endif /* CONFIG_COMPAT */
+
+#define ARCH_LOW_ADDRESS_LIMIT	PHYS_MASK
 #endif /* __KERNEL__ */
 
 struct debug_info {

+ 0 - 1
arch/arm64/include/asm/unistd.h

@@ -14,7 +14,6 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #ifdef CONFIG_COMPAT
-#define __ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 #define __ARCH_WANT_COMPAT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE

+ 2 - 8
arch/arm64/kernel/perf_event.c

@@ -613,17 +613,11 @@ enum armv8_pmuv3_perf_types {
 	ARMV8_PMUV3_PERFCTR_BUS_ACCESS				= 0x19,
 	ARMV8_PMUV3_PERFCTR_MEM_ERROR				= 0x1A,
 	ARMV8_PMUV3_PERFCTR_BUS_CYCLES				= 0x1D,
-
-	/*
-	 * This isn't an architected event.
-	 * We detect this event number and use the cycle counter instead.
-	 */
-	ARMV8_PMUV3_PERFCTR_CPU_CYCLES				= 0xFF,
 };
 
 /* PMUv3 HW events mapping. */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
 	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
 	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
@@ -1106,7 +1100,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
 	unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
-	if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
+	if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
 		if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 

+ 0 - 18
arch/arm64/kernel/process.c

@@ -309,24 +309,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	return last;
 }
 
-/*
- * Fill in the task's elfregs structure for a core dump.
- */
-int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs)
-{
-	elf_core_copy_regs(elfregs, task_pt_regs(t));
-	return 1;
-}
-
-/*
- * fill in the fpe structure for a core dump...
- */
-int dump_fpu (struct pt_regs *regs, struct user_fp *fp)
-{
-	return 0;
-}
-EXPORT_SYMBOL(dump_fpu);
-
 /*
  * Shuffle the argument into the correct register before calling the
  * thread function.  x1 is the thread argument, x2 is the pointer to

+ 1 - 2
arch/arm64/kernel/smp.c

@@ -211,8 +211,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	 * before we continue.
 	 */
 	set_cpu_online(cpu, true);
-	while (!cpu_active(cpu))
-		cpu_relax();
+	complete(&cpu_running);
 
 	/*
 	 * OK, it's off to the idle thread for us

+ 1 - 1
arch/arm64/mm/init.c

@@ -80,7 +80,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 #ifdef CONFIG_ZONE_DMA32
 	/* 4GB maximum for 32-bit only capable devices */
 	max_dma32 = min(max, MAX_DMA32_PFN);
-	zone_size[ZONE_DMA32] = max_dma32 - min;
+	zone_size[ZONE_DMA32] = max(min, max_dma32) - min;
 #endif
 	zone_size[ZONE_NORMAL] = max - max_dma32;
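
A quick sanity check on the fix (illustrative numbers, assuming 4K pages so
MAX_DMA32_PFN = 0x100000): if RAM starts above 4GB, e.g. min = 0x480000, then
max_dma32 = 0x100000 and the old "max_dma32 - min" wrapped around; with the
clamp, max(min, max_dma32) - min = 0, i.e. an empty ZONE_DMA32.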
 

+ 1 - 0
arch/frv/Kconfig

@@ -13,6 +13,7 @@ config FRV
 	select GENERIC_CPU_DEVICES
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config ZONE_DMA
 	bool

+ 6 - 4
arch/frv/boot/Makefile

@@ -17,6 +17,8 @@ PARAMS_PHYS	 = 0x0207c000
 INITRD_PHYS	 = 0x02180000
 INITRD_VIRT	 = 0x02180000
 
+OBJCOPYFLAGS	:=-O binary -R .note -R .note.gnu.build-id -R .comment
+
 #
 # If you don't define ZRELADDR above,
 # then it defaults to ZTEXTADDR
@@ -32,18 +34,18 @@ Image: $(obj)/Image
 targets: $(obj)/Image
 
 $(obj)/Image: vmlinux FORCE
-	$(OBJCOPY) -O binary -R .note -R .comment -S vmlinux $@
+	$(OBJCOPY) $(OBJCOPYFLAGS) -S vmlinux $@
 
 #$(obj)/Image:	$(CONFIGURE) $(SYSTEM)
-#	$(OBJCOPY) -O binary -R .note -R .comment -g -S $(SYSTEM) $@
+#	$(OBJCOPY) $(OBJCOPYFLAGS) -g -S $(SYSTEM) $@
 
 bzImage: zImage
 
 zImage:	$(CONFIGURE) compressed/$(LINUX)
-	$(OBJCOPY) -O binary -R .note -R .comment -S compressed/$(LINUX) $@
+	$(OBJCOPY) $(OBJCOPYFLAGS) -S compressed/$(LINUX) $@
 
 bootpImage: bootp/bootp
-	$(OBJCOPY) -O binary -R .note -R .comment -S bootp/bootp $@
+	$(OBJCOPY) $(OBJCOPYFLAGS) -S bootp/bootp $@
 
 compressed/$(LINUX): $(LINUX) dep
 	@$(MAKE) -C compressed $(LINUX)

+ 0 - 1
arch/frv/include/asm/unistd.h

@@ -30,7 +30,6 @@
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_EXECVE
-#define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls

+ 3 - 25
arch/frv/kernel/entry.S

@@ -869,11 +869,6 @@ ret_from_kernel_thread:
 	call		schedule_tail
 	calll.p		@(gr21,gr0)
 	or		gr20,gr20,gr8
-	bra		sys_exit
-
-	.globl		ret_from_kernel_execve
-ret_from_kernel_execve:
-	ori		gr28,0,sp
 	bra		__syscall_exit
 
 ###################################################################################################
@@ -1080,27 +1075,10 @@ __entry_return_from_kernel_interrupt:
 	subicc		gr5,#0,gr0,icc0
 	beq		icc0,#0,__entry_return_direct
 
-__entry_preempt_need_resched:
-	ldi		@(gr15,#TI_FLAGS),gr4
-	andicc		gr4,#_TIF_NEED_RESCHED,gr0,icc0
-	beq		icc0,#1,__entry_return_direct
-
-	setlos		#PREEMPT_ACTIVE,gr5
-	sti		gr5,@(gr15,#TI_FLAGS)
-
-	andi		gr23,#~PSR_PIL,gr23
-	movgs		gr23,psr
-
-	call		schedule
-	sti		gr0,@(gr15,#TI_PRE_COUNT)
-
-	movsg		psr,gr23
-	ori		gr23,#PSR_PIL_14,gr23
-	movgs		gr23,psr
-	bra		__entry_preempt_need_resched
-#else
-	bra		__entry_return_direct
+	subcc		gr0,gr0,gr0,icc2		/* set Z and clear C */
+	call		preempt_schedule_irq
 #endif
+	bra		__entry_return_direct
 
 
 ###############################################################################

+ 3 - 2
arch/frv/kernel/process.c

@@ -181,6 +181,9 @@ int copy_thread(unsigned long clone_flags,
 	childregs = (struct pt_regs *)
 		(task_stack_page(p) + THREAD_SIZE - FRV_FRAME0_SIZE);
 
+	/* set up the userspace frame (the only place that the USP is stored) */
+	*childregs = *__kernel_frame0_ptr;
+
 	p->set_child_tid = p->clear_child_tid = NULL;
 
 	p->thread.frame	 = childregs;
@@ -191,10 +194,8 @@ int copy_thread(unsigned long clone_flags,
 	p->thread.frame0 = childregs;
 
 	if (unlikely(!regs)) {
-		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gr9 = usp; /* function */
 		childregs->gr8 = arg;
-		childregs->psr = PSR_S;
 		p->thread.pc = (unsigned long) ret_from_kernel_thread;
 		save_user_regs(p->thread.user);
 		return 0;

+ 1 - 0
arch/frv/mb93090-mb00/pci-dma-nommu.c

@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <linux/dma-mapping.h>
 #include <linux/list.h>
 #include <linux/pci.h>

+ 2 - 1
arch/h8300/include/asm/cache.h

@@ -2,7 +2,8 @@
 #define __ARCH_H8300_CACHE_H
 
 /* bytes per L1 cache line */
-#define        L1_CACHE_BYTES  4
+#define        L1_CACHE_SHIFT  2
+#define        L1_CACHE_BYTES  (1 << L1_CACHE_SHIFT)
 
 /* m68k-elf-gcc  2.95.2 doesn't like these */
 

+ 2 - 0
arch/s390/include/asm/cio.h

@@ -9,6 +9,8 @@
 
 #define LPM_ANYPATH 0xff
 #define __MAX_CSSID 0
+#define __MAX_SUBCHANNEL 65535
+#define __MAX_SSID 3
 
 #include <asm/scsw.h>
 

+ 22 - 13
arch/s390/include/asm/pgtable.h

@@ -506,12 +506,15 @@ static inline int pud_bad(pud_t pud)
 
 static inline int pmd_present(pmd_t pmd)
 {
-	return (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) != 0UL;
+	unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO;
+	return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE ||
+	       !(pmd_val(pmd) & _SEGMENT_ENTRY_INV);
 }
 
 static inline int pmd_none(pmd_t pmd)
 {
-	return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) != 0UL;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) &&
+	       !(pmd_val(pmd) & _SEGMENT_ENTRY_RO);
 }
 
 static inline int pmd_large(pmd_t pmd)
@@ -1223,6 +1226,11 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define SEGMENT_NONE	__pgprot(_HPAGE_TYPE_NONE)
+#define SEGMENT_RO	__pgprot(_HPAGE_TYPE_RO)
+#define SEGMENT_RW	__pgprot(_HPAGE_TYPE_RW)
+
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
 extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
 
@@ -1242,16 +1250,15 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 
 static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
 {
-	unsigned long pgprot_pmd = 0;
-
-	if (pgprot_val(pgprot) & _PAGE_INVALID) {
-		if (pgprot_val(pgprot) & _PAGE_SWT)
-			pgprot_pmd |= _HPAGE_TYPE_NONE;
-		pgprot_pmd |= _SEGMENT_ENTRY_INV;
-	}
-	if (pgprot_val(pgprot) & _PAGE_RO)
-		pgprot_pmd |= _SEGMENT_ENTRY_RO;
-	return pgprot_pmd;
+	/*
+	 * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx)
+	 * Convert to segment table entry format.
+	 */
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
+		return pgprot_val(SEGMENT_NONE);
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
+		return pgprot_val(SEGMENT_RO);
+	return pgprot_val(SEGMENT_RW);
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
@@ -1269,7 +1276,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 
 static inline pmd_t pmd_mkwrite(pmd_t pmd)
 {
-	pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+	/* Do not clobber _HPAGE_TYPE_NONE pages! */
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV))
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
 	return pmd;
 }
 

+ 7 - 1
arch/s390/kernel/sclp.S

@@ -44,6 +44,12 @@ _sclp_wait_int:
 #endif
 	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
 	mvc	0(16,%r8),0(%r9)
+#ifdef CONFIG_64BIT
+	epsw	%r6,%r7				# set current addressing mode
+	nill	%r6,0x1				# in new psw (31 or 64 bit mode)
+	nilh	%r7,0x8000
+	stm	%r6,%r7,0(%r8)
+#endif
 	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
 	ltr	%r2,%r2
 	jz	.LsetctS1
@@ -87,7 +93,7 @@ _sclp_wait_int:
 	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
 #ifdef CONFIG_64BIT
 .LextpswS1_64:
-	.quad	0x0000000180000000, .LwaitS1	# PSW to handle ext int, 64 bit
+	.quad	0, .LwaitS1			# PSW to handle ext int, 64 bit
 #endif
 .LwaitpswS1:
 	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int

+ 1 - 1
arch/s390/lib/uaccess_pt.c

@@ -39,7 +39,7 @@ static __always_inline unsigned long follow_table(struct mm_struct *mm,
 	pmd = pmd_offset(pud, addr);
 	if (pmd_none(*pmd))
 		return -0x10UL;
-	if (pmd_huge(*pmd)) {
+	if (pmd_large(*pmd)) {
 		if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO))
 			return -0x04UL;
 		return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK);

+ 1 - 1
arch/s390/mm/gup.c

@@ -126,7 +126,7 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 		 */
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
-		if (unlikely(pmd_huge(pmd))) {
+		if (unlikely(pmd_large(pmd))) {
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
 					  write, pages, nr))
 				return 0;

+ 1 - 0
arch/sparc/Kconfig

@@ -20,6 +20,7 @@ config SPARC
 	select HAVE_ARCH_TRACEHOOK
 	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select RTC_CLASS
 	select RTC_DRV_M48T59
 	select HAVE_IRQ_WORK

+ 8 - 8
arch/sparc/crypto/Makefile

@@ -13,13 +13,13 @@ obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
 
-sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o
-sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o
-sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o
-md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o
+sha1-sparc64-y := sha1_asm.o sha1_glue.o
+sha256-sparc64-y := sha256_asm.o sha256_glue.o
+sha512-sparc64-y := sha512_asm.o sha512_glue.o
+md5-sparc64-y := md5_asm.o md5_glue.o
 
-aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o
-des-sparc64-y := des_asm.o des_glue.o crop_devid.o
-camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o
+aes-sparc64-y := aes_asm.o aes_glue.o
+des-sparc64-y := des_asm.o des_glue.o
+camellia-sparc64-y := camellia_asm.o camellia_glue.o
 
-crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o
+crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o

+ 2 - 0
arch/sparc/crypto/aes_glue.c

@@ -475,3 +475,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated");
 
 MODULE_ALIAS("aes");
+
+#include "crop_devid.c"

+ 2 - 0
arch/sparc/crypto/camellia_glue.c

@@ -320,3 +320,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
 
 MODULE_ALIAS("aes");
+
+#include "crop_devid.c"

+ 2 - 0
arch/sparc/crypto/crc32c_glue.c

@@ -177,3 +177,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
 
 MODULE_ALIAS("crc32c");
+
+#include "crop_devid.c"

+ 2 - 0
arch/sparc/crypto/des_glue.c

@@ -527,3 +527,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");
 
 MODULE_ALIAS("des");
+
+#include "crop_devid.c"

+ 2 - 0
arch/sparc/crypto/md5_glue.c

@@ -186,3 +186,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated");
 
 MODULE_ALIAS("md5");
+
+#include "crop_devid.c"

+ 2 - 0
arch/sparc/crypto/sha1_glue.c

@@ -181,3 +181,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");
 
 MODULE_ALIAS("sha1");
+
+#include "crop_devid.c"

+ 2 - 0
arch/sparc/crypto/sha256_glue.c

@@ -239,3 +239,5 @@ MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 op
 
 MODULE_ALIAS("sha224");
 MODULE_ALIAS("sha256");
+
+#include "crop_devid.c"

+ 2 - 0
arch/sparc/crypto/sha512_glue.c

@@ -224,3 +224,5 @@ MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 op
 
 MODULE_ALIAS("sha384");
 MODULE_ALIAS("sha512");
+
+#include "crop_devid.c"

+ 3 - 1
arch/sparc/include/asm/atomic_64.h

@@ -1,7 +1,7 @@
 /* atomic.h: Thankfully the V9 is at least reasonable for this
  *           stuff.
  *
- * Copyright (C) 1996, 1997, 2000 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1996, 1997, 2000, 2012 David S. Miller (davem@redhat.com)
  */
 
 #ifndef __ARCH_SPARC64_ATOMIC__
@@ -106,6 +106,8 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
+extern long atomic64_dec_if_positive(atomic64_t *v);
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()

+ 59 - 10
arch/sparc/include/asm/backoff.h

@@ -1,6 +1,46 @@
 #ifndef _SPARC64_BACKOFF_H
 #define _SPARC64_BACKOFF_H
 
+/* The macros in this file implement an exponential backoff facility
+ * for atomic operations.
+ *
+ * When multiple threads compete on an atomic operation, it is
+ * possible for one thread to be continually denied a successful
+ * completion of the compare-and-swap instruction.  Heavily
+ * threaded cpu implementations like Niagara can compound this
+ * problem even further.
+ *
+ * When an atomic operation fails and needs to be retried, we spin a
+ * certain number of times.  At each subsequent failure of the same
+ * operation we double the spin count, realizing an exponential
+ * backoff.
+ *
+ * When we spin, we try to use an operation that will cause the
+ * current cpu strand to block, and therefore make the core fully
+ * available to any other other runnable strands.  There are two
+ * options, based upon cpu capabilities.
+ *
+ * On all cpus prior to SPARC-T4 we do three dummy reads of the
+ * condition code register.  Each read blocks the strand for something
+ * between 40 and 50 cpu cycles.
+ *
+ * For SPARC-T4 and later we have a special "pause" instruction
+ * available.  This is implemented using writes to register %asr27.
+ * The cpu will block the number of cycles written into the register,
+ * unless a disrupting trap happens first.  SPARC-T4 specifically
+ * implements pause with a granularity of 8 cycles.  Each strand has
+ * an internal pause counter which decrements every 8 cycles.  So the
+ * chip shifts the %asr27 value down by 3 bits, and writes the result
+ * into the pause counter.  If a value smaller than 8 is written, the
+ * chip blocks for 1 cycle.
+ *
+ * To achieve the same amount of backoff as the three %ccr reads give
+ * on earlier chips, we shift the backoff value up by 7 bits.  (Three
+ * %ccr reads block for about 128 cycles, 1 << 7 == 128) We write the
+ * whole amount we want to block into the pause register, rather than
+ * loop writing 128 each time.
+ */
+
 #define BACKOFF_LIMIT	(4 * 1024)
 
 #ifdef CONFIG_SMP
@@ -11,16 +51,25 @@
 #define BACKOFF_LABEL(spin_label, continue_label) \
 	spin_label
 
-#define BACKOFF_SPIN(reg, tmp, label)	\
-	mov	reg, tmp; \
-88:	brnz,pt	tmp, 88b; \
-	 sub	tmp, 1, tmp; \
-	set	BACKOFF_LIMIT, tmp; \
-	cmp	reg, tmp; \
-	bg,pn	%xcc, label; \
-	 nop; \
-	ba,pt	%xcc, label; \
-	 sllx	reg, 1, reg;
+#define BACKOFF_SPIN(reg, tmp, label)		\
+	mov		reg, tmp;		\
+88:	rd		%ccr, %g0;		\
+	rd		%ccr, %g0;		\
+	rd		%ccr, %g0;		\
+	.section	.pause_3insn_patch,"ax";\
+	.word		88b;			\
+	sllx		tmp, 7, tmp;		\
+	wr		tmp, 0, %asr27;		\
+	clr		tmp;			\
+	.previous;				\
+	brnz,pt		tmp, 88b;		\
+	 sub		tmp, 1, tmp;		\
+	set		BACKOFF_LIMIT, tmp;	\
+	cmp		reg, tmp;		\
+	bg,pn		%xcc, label;		\
+	 nop;					\
+	ba,pt		%xcc, label;		\
+	 sllx		reg, 1, reg;
 
 #else
 

+ 3 - 2
arch/sparc/include/asm/compat.h

@@ -232,9 +232,10 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 	struct pt_regs *regs = current_thread_info()->kregs;
 	unsigned long usp = regs->u_regs[UREG_I6];
 
-	if (!(test_thread_flag(TIF_32BIT)))
+	if (test_thread_64bit_stack(usp))
 		usp += STACK_BIAS;
-	else
+
+	if (test_thread_flag(TIF_32BIT))
 		usp &= 0xffffffffUL;
 
 	usp -= len;

+ 16 - 1
arch/sparc/include/asm/processor_64.h

@@ -196,7 +196,22 @@ extern unsigned long get_wchan(struct task_struct *task);
 #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->tpc)
 #define KSTK_ESP(tsk)  (task_pt_regs(tsk)->u_regs[UREG_FP])
 
-#define cpu_relax()	barrier()
+/* Please see the commentary in asm/backoff.h for a description of
+ * what these instructions are doing and how they have been choosen.
+ * To make a long story short, we are trying to yield the current cpu
+ * strand during busy loops.
+ */
+#define cpu_relax()	asm volatile("\n99:\n\t"			\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     ".section	.pause_3insn_patch,\"ax\"\n\t"\
+				     ".word	99b\n\t"		\
+				     "wr	%%g0, 128, %%asr27\n\t"	\
+				     "nop\n\t"				\
+				     "nop\n\t"				\
+				     ".previous"			\
+				     ::: "memory")
 
 /* Prefetch support.  This is tuned for UltraSPARC-III and later.
  * UltraSPARC-I will treat these as nops, and UltraSPARC-II has

+ 5 - 0
arch/sparc/include/asm/prom.h

@@ -63,5 +63,10 @@ extern char *of_console_options;
 extern void irq_trans_init(struct device_node *dp);
 extern char *build_path_component(struct device_node *dp);
 
+/* SPARC has a local implementation */
+extern int of_address_to_resource(struct device_node *dev, int index,
+				  struct resource *r);
+#define of_address_to_resource of_address_to_resource
+
 #endif /* __KERNEL__ */
 #endif /* _SPARC_PROM_H */

+ 5 - 0
arch/sparc/include/asm/thread_info_64.h

@@ -259,6 +259,11 @@ static inline bool test_and_clear_restore_sigmask(void)
 
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 
+#define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0)
+#define test_thread_64bit_stack(__SP) \
+	((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? \
+	 false : true)
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */

+ 16 - 8
arch/sparc/include/asm/ttable.h

@@ -372,7 +372,9 @@ etrap_spill_fixup_64bit:				\
 
 /* Normal 32bit spill */
 #define SPILL_2_GENERIC(ASI)				\
-	srl	%sp, 0, %sp;				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, (. - (128 + 4));			\
+	 srl	%sp, 0, %sp;				\
 	stwa	%l0, [%sp + %g0] ASI;			\
 	mov	0x04, %g3;				\
 	stwa	%l1, [%sp + %g3] ASI;			\
@@ -398,14 +400,16 @@ etrap_spill_fixup_64bit:				\
 	stwa	%i6, [%g1 + %g0] ASI;			\
 	stwa	%i7, [%g1 + %g3] ASI;			\
 	saved;						\
-        retry; nop; nop;				\
+        retry;						\
 	b,a,pt	%xcc, spill_fixup_dax;			\
 	b,a,pt	%xcc, spill_fixup_mna;			\
 	b,a,pt	%xcc, spill_fixup;
 
 #define SPILL_2_GENERIC_ETRAP		\
 etrap_user_spill_32bit:			\
-	srl	%sp, 0, %sp;		\
+	and	%sp, 1, %g3;		\
+	brnz,pn	%g3, etrap_user_spill_64bit;	\
+	 srl	%sp, 0, %sp;		\
 	stwa	%l0, [%sp + 0x00] %asi;	\
 	stwa	%l1, [%sp + 0x04] %asi;	\
 	stwa	%l2, [%sp + 0x08] %asi;	\
@@ -427,7 +431,7 @@ etrap_user_spill_32bit:			\
 	ba,pt	%xcc, etrap_save;	\
 	 wrpr	%g1, %cwp;		\
 	nop; nop; nop; nop;		\
-	nop; nop; nop; nop;		\
+	nop; nop;			\
 	ba,a,pt	%xcc, etrap_spill_fixup_32bit; \
 	ba,a,pt	%xcc, etrap_spill_fixup_32bit; \
 	ba,a,pt	%xcc, etrap_spill_fixup_32bit;
@@ -592,7 +596,9 @@ user_rtt_fill_64bit:					\
 
 /* Normal 32bit fill */
 #define FILL_2_GENERIC(ASI)				\
-	srl	%sp, 0, %sp;				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, (. - (128 + 4));			\
+	 srl	%sp, 0, %sp;				\
 	lduwa	[%sp + %g0] ASI, %l0;			\
 	mov	0x04, %g2;				\
 	mov	0x08, %g3;				\
@@ -616,14 +622,16 @@ user_rtt_fill_64bit:					\
 	lduwa	[%g1 + %g3] ASI, %i6;			\
 	lduwa	[%g1 + %g5] ASI, %i7;			\
 	restored;					\
-	retry; nop; nop; nop; nop;			\
+	retry; nop; nop;				\
 	b,a,pt	%xcc, fill_fixup_dax;			\
 	b,a,pt	%xcc, fill_fixup_mna;			\
 	b,a,pt	%xcc, fill_fixup;
 
 #define FILL_2_GENERIC_RTRAP				\
 user_rtt_fill_32bit:					\
-	srl	%sp, 0, %sp;				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, user_rtt_fill_64bit;		\
+	 srl	%sp, 0, %sp;				\
 	lduwa	[%sp + 0x00] %asi, %l0;			\
 	lduwa	[%sp + 0x04] %asi, %l1;			\
 	lduwa	[%sp + 0x08] %asi, %l2;			\
@@ -643,7 +651,7 @@ user_rtt_fill_32bit:					\
 	ba,pt	%xcc, user_rtt_pre_restore;		\
 	 restored;					\
 	nop; nop; nop; nop; nop;			\
-	nop; nop; nop; nop; nop;			\
+	nop; nop; nop;					\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;

+ 6 - 1
arch/sparc/include/uapi/asm/unistd.h

@@ -405,8 +405,13 @@
 #define __NR_setns		337
 #define __NR_process_vm_readv	338
 #define __NR_process_vm_writev	339
+#define __NR_kern_features	340
+#define __NR_kcmp		341
 
-#define NR_syscalls		340
+#define NR_syscalls		342
+
+/* Bitmask values returned from kern_features system call.  */
+#define KERN_FEATURE_MIXED_MODE_STACK	0x00000001
 
 #ifdef __32bit_syscall_numbers__
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,

+ 7 - 0
arch/sparc/kernel/entry.h

@@ -59,6 +59,13 @@ struct popc_6insn_patch_entry {
 extern struct popc_6insn_patch_entry __popc_6insn_patch,
 	__popc_6insn_patch_end;
 
+struct pause_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[3];
+};
+extern struct pause_patch_entry __pause_3insn_patch,
+	__pause_3insn_patch_end;
+
 extern void __init per_cpu_patch(void);
 extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
 				    struct sun4v_1insn_patch_entry *);

+ 4 - 2
arch/sparc/kernel/leon_kernel.c

@@ -56,11 +56,13 @@ static inline unsigned int leon_eirq_get(int cpu)
 static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
 {
 	unsigned int eirq;
+	struct irq_bucket *p;
 	int cpu = sparc_leon3_cpuid();
 
 	eirq = leon_eirq_get(cpu);
-	if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */
-		generic_handle_irq(irq_map[eirq]->irq);
+	p = irq_map[eirq];
+	if ((eirq & 0x10) && p && p->irq) /* bit4 tells if IRQ happened */
+		generic_handle_irq(p->irq);
 }
 
 /* The extended IRQ controller has been found, this function registers it */

+ 16 - 6
arch/sparc/kernel/perf_event.c

@@ -1762,15 +1762,25 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 
 	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
 	do {
-		struct sparc_stackf32 *usf, sf;
 		unsigned long pc;
 
-		usf = (struct sparc_stackf32 *) ufp;
-		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
-			break;
+		if (thread32_stack_is_64bit(ufp)) {
+			struct sparc_stackf *usf, sf;
 
-		pc = sf.callers_pc;
-		ufp = (unsigned long)sf.fp;
+			ufp += STACK_BIAS;
+			usf = (struct sparc_stackf *) ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc & 0xffffffff;
+			ufp = ((unsigned long) sf.fp) & 0xffffffff;
+		} else {
+			struct sparc_stackf32 *usf, sf;
+			usf = (struct sparc_stackf32 *) ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc;
+			ufp = (unsigned long)sf.fp;
+		}
 		perf_callchain_store(entry, pc);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }

+ 23 - 19
arch/sparc/kernel/process_64.c

@@ -452,13 +452,16 @@ void flush_thread(void)
 /* It's a bit more tricky when 64-bit tasks are involved... */
 static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
 {
+	bool stack_64bit = test_thread_64bit_stack(psp);
 	unsigned long fp, distance, rval;
 
-	if (!(test_thread_flag(TIF_32BIT))) {
+	if (stack_64bit) {
 		csp += STACK_BIAS;
 		psp += STACK_BIAS;
 		__get_user(fp, &(((struct reg_window __user *)psp)->ins[6]));
 		fp += STACK_BIAS;
+		if (test_thread_flag(TIF_32BIT))
+			fp &= 0xffffffff;
 	} else
 		__get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6]));
 
@@ -472,7 +475,7 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
 	rval = (csp - distance);
 	if (copy_in_user((void __user *) rval, (void __user *) psp, distance))
 		rval = 0;
-	else if (test_thread_flag(TIF_32BIT)) {
+	else if (!stack_64bit) {
 		if (put_user(((u32)csp),
 			     &(((struct reg_window32 __user *)rval)->ins[6])))
 			rval = 0;
@@ -507,18 +510,18 @@ void synchronize_user_stack(void)
 
 	flush_user_windows();
 	if ((window = get_thread_wsaved()) != 0) {
-		int winsize = sizeof(struct reg_window);
-		int bias = 0;
-
-		if (test_thread_flag(TIF_32BIT))
-			winsize = sizeof(struct reg_window32);
-		else
-			bias = STACK_BIAS;
-
 		window -= 1;
 		do {
-			unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
 			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp;
+
+			sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
 
 			if (!copy_to_user((char __user *)sp, rwin, winsize)) {
 				shift_window_buffer(window, get_thread_wsaved() - 1, t);
@@ -544,13 +547,6 @@ void fault_in_user_windows(void)
 {
 	struct thread_info *t = current_thread_info();
 	unsigned long window;
-	int winsize = sizeof(struct reg_window);
-	int bias = 0;
-
-	if (test_thread_flag(TIF_32BIT))
-		winsize = sizeof(struct reg_window32);
-	else
-		bias = STACK_BIAS;
 
 	flush_user_windows();
 	window = get_thread_wsaved();
@@ -558,8 +554,16 @@ void fault_in_user_windows(void)
 	if (likely(window != 0)) {
 		window -= 1;
 		do {
-			unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
 			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp;
+
+			sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
 
 			if (unlikely(sp & 0x7UL))
 				stack_unaligned(sp);

+ 2 - 2
arch/sparc/kernel/ptrace_64.c

@@ -151,7 +151,7 @@ static int regwindow64_get(struct task_struct *target,
 {
 	unsigned long rw_addr = regs->u_regs[UREG_I6];
 
-	if (test_tsk_thread_flag(current, TIF_32BIT)) {
+	if (!test_thread_64bit_stack(rw_addr)) {
 		struct reg_window32 win32;
 		int i;
 
@@ -176,7 +176,7 @@ static int regwindow64_set(struct task_struct *target,
 {
 	unsigned long rw_addr = regs->u_regs[UREG_I6];
 
-	if (test_tsk_thread_flag(current, TIF_32BIT)) {
+	if (!test_thread_64bit_stack(rw_addr)) {
 		struct reg_window32 win32;
 		int i;
 

+ 21 - 0
arch/sparc/kernel/setup_64.c

@@ -316,6 +316,25 @@ static void __init popc_patch(void)
 	}
 }
 
+static void __init pause_patch(void)
+{
+	struct pause_patch_entry *p;
+
+	p = &__pause_3insn_patch;
+	while (p < &__pause_3insn_patch_end) {
+		unsigned long i, addr = p->addr;
+
+		for (i = 0; i < 3; i++) {
+			*(unsigned int *) (addr +  (i * 4)) = p->insns[i];
+			wmb();
+			__asm__ __volatile__("flush	%0"
+					     : : "r" (addr +  (i * 4)));
+		}
+
+		p++;
+	}
+}
+
 #ifdef CONFIG_SMP
 void __init boot_cpu_id_too_large(int cpu)
 {
@@ -528,6 +547,8 @@ static void __init init_sparc64_elf_hwcap(void)
 
 	if (sparc64_elf_hwcap & AV_SPARC_POPC)
 		popc_patch();
+	if (sparc64_elf_hwcap & AV_SPARC_PAUSE)
+		pause_patch();
 }
 
 void __init setup_arch(char **cmdline_p)

+ 5 - 0
arch/sparc/kernel/sys_sparc_64.c

@@ -751,3 +751,8 @@ int kernel_execve(const char *filename,
 		      : "cc");
 	return __res;
 }
+
+asmlinkage long sys_kern_features(void)
+{
+	return KERN_FEATURE_MIXED_MODE_STACK;
+}
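
A sketch of how userspace (say, a libc or JIT probing for mixed-mode stack
support) might call this — the number 340 matches __NR_kern_features from
the uapi hunk above, but the probe itself is hypothetical:

	#include <sys/syscall.h>
	#include <unistd.h>

	#define KERN_FEATURE_MIXED_MODE_STACK	0x00000001

	static int have_mixed_mode_stack(void)
	{
		long feat = syscall(340 /* __NR_kern_features */);

		if (feat < 0)
			return 0;	/* older kernel: ENOSYS */
		return (feat & KERN_FEATURE_MIXED_MODE_STACK) != 0;
	}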

+ 1 - 0
arch/sparc/kernel/systbls_32.S

@@ -85,3 +85,4 @@ sys_call_table:
 /*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
 /*335*/	.long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
+/*340*/	.long sys_ni_syscall, sys_kcmp

+ 2 - 0
arch/sparc/kernel/systbls_64.S

@@ -86,6 +86,7 @@ sys_call_table32:
 	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
 	.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
+/*340*/	.word sys_kern_features, sys_kcmp
 
 #endif /* CONFIG_COMPAT */
 
@@ -163,3 +164,4 @@ sys_call_table:
 	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
 	.word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
+/*340*/	.word sys_kern_features, sys_kcmp

+ 23 - 13
arch/sparc/kernel/unaligned_64.c

@@ -113,21 +113,24 @@ static inline long sign_extend_imm13(long imm)
 
 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	unsigned long value;
+	unsigned long value, fp;
 	
 	if (reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		value = win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		get_user(value, &win32->locals[reg - 16]);
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		get_user(value, &win->locals[reg - 16]);
 	}
 	return value;
@@ -135,19 +138,24 @@ static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 
 static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
 {
+	unsigned long fp;
+
 	if (reg < 16)
 		return &regs->u_regs[reg];
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 *win32;
-		win32 = (struct reg_window32 *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 *)((unsigned long)((u32)fp));
 		return (unsigned long *)&win32->locals[reg - 16];
 	} else {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
 	}
 }
@@ -392,13 +400,15 @@ int handle_popc(u32 insn, struct pt_regs *regs)
 		if (rd)
 			regs->u_regs[rd] = ret;
 	} else {
-		if (test_thread_flag(TIF_32BIT)) {
+		unsigned long fp = regs->u_regs[UREG_FP];
+
+		if (!test_thread_64bit_stack(fp)) {
 			struct reg_window32 __user *win32;
-			win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+			win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 			put_user(ret, &win32->locals[rd - 16]);
 		} else {
 			struct reg_window __user *win;
-			win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+			win = (struct reg_window __user *)(fp + STACK_BIAS);
 			put_user(ret, &win->locals[rd - 16]);
 		}
 	}
@@ -554,7 +564,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)
 		reg[0] = 0;
 		if ((insn & 0x780000) == 0x180000)
 			reg[1] = 0;
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
 		put_user(0, (int __user *) reg);
 		if ((insn & 0x780000) == 0x180000)
 			put_user(0, ((int __user *) reg) + 1);

+ 14 - 9
arch/sparc/kernel/visemul.c

@@ -149,21 +149,24 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
 
 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	unsigned long value;
+	unsigned long value, fp;
 	
 	if (reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		value = win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		get_user(value, &win32->locals[reg - 16]);
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		get_user(value, &win->locals[reg - 16]);
 	}
 	return value;
@@ -172,16 +175,18 @@ static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
 							  struct pt_regs *regs)
 {
+	unsigned long fp = regs->u_regs[UREG_FP];
+
 	BUG_ON(reg < 16);
 	BUG_ON(regs->tstate & TSTATE_PRIV);
 
-	if (test_thread_flag(TIF_32BIT)) {
+	if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		return (unsigned long __user *)&win32->locals[reg - 16];
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
 	}
 }
@@ -204,7 +209,7 @@ static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
 	} else {
 		unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
 
-		if (test_thread_flag(TIF_32BIT))
+		if (!test_thread_64bit_stack(regs->u_regs[UREG_FP]))
 			__put_user((u32)val, (u32 __user *)rd_user);
 		else
 			__put_user(val, rd_user);

+ 5 - 0
arch/sparc/kernel/vmlinux.lds.S

@@ -132,6 +132,11 @@ SECTIONS
 		*(.popc_6insn_patch)
 		__popc_6insn_patch_end = .;
 	}
+	.pause_3insn_patch : {
+		__pause_3insn_patch = .;
+		*(.pause_3insn_patch)
+		__pause_3insn_patch_end = .;
+	}
 	PERCPU_SECTION(SMP_CACHE_BYTES)
 
 	. = ALIGN(PAGE_SIZE);

+ 2 - 0
arch/sparc/kernel/winfixup.S

@@ -43,6 +43,8 @@ spill_fixup_mna:
 spill_fixup_dax:
 	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldx	[%g6 + TI_FLAGS], %g1
+	andcc	%sp, 0x1, %g0
+	movne	%icc, 0, %g1
 	andcc	%g1, _TIF_32BIT, %g0
 	ldub	[%g6 + TI_WSAVED], %g1
 	sll	%g1, 3, %g3

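The two added instructions apply the same stack-format rule to the assembly spill-fixup path: andcc %sp, 0x1, %g0 tests the stack-bias bit, and movne %icc, 0, %g1 zeroes the just-loaded thread flags when it is set, so the following _TIF_32BIT test falls through to the 64-bit spill format. A hypothetical C rendering of the added logic:

	/* Sketch of what the two instructions implement (assumption): */
	unsigned long flags = task_thread_info(current)->flags;

	if (sp & 0x1)		/* biased sp => 64-bit stack frame */
		flags = 0;	/* make the _TIF_32BIT test below fail */
	if (flags & _TIF_32BIT) {
		/* ... 32-bit window spill ... */
	}
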
+ 15 - 1
arch/sparc/lib/atomic_64.S

@@ -1,6 +1,6 @@
 /* atomic.S: These things are too big to do inline.
  *
- * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2007, 2012 David S. Miller (davem@davemloft.net)
  */
 
 #include <linux/linkage.h>
@@ -117,3 +117,17 @@ ENTRY(atomic64_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */
 	 sub	%g1, %o0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 ENDPROC(atomic64_sub_ret)
+
+ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */
+	BACKOFF_SETUP(%o2)
+1:	ldx	[%o0], %g1
+	brlez,pn %g1, 3f
+	 sub	%g1, 1, %g7
+	casx	[%o0], %g1, %g7
+	cmp	%g1, %g7
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 nop
+3:	retl
+	 sub	%g1, 1, %o0
+2:	BACKOFF_SPIN(%o2, %o3, 1b)
+ENDPROC(atomic64_dec_if_positive)

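atomic64_dec_if_positive follows the same load/casx/backoff pattern as the surrounding routines: it only stores the decremented value when the old value was strictly positive, and it always returns old - 1, so a negative result signals that memory was left untouched. As a reading aid, a C sketch of the same semantics (not code from the patch):

	/* Sketch of the assembly's semantics, not code from the patch. */
	static long atomic64_dec_if_positive_sketch(atomic64_t *v)
	{
		long old;

		do {
			old = atomic64_read(v);
			if (old <= 0)
				break;		/* store nothing */
		} while (atomic64_cmpxchg(v, old, old - 1) != old);

		return old - 1;	/* negative => value was not decremented */
	}
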
+ 1 - 0
arch/sparc/lib/ksyms.c

@@ -116,6 +116,7 @@ EXPORT_SYMBOL(atomic64_add);
 EXPORT_SYMBOL(atomic64_add_ret);
 EXPORT_SYMBOL(atomic64_sub);
 EXPORT_SYMBOL(atomic64_sub_ret);
+EXPORT_SYMBOL(atomic64_dec_if_positive);
 
 /* Atomic bit operations. */
 EXPORT_SYMBOL(test_and_set_bit);

+ 1 - 1
arch/sparc/math-emu/math_64.c

@@ -320,7 +320,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap)
 					XR = 0;
 				else if (freg < 16)
 					XR = regs->u_regs[freg];
-				else if (test_thread_flag(TIF_32BIT)) {
+				else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
 					struct reg_window32 __user *win32;
 					flushw_user ();
 					win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));

+ 7 - 14
arch/x86/include/asm/xen/hypercall.h

@@ -359,18 +359,14 @@ HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
 		return _hypercall4(int, update_va_mapping, va,
 				   new_val.pte, new_val.pte >> 32, flags);
 }
+extern int __must_check xen_event_channel_op_compat(int, void *);
 
 static inline int
 HYPERVISOR_event_channel_op(int cmd, void *arg)
 {
 	int rc = _hypercall2(int, event_channel_op, cmd, arg);
-	if (unlikely(rc == -ENOSYS)) {
-		struct evtchn_op op;
-		op.cmd = cmd;
-		memcpy(&op.u, arg, sizeof(op.u));
-		rc = _hypercall1(int, event_channel_op_compat, &op);
-		memcpy(arg, &op.u, sizeof(op.u));
-	}
+	if (unlikely(rc == -ENOSYS))
+		rc = xen_event_channel_op_compat(cmd, arg);
 	return rc;
 }
 
@@ -386,17 +382,14 @@ HYPERVISOR_console_io(int cmd, int count, char *str)
 	return _hypercall3(int, console_io, cmd, count, str);
 }
 
+extern int __must_check HYPERVISOR_physdev_op_compat(int, void *);
+
 static inline int
 HYPERVISOR_physdev_op(int cmd, void *arg)
 {
 	int rc = _hypercall2(int, physdev_op, cmd, arg);
-	if (unlikely(rc == -ENOSYS)) {
-		struct physdev_op op;
-		op.cmd = cmd;
-		memcpy(&op.u, arg, sizeof(op.u));
-		rc = _hypercall1(int, physdev_op_compat, &op);
-		memcpy(arg, &op.u, sizeof(op.u));
-	}
+	if (unlikely(rc == -ENOSYS))
+		rc = HYPERVISOR_physdev_op_compat(cmd, arg);
 	return rc;
 }
 

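Both changes move the rarely taken -ENOSYS path (old hypervisors that lack the modern event-channel/physdev interfaces) out of line, which shrinks every inlined call site. The new helpers are defined outside the files shown in this diff; reconstructed from the removed inline code, the event-channel one plausibly looks like:

	/* Sketch reconstructed from the inline fallback removed above;
	 * the real out-of-line helper is not part of this diff and may
	 * copy results back with more per-command care. */
	int xen_event_channel_op_compat(int cmd, void *arg)
	{
		struct evtchn_op op;
		int rc;

		op.cmd = cmd;
		memcpy(&op.u, arg, sizeof(op.u));
		rc = _hypercall1(int, event_channel_op_compat, &op);
		memcpy(arg, &op.u, sizeof(op.u));

		return rc;
	}
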
+ 0 - 1
arch/x86/include/asm/xen/hypervisor.h

@@ -33,7 +33,6 @@
 #ifndef _ASM_X86_XEN_HYPERVISOR_H
 #define _ASM_X86_XEN_HYPERVISOR_H
 
-/* arch/i386/kernel/setup.c */
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 

+ 34 - 26
arch/x86/kvm/x86.c

@@ -3779,7 +3779,7 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 {
 	struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
 
-	memcpy(vcpu->run->mmio.data, frag->data, frag->len);
+	memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
 	return X86EMUL_CONTINUE;
 }
 
@@ -3832,18 +3832,11 @@ mmio:
 	bytes -= handled;
 	val += handled;
 
-	while (bytes) {
-		unsigned now = min(bytes, 8U);
-
-		frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
-		frag->gpa = gpa;
-		frag->data = val;
-		frag->len = now;
-
-		gpa += now;
-		val += now;
-		bytes -= now;
-	}
+	WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
+	frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+	frag->gpa = gpa;
+	frag->data = val;
+	frag->len = bytes;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3890,7 +3883,7 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_cur_fragment = 0;
 
-	vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
+	vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
 	vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
 	vcpu->run->exit_reason = KVM_EXIT_MMIO;
 	vcpu->run->mmio.phys_addr = gpa;
@@ -5522,28 +5515,44 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu)
  *
  * read:
  *   for each fragment
- *     write gpa, len
- *     exit
- *     copy data
+ *     for each mmio piece in the fragment
+ *       write gpa, len
+ *       exit
+ *       copy data
  *   execute insn
  *
  * write:
  *   for each fragment
- *      write gpa, len
- *      copy data
- *      exit
+ *     for each mmio piece in the fragment
+ *       write gpa, len
+ *       copy data
+ *       exit
  */
 static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_mmio_fragment *frag;
+	unsigned len;
 
 	BUG_ON(!vcpu->mmio_needed);
 
 	/* Complete previous fragment */
-	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
+	len = min(8u, frag->len);
 	if (!vcpu->mmio_is_write)
-		memcpy(frag->data, run->mmio.data, frag->len);
+		memcpy(frag->data, run->mmio.data, len);
+
+	if (frag->len <= 8) {
+		/* Switch to the next fragment. */
+		frag++;
+		vcpu->mmio_cur_fragment++;
+	} else {
+		/* Go forward to the next mmio piece. */
+		frag->data += len;
+		frag->gpa += len;
+		frag->len -= len;
+	}
+
 	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
 		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
@@ -5551,13 +5560,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 		vcpu->mmio_read_completed = 1;
 		return complete_emulated_io(vcpu);
 	}
-	/* Initiate next fragment */
-	++frag;
+
 	run->exit_reason = KVM_EXIT_MMIO;
 	run->mmio.phys_addr = frag->gpa;
 	if (vcpu->mmio_is_write)
-		memcpy(run->mmio.data, frag->data, frag->len);
-	run->mmio.len = frag->len;
+		memcpy(run->mmio.data, frag->data, min(8u, frag->len));
+	run->mmio.len = min(8u, frag->len);
 	run->mmio.is_write = vcpu->mmio_is_write;
 	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	return 0;

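The net effect of these hunks: an MMIO access now becomes a single kvm_mmio_fragment carrying its full length, and complete_emulated_mmio() drains it in pieces of at most 8 bytes, the size of the mmio window in struct kvm_run, advancing data/gpa/len between userspace exits. A 16-byte access, for example, is one fragment serviced by two KVM_EXIT_MMIO round trips. Illustrative consumption loop (the real code performs exactly one piece per exit):

	/* Illustration only -- the real code does one piece per
	 * userspace exit rather than looping in place. */
	while (frag->len > 8) {
		unsigned len = min(8u, frag->len);	/* always 8 here */

		/* ... one KVM_EXIT_MMIO round trip moves len bytes ... */
		frag->data += len;
		frag->gpa  += len;
		frag->len  -= len;
	}
	/* final piece (<= 8 bytes), then advance mmio_cur_fragment */
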
+ 20 - 1
arch/x86/xen/mmu.c

@@ -1288,6 +1288,25 @@ unsigned long xen_read_cr2_direct(void)
 	return this_cpu_read(xen_vcpu_info.arch.cr2);
 }
 
+void xen_flush_tlb_all(void)
+{
+	struct mmuext_op *op;
+	struct multicall_space mcs;
+
+	trace_xen_mmu_flush_tlb_all(0);
+
+	preempt_disable();
+
+	mcs = xen_mc_entry(sizeof(*op));
+
+	op = mcs.args;
+	op->cmd = MMUEXT_TLB_FLUSH_ALL;
+	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
+}
 static void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
@@ -2518,7 +2537,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 	err = 0;
 out:
 
-	flush_tlb_all();
+	xen_flush_tlb_all();
 
 	return err;
 }

+ 2 - 0
arch/xtensa/Kconfig

@@ -13,6 +13,8 @@ config XTENSA
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
 	select GENERIC_PCI_IOMAP
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica

+ 4 - 0
arch/xtensa/include/asm/io.h

@@ -62,6 +62,10 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 static inline void iounmap(volatile void __iomem *addr)
 {
 }
+
+#define virt_to_bus     virt_to_phys
+#define bus_to_virt     phys_to_virt
+
 #endif /* CONFIG_MMU */
 
 /*

+ 1 - 3
arch/xtensa/include/asm/processor.h

@@ -152,6 +152,7 @@ struct thread_struct {
 
 /* Clearing a0 terminates the backtrace. */
 #define start_thread(regs, new_pc, new_sp) \
+	memset(regs, 0, sizeof(*regs)); \
 	regs->pc = new_pc; \
 	regs->ps = USER_PS_VALUE; \
 	regs->areg[1] = new_sp; \
@@ -168,9 +169,6 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
 
-/* Create a kernel thread without removing it from tasklists */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)	do { } while(0)
 #define release_segments(mm)	do { } while(0)

+ 1 - 1
arch/xtensa/include/asm/syscall.h

@@ -10,7 +10,7 @@
 
 struct pt_regs;
 struct sigaction;
-asmlinkage long xtensa_execve(char*, char**, char**, struct pt_regs*);
+asmlinkage long sys_execve(char*, char**, char**, struct pt_regs*);
 asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*);
 asmlinkage long xtensa_ptrace(long, long, long, long);
 asmlinkage long xtensa_sigreturn(struct pt_regs*);

+ 5 - 10
arch/xtensa/include/asm/unistd.h

@@ -1,16 +1,9 @@
-/*
- * include/asm-xtensa/unistd.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
+#ifndef _XTENSA_UNISTD_H
+#define _XTENSA_UNISTD_H
 
+#define __ARCH_WANT_SYS_EXECVE
 #include <uapi/asm/unistd.h>
 
-
 /*
  * "Conditional" syscalls
  *
@@ -37,3 +30,5 @@
 #define __IGNORE_mmap				/* use mmap2 */
 #define __IGNORE_vfork				/* use clone */
 #define __IGNORE_fadvise64			/* use fadvise64_64 */
+
+#endif /* _XTENSA_UNISTD_H */

+ 4 - 12
arch/xtensa/include/uapi/asm/unistd.h

@@ -1,14 +1,4 @@
-/*
- * include/asm-xtensa/unistd.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2012 Tensilica Inc.
- */
-
-#ifndef _UAPI_XTENSA_UNISTD_H
+#if !defined(_UAPI_XTENSA_UNISTD_H) || defined(__SYSCALL)
 #define _UAPI_XTENSA_UNISTD_H
 
 #ifndef __SYSCALL
@@ -272,7 +262,7 @@ __SYSCALL(115, sys_sendmmsg, 4)
 #define __NR_clone 				116
 __SYSCALL(116, xtensa_clone, 5)
 #define __NR_execve 				117
-__SYSCALL(117, xtensa_execve, 3)
+__SYSCALL(117, sys_execve, 3)
 #define __NR_exit 				118
 __SYSCALL(118, sys_exit, 1)
 #define __NR_exit_group 			119
@@ -759,4 +749,6 @@ __SYSCALL(331, sys_kcmp, 5)
 
 #define SYS_XTENSA_COUNT                  5     /* count */
 
+#undef __SYSCALL
+
 #endif /* _UAPI_XTENSA_UNISTD_H */

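The reworked guard, #if !defined(_UAPI_XTENSA_UNISTD_H) || defined(__SYSCALL), lets this header be included twice: once normally, where __SYSCALL defaults to a no-op and only the __NR_* constants are emitted, and a second time with __SYSCALL redefined by a table builder, with the new trailing #undef cleaning up after each pass. The arch/xtensa/kernel/syscall.c hunk further down uses it exactly this way:

	/* Second include, from the table builder (see syscall.c below): */
	#define __SYSCALL(nr, symbol, nargs) [nr] = (syscall_t)symbol,
	#include <uapi/asm/unistd.h>	/* expands to designated initializers */
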
+ 13 - 44
arch/xtensa/kernel/entry.S

@@ -1832,50 +1832,6 @@ ENTRY(system_call)
 	retw
 
 
-/*
- * Create a kernel thread
- *
- * int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
- * a2                    a2                 a3             a4
- */
-
-ENTRY(kernel_thread)
-	entry	a1, 16
-
-	mov	a5, a2			# preserve fn over syscall
-	mov	a7, a3			# preserve args over syscall
-
-	movi	a3, _CLONE_VM | _CLONE_UNTRACED
-	movi	a2, __NR_clone
-	or	a6, a4, a3		# arg0: flags
-	mov	a3, a1			# arg1: sp
-	syscall
-
-	beq	a3, a1, 1f		# branch if parent
-	mov	a6, a7			# args
-	callx4	a5			# fn(args)
-
-	movi	a2, __NR_exit
-	syscall				# return value of fn(args) still in a6
-
-1:	retw
-
-/*
- * Do a system call from kernel instead of calling sys_execve, so we end up
- * with proper pt_regs.
- *
- * int kernel_execve(const char *fname, char *const argv[], charg *const envp[])
- * a2                        a2               a3                  a4
- */
-
-ENTRY(kernel_execve)
-	entry	a1, 16
-	mov	a6, a2			# arg0 is in a6
-	movi	a2, __NR_execve
-	syscall
-
-	retw
-
 /*
  * Task switch.
  *
@@ -1958,3 +1914,16 @@ ENTRY(ret_from_fork)
 
 	j	common_exception_return
 
+/*
+ * Kernel thread creation helper
+ * On entry, set up by copy_thread: a2 = thread_fn, a3 = thread_fn arg
+ *           left from _switch_to: a6 = prev
+ */
+ENTRY(ret_from_kernel_thread)
+
+	call4	schedule_tail
+	mov	a6, a3
+	callx4	a2
+	j	common_exception_return
+
+ENDPROC(ret_from_kernel_thread)

+ 71 - 57
arch/xtensa/kernel/process.c

@@ -45,6 +45,7 @@
 #include <asm/regs.h>
 
 extern void ret_from_fork(void);
+extern void ret_from_kernel_thread(void);
 
 struct task_struct *current_set[NR_CPUS] = {&init_task, };
 
@@ -158,18 +159,30 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 /*
  * Copy thread.
  *
+ * There are two modes in which this function is called:
+ * 1) Userspace thread creation,
+ *    regs != NULL, usp_thread_fn is the userspace stack pointer.
+ *    It is expected to copy the parent's regs (when CLONE_VM is not
+ *    set in clone_flags) and to set up the passed usp in childregs.
+ * 2) Kernel thread creation,
+ *    regs == NULL, usp_thread_fn is the function to run in the new thread
+ *    and thread_fn_arg is its parameter.
+ *    childregs are not used for kernel threads.
+ *
  * The stack layout for the new thread looks like this:
  *
- *	+------------------------+ <- sp in childregs (= tos)
+ *	+------------------------+
  *	|       childregs        |
  *	+------------------------+ <- thread.sp = sp in dummy-frame
  *	|      dummy-frame       |    (saved in dummy-frame spill-area)
  *	+------------------------+
  *
- * We create a dummy frame to return to ret_from_fork:
- *   a0 points to ret_from_fork (simulating a call4)
+ * We create a dummy frame to return to either ret_from_fork or
+ *   ret_from_kernel_thread:
+ *   a0 points to ret_from_fork/ret_from_kernel_thread (simulating a call4)
  *   sp points to itself (thread.sp)
- *   a2, a3 are unused.
+ *   a2, a3 are unused for userspace threads,
+ *   a2 points to thread_fn, a3 holds thread_fn arg for kernel threads.
  *
  * Note: This is a pristine frame, so we don't need any spill region on top of
  *       childregs.
@@ -185,43 +198,63 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
  * involved.  Much simpler to just not copy those live frames across.
  */
 
-int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-                struct task_struct * p, struct pt_regs * regs)
+int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
+		unsigned long thread_fn_arg,
+		struct task_struct *p, struct pt_regs *unused)
 {
-	struct pt_regs *childregs;
-	unsigned long tos;
-	int user_mode = user_mode(regs);
+	struct pt_regs *childregs = task_pt_regs(p);
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
 	struct thread_info *ti;
 #endif
 
-	/* Set up new TSS. */
-	tos = (unsigned long)task_stack_page(p) + THREAD_SIZE;
-	if (user_mode)
-		childregs = (struct pt_regs*)(tos - PT_USER_SIZE);
-	else
-		childregs = (struct pt_regs*)tos - 1;
-
-	/* This does not copy all the regs.  In a bout of brilliance or madness,
-	   ARs beyond a0-a15 exist past the end of the struct. */
-	*childregs = *regs;
-
 	/* Create a call4 dummy-frame: a0 = 0, a1 = childregs. */
 	*((int*)childregs - 3) = (unsigned long)childregs;
 	*((int*)childregs - 4) = 0;
 
-	childregs->areg[2] = 0;
-	p->set_child_tid = p->clear_child_tid = NULL;
-	p->thread.ra = MAKE_RA_FOR_CALL((unsigned long)ret_from_fork, 0x1);
 	p->thread.sp = (unsigned long)childregs;
 
-	if (user_mode(regs)) {
+	if (!(p->flags & PF_KTHREAD)) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long usp = usp_thread_fn ?
+			usp_thread_fn : regs->areg[1];
 
+		p->thread.ra = MAKE_RA_FOR_CALL(
+				(unsigned long)ret_from_fork, 0x1);
+
+		/* This does not copy all the regs.
+		 * In a bout of brilliance or madness,
+		 * ARs beyond a0-a15 exist past the end of the struct.
+		 */
+		*childregs = *regs;
 		childregs->areg[1] = usp;
+		childregs->areg[2] = 0;
+
+		/* When sharing memory with the parent thread, the child
+		   usually starts on a pristine stack, so we have to reset
+		   windowbase, windowstart and wmask.
+		   (Note that such a new thread is required to always create
+		   an initial call4 frame)
+		   The exception is vfork, where the new thread continues to
+		   run on the parent's stack until it calls execve. This could
+		   be a call8 or call12, which requires a legal stack frame
+		   of the previous caller for the overflow handlers to work.
+		   (Note that it's always legal to overflow live registers).
+		   In this case, make sure to spill at least the stack pointer
+		   of that frame. */
+
 		if (clone_flags & CLONE_VM) {
-			childregs->wmask = 1;	/* can't share live windows */
+			/* check that caller window is live and same stack */
+			int len = childregs->wmask & ~0xf;
+			if (regs->areg[1] == usp && len != 0) {
+				int callinc = (regs->areg[0] >> 30) & 3;
+				int caller_ars = XCHAL_NUM_AREGS - callinc * 4;
+				put_user(regs->areg[caller_ars+1],
+					 (unsigned __user*)(usp - 12));
+			}
+			childregs->wmask = 1;
+			childregs->windowstart = 1;
+			childregs->windowbase = 0;
 		} else {
 			int len = childregs->wmask & ~0xf;
 			memcpy(&childregs->areg[XCHAL_NUM_AREGS - len/4],
@@ -230,11 +263,19 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 // FIXME: we need to set THREADPTR in thread_info...
 		if (clone_flags & CLONE_SETTLS)
 			childregs->areg[2] = childregs->areg[6];
-
 	} else {
-		/* In kernel space, we start a new thread with a new stack. */
-		childregs->wmask = 1;
-		childregs->areg[1] = tos;
+		p->thread.ra = MAKE_RA_FOR_CALL(
+				(unsigned long)ret_from_kernel_thread, 1);
+
+		/* pass parameters to ret_from_kernel_thread:
+		 * a2 = thread_fn, a3 = thread_fn arg
+		 */
+		*((int *)childregs - 1) = thread_fn_arg;
+		*((int *)childregs - 2) = usp_thread_fn;
+
+		/* Childregs are only used when we're going to userspace,
+		 * in which case start_thread will set them up.
+		 */
 	}
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
@@ -330,32 +371,5 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp,
                   void __user *child_tid, long a5,
                   struct pt_regs *regs)
 {
-        if (!newsp)
-                newsp = regs->areg[1];
         return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
-
-/*
- * xtensa_execve() executes a new program.
- */
-
-asmlinkage
-long xtensa_execve(const char __user *name,
-		   const char __user *const __user *argv,
-                   const char __user *const __user *envp,
-                   long a3, long a4, long a5,
-                   struct pt_regs *regs)
-{
-	long error;
-	struct filename *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-out:
-	return error;
-}
-

+ 3 - 4
arch/xtensa/kernel/syscall.c

@@ -32,10 +32,8 @@ typedef void (*syscall_t)(void);
 syscall_t sys_call_table[__NR_syscall_count] /* FIXME __cacheline_aligned */= {
 	[0 ... __NR_syscall_count - 1] = (syscall_t)&sys_ni_syscall,
 
-#undef __SYSCALL
 #define __SYSCALL(nr,symbol,nargs) [ nr ] = (syscall_t)symbol,
-#undef  __KERNEL_SYSCALLS__
-#include <asm/unistd.h>
+#include <uapi/asm/unistd.h>
 };
 
 asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg)
@@ -49,7 +47,8 @@ asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg)
 	return (long)ret;
 }
 
-asmlinkage long xtensa_fadvise64_64(int fd, int advice, unsigned long long offset, unsigned long long len)
+asmlinkage long xtensa_fadvise64_64(int fd, int advice,
+		unsigned long long offset, unsigned long long len)
 {
 	return sys_fadvise64_64(fd, offset, len, advice);
 }

+ 0 - 1
arch/xtensa/kernel/xtensa_ksyms.c

@@ -43,7 +43,6 @@ EXPORT_SYMBOL(__strncpy_user);
 EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(copy_page);
 
-EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(empty_zero_page);
 
 /*

+ 1 - 1
block/Kconfig

@@ -89,7 +89,7 @@ config BLK_DEV_INTEGRITY
 
 config BLK_DEV_THROTTLING
 	bool "Block layer bio throttling support"
-	depends on BLK_CGROUP=y && EXPERIMENTAL
+	depends on BLK_CGROUP=y
 	default n
 	---help---
 	Block layer bio throttling support. It can be used to limit

+ 10 - 0
block/blk-cgroup.c

@@ -285,6 +285,13 @@ static void blkg_destroy_all(struct request_queue *q)
 		blkg_destroy(blkg);
 		spin_unlock(&blkcg->lock);
 	}
+
+	/*
+	 * The root blkg is destroyed.  Just clear the pointer, since
+	 * root_rl does not take a reference on the root blkg.
+	 */
+	q->root_blkg = NULL;
+	q->root_rl.blkg = NULL;
 }
 
 static void blkg_rcu_free(struct rcu_head *rcu_head)
@@ -326,6 +333,9 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
 	 */
 	if (rl == &q->root_rl) {
 		ent = &q->blkg_list;
+		/* There are no more block groups, hence no request lists */
+		if (list_empty(ent))
+			return NULL;
 	} else {
 		blkg = container_of(rl, struct blkcg_gq, rl);
 		ent = &blkg->q_node;

+ 2 - 1
block/blk-core.c

@@ -2868,7 +2868,8 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
 	struct request *rqa = container_of(a, struct request, queuelist);
 	struct request *rqb = container_of(b, struct request, queuelist);
 
-	return !(rqa->q <= rqb->q);
+	return !(rqa->q < rqb->q ||
+		(rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
 }
 
 /*

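The new comparator sorts the plugged requests first by queue and then by starting sector, so that when the plug list is flushed each queue receives its requests in ascending LBA order; the old expression only grouped by queue pointer. Written out as plain comparisons (a sketch, equivalent up to how ties are broken):

	/* Sketch of the new ordering; list_sort() expects a positive
	 * return when rqa should sort after rqb. */
	static int plug_rq_cmp_sketch(void *priv, struct list_head *a,
				      struct list_head *b)
	{
		struct request *rqa = container_of(a, struct request, queuelist);
		struct request *rqb = container_of(b, struct request, queuelist);

		if (rqa->q != rqb->q)
			return rqa->q > rqb->q;		/* group by queue */
		return blk_rq_pos(rqa) > blk_rq_pos(rqb);	/* then by sector */
	}
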
+ 8 - 3
crypto/cryptd.c

@@ -137,13 +137,18 @@ static void cryptd_queue_worker(struct work_struct *work)
 	struct crypto_async_request *req, *backlog;
 
 	cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
-	/* Only handle one request at a time to avoid hogging crypto
-	 * workqueue. preempt_disable/enable is used to prevent
-	 * being preempted by cryptd_enqueue_request() */
+	/*
+	 * Only handle one request at a time to avoid hogging crypto workqueue.
+	 * preempt_disable/enable is used to prevent being preempted by
+	 * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
+	 * cryptd_enqueue_request() from being invoked from software interrupts.
+	 */
+	local_bh_disable();
 	preempt_disable();
 	backlog = crypto_get_backlog(&cpu_queue->queue);
 	req = crypto_dequeue_request(&cpu_queue->queue);
 	preempt_enable();
+	local_bh_enable();
 
 	if (!req)
 		return;

+ 7 - 4
drivers/acpi/video.c

@@ -1345,12 +1345,15 @@ static int
 acpi_video_bus_get_devices(struct acpi_video_bus *video,
 			   struct acpi_device *device)
 {
-	int status;
+	int status = 0;
 	struct acpi_device *dev;
 
-	status = acpi_video_device_enumerate(video);
-	if (status)
-		return status;
+	/*
+	 * There are systems where the video module is known to work fine
+	 * regardless of a broken _DOD, and ignoring the returned value
+	 * here doesn't cause any issues later.
+	 */
+	acpi_video_device_enumerate(video);
 
 	list_for_each_entry(dev, &device->children, node) {
 

+ 7 - 0
drivers/base/platform.c

@@ -83,9 +83,16 @@ EXPORT_SYMBOL_GPL(platform_get_resource);
  */
 int platform_get_irq(struct platform_device *dev, unsigned int num)
 {
+#ifdef CONFIG_SPARC
+	/* sparc does not have irqs represented as IORESOURCE_IRQ resources */
+	if (!dev || num >= dev->archdata.num_irqs)
+		return -ENXIO;
+	return dev->archdata.irqs[num];
+#else
 	struct resource *r = platform_get_resource(dev, IORESOURCE_IRQ, num);
 
 	return r ? r->start : -ENXIO;
+#endif
 }
 EXPORT_SYMBOL_GPL(platform_get_irq);
 

+ 8 - 7
drivers/block/Kconfig

@@ -131,6 +131,7 @@ config BLK_CPQ_DA
 config BLK_CPQ_CISS_DA
 	tristate "Compaq Smart Array 5xxx support"
 	depends on PCI
+	select CHECK_SIGNATURE
 	help
 	  This is the driver for Compaq Smart Array 5xxx controllers.
 	  Everyone using these boards should say Y here.
@@ -166,8 +167,8 @@ config BLK_DEV_DAC960
 	  module will be called DAC960.
 
 config BLK_DEV_UMEM
-	tristate "Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)"
-	depends on PCI && EXPERIMENTAL
+	tristate "Micro Memory MM5415 Battery Backed RAM support"
+	depends on PCI
 	---help---
 	  Saying Y here will include support for the MM5415 family of
 	  battery backed (Non-volatile) RAM cards.
@@ -430,8 +431,8 @@ config CDROM_PKTCDVD_BUFFERS
 	  a disc is opened for writing.
 
 config CDROM_PKTCDVD_WCACHE
-	bool "Enable write caching (EXPERIMENTAL)"
-	depends on CDROM_PKTCDVD && EXPERIMENTAL
+	bool "Enable write caching"
+	depends on CDROM_PKTCDVD
 	help
 	  If enabled, write caching will be set for the CD-R/W device. For now
 	  this option is dangerous unless the CD-RW media is known good, as we
@@ -508,8 +509,8 @@ config XEN_BLKDEV_BACKEND
 
 
 config VIRTIO_BLK
-	tristate "Virtio block driver (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && VIRTIO
+	tristate "Virtio block driver"
+	depends on VIRTIO
 	---help---
 	  This is the virtual block driver for virtio.  It can be used with
           lguest or QEMU based VMMs (like KVM or Xen).  Say Y or M.
@@ -528,7 +529,7 @@ config BLK_DEV_HD
 
 config BLK_DEV_RBD
 	tristate "Rados block device (RBD)"
-	depends on INET && EXPERIMENTAL && BLOCK
+	depends on INET && BLOCK
 	select CEPH_LIB
 	select LIBCRC32C
 	select CRYPTO_AES

Some files were not shown because too many files changed in this diff