Jelajahi Sumber

Merge branch 'devel-stable' of http://ftp.arm.linux.org.uk/pub/linux/arm/kernel/git-cur/linux-2.6-arm

* 'devel-stable' of http://ftp.arm.linux.org.uk/pub/linux/arm/kernel/git-cur/linux-2.6-arm: (178 commits)
  ARM: 7139/1: fix compilation with CONFIG_ARM_ATAG_DTB_COMPAT and large TEXT_OFFSET
  ARM: gic, local timers: use the request_percpu_irq() interface
  ARM: gic: consolidate PPI handling
  ARM: switch from NO_MACH_MEMORY_H to NEED_MACH_MEMORY_H
  ARM: mach-s5p64x0: remove mach/memory.h
  ARM: mach-s3c64xx: remove mach/memory.h
  ARM: plat-mxc: remove mach/memory.h
  ARM: mach-prima2: remove mach/memory.h
  ARM: mach-zynq: remove mach/memory.h
  ARM: mach-bcmring: remove mach/memory.h
  ARM: mach-davinci: remove mach/memory.h
  ARM: mach-pxa: remove mach/memory.h
  ARM: mach-ixp4xx: remove mach/memory.h
  ARM: mach-h720x: remove mach/memory.h
  ARM: mach-vt8500: remove mach/memory.h
  ARM: mach-s5pc100: remove mach/memory.h
  ARM: mach-tegra: remove mach/memory.h
  ARM: plat-tcc: remove mach/memory.h
  ARM: mach-mmp: remove mach/memory.h
  ARM: mach-cns3xxx: remove mach/memory.h
  ...

Fix up mostly pretty trivial conflicts in:
 - arch/arm/Kconfig
 - arch/arm/include/asm/localtimer.h
 - arch/arm/kernel/Makefile
 - arch/arm/mach-shmobile/board-ap4evb.c
 - arch/arm/mach-u300/core.c
 - arch/arm/mm/dma-mapping.c
 - arch/arm/mm/proc-v7.S
 - arch/arm/plat-omap/Kconfig
largely due to some CONFIG option renaming (ie CONFIG_PM_SLEEP ->
CONFIG_ARM_CPU_SUSPEND for the arm-specific suspend code etc) and
addition of NEED_MACH_MEMORY_H next to HAVE_IDE.
Linus Torvalds 13 tahun lalu
induk
melakukan
1fdb24e969
100 mengubah file dengan 6464 tambahan dan 1249 penghapusan
  1. 63 0
      arch/arm/Kconfig
  2. 6 0
      arch/arm/Kconfig.debug
  3. 9 0
      arch/arm/boot/compressed/.gitignore
  4. 28 4
      arch/arm/boot/compressed/Makefile
  5. 97 0
      arch/arm/boot/compressed/atags_to_fdt.c
  6. 115 7
      arch/arm/boot/compressed/head.S
  7. 15 0
      arch/arm/boot/compressed/libfdt_env.h
  8. 1 41
      arch/arm/boot/compressed/misc.c
  9. 127 0
      arch/arm/boot/compressed/string.c
  10. 4 0
      arch/arm/boot/compressed/vmlinux.lds.in
  11. 220 11
      arch/arm/common/gic.c
  12. 7 0
      arch/arm/include/asm/dma-mapping.h
  13. 0 7
      arch/arm/include/asm/entry-macro-multi.S
  14. 0 3
      arch/arm/include/asm/hardirq.h
  15. 2 17
      arch/arm/include/asm/hardware/entry-macro-gic.S
  16. 8 1
      arch/arm/include/asm/hardware/gic.h
  17. 2 0
      arch/arm/include/asm/hw_breakpoint.h
  18. 8 14
      arch/arm/include/asm/localtimer.h
  19. 1 1
      arch/arm/include/asm/mach/arch.h
  20. 1 0
      arch/arm/include/asm/mach/map.h
  21. 8 10
      arch/arm/include/asm/memory.h
  22. 3 0
      arch/arm/include/asm/pgtable.h
  23. 74 19
      arch/arm/include/asm/pmu.h
  24. 8 0
      arch/arm/include/asm/proc-fns.h
  25. 0 5
      arch/arm/include/asm/smp.h
  26. 1 1
      arch/arm/include/asm/smp_twd.h
  27. 1 16
      arch/arm/include/asm/suspend.h
  28. 8 1
      arch/arm/kernel/Makefile
  29. 2 2
      arch/arm/kernel/debug.S
  30. 2 2
      arch/arm/kernel/head.S
  31. 170 105
      arch/arm/kernel/hw_breakpoint.c
  32. 0 3
      arch/arm/kernel/irq.c
  33. 4 0
      arch/arm/kernel/kprobes-arm.c
  34. 1323 0
      arch/arm/kernel/kprobes-test-arm.c
  35. 1187 0
      arch/arm/kernel/kprobes-test-thumb.c
  36. 1748 0
      arch/arm/kernel/kprobes-test.c
  37. 392 0
      arch/arm/kernel/kprobes-test.h
  38. 7 0
      arch/arm/kernel/kprobes-thumb.c
  39. 8 0
      arch/arm/kernel/kprobes.h
  40. 254 221
      arch/arm/kernel/perf_event.c
  41. 59 28
      arch/arm/kernel/perf_event_v6.c
  42. 196 199
      arch/arm/kernel/perf_event_v7.c
  43. 47 43
      arch/arm/kernel/perf_event_xscale.c
  44. 8 174
      arch/arm/kernel/pmu.c
  45. 2 19
      arch/arm/kernel/setup.c
  46. 27 58
      arch/arm/kernel/sleep.S
  47. 1 37
      arch/arm/kernel/smp.c
  48. 45 2
      arch/arm/kernel/smp_twd.c
  49. 72 0
      arch/arm/kernel/suspend.c
  50. 2 0
      arch/arm/mach-at91/at91sam9g45.c
  51. 0 2
      arch/arm/mach-at91/include/mach/at91sam9g45.h
  52. 1 1
      arch/arm/mach-at91/include/mach/debug-macro.S
  53. 1 2
      arch/arm/mach-bcmring/include/mach/hardware.h
  54. 0 33
      arch/arm/mach-bcmring/include/mach/memory.h
  55. 3 0
      arch/arm/mach-bcmring/mm.c
  56. 1 1
      arch/arm/mach-clps711x/autcpu12.c
  57. 1 1
      arch/arm/mach-clps711x/cdb89712.c
  58. 1 1
      arch/arm/mach-clps711x/ceiva.c
  59. 1 1
      arch/arm/mach-clps711x/clep7312.c
  60. 1 1
      arch/arm/mach-clps711x/edb7211-arch.c
  61. 0 1
      arch/arm/mach-clps711x/fortunet.c
  62. 1 1
      arch/arm/mach-clps711x/include/mach/debug-macro.S
  63. 1 1
      arch/arm/mach-clps711x/p720t.c
  64. 1 1
      arch/arm/mach-cns3xxx/cns3420vb.c
  65. 1 1
      arch/arm/mach-cns3xxx/include/mach/debug-macro.S
  66. 0 26
      arch/arm/mach-cns3xxx/include/mach/memory.h
  67. 1 1
      arch/arm/mach-davinci/board-da830-evm.c
  68. 1 1
      arch/arm/mach-davinci/board-da850-evm.c
  69. 1 1
      arch/arm/mach-davinci/board-dm355-evm.c
  70. 1 1
      arch/arm/mach-davinci/board-dm355-leopard.c
  71. 1 1
      arch/arm/mach-davinci/board-dm365-evm.c
  72. 1 1
      arch/arm/mach-davinci/board-dm644x-evm.c
  73. 2 2
      arch/arm/mach-davinci/board-dm646x-evm.c
  74. 1 1
      arch/arm/mach-davinci/board-mityomapl138.c
  75. 1 1
      arch/arm/mach-davinci/board-neuros-osd2.c
  76. 1 1
      arch/arm/mach-davinci/board-omapl138-hawk.c
  77. 1 1
      arch/arm/mach-davinci/board-sffsdr.c
  78. 1 1
      arch/arm/mach-davinci/board-tnetv107x-evm.c
  79. 3 0
      arch/arm/mach-davinci/common.c
  80. 1 1
      arch/arm/mach-davinci/cpuidle.c
  81. 4 0
      arch/arm/mach-davinci/include/mach/ddr2.h
  82. 23 29
      arch/arm/mach-davinci/include/mach/debug-macro.S
  83. 0 44
      arch/arm/mach-davinci/include/mach/memory.h
  84. 2 1
      arch/arm/mach-davinci/include/mach/serial.h
  85. 6 1
      arch/arm/mach-davinci/include/mach/uncompress.h
  86. 1 1
      arch/arm/mach-davinci/sleep.S
  87. 1 1
      arch/arm/mach-dove/cm-a510.c
  88. 1 1
      arch/arm/mach-dove/dove-db-setup.c
  89. 1 1
      arch/arm/mach-dove/include/mach/debug-macro.S
  90. 0 10
      arch/arm/mach-dove/include/mach/memory.h
  91. 1 1
      arch/arm/mach-ebsa110/core.c
  92. 1 1
      arch/arm/mach-ebsa110/include/mach/debug-macro.S
  93. 1 1
      arch/arm/mach-ep93xx/adssphere.c
  94. 8 8
      arch/arm/mach-ep93xx/edb93xx.c
  95. 1 1
      arch/arm/mach-ep93xx/gesbc9312.c
  96. 1 1
      arch/arm/mach-ep93xx/include/mach/debug-macro.S
  97. 4 4
      arch/arm/mach-ep93xx/micro9.c
  98. 2 2
      arch/arm/mach-ep93xx/simone.c
  99. 1 1
      arch/arm/mach-ep93xx/snappercl15.c
  100. 1 1
      arch/arm/mach-ep93xx/ts72xx.c

+ 63 - 0
arch/arm/Kconfig

@@ -29,6 +29,7 @@ config ARM
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
 	select GENERIC_IRQ_SHOW
+	select CPU_PM if (SUSPEND || CPU_IDLE)
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -211,6 +212,19 @@ config ARM_PATCH_PHYS_VIRT
 	  this feature (eg, building a kernel for a single machine) and
 	  you need to shrink the kernel to the minimal size.
 
+config NEED_MACH_MEMORY_H
+	bool
+	help
+	  Select this when mach/memory.h is required to provide special
+	  definitions for this platform.  The need for mach/memory.h should
+	  be avoided when possible.
+
+config PHYS_OFFSET
+	hex "Physical address of main memory"
+	depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
+	help
+	  Please provide the physical address corresponding to the
+	  location of main memory in your system.
 
 config GENERIC_BUG
 	def_bool y
@@ -247,6 +261,7 @@ config ARCH_INTEGRATOR
 	select GENERIC_CLOCKEVENTS
 	select PLAT_VERSATILE
 	select PLAT_VERSATILE_FPGA_IRQ
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for ARM's Integrator platform.
 
@@ -262,6 +277,7 @@ config ARCH_REALVIEW
 	select PLAT_VERSATILE_CLCD
 	select ARM_TIMER_SP804
 	select GPIO_PL061 if GPIOLIB
+	select NEED_MACH_MEMORY_H
 	help
 	  This enables support for ARM Ltd RealView boards.
 
@@ -322,6 +338,7 @@ config ARCH_CLPS711X
 	bool "Cirrus Logic CLPS711x/EP721x-based"
 	select CPU_ARM720T
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Cirrus Logic 711x/721x based boards.
 
@@ -361,6 +378,7 @@ config ARCH_EBSA110
 	select ISA
 	select NO_IOPORT
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  This is an evaluation board for the StrongARM processor available
 	  from Digital. It has limited hardware on-board, including an
@@ -376,6 +394,7 @@ config ARCH_EP93XX
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
 
@@ -385,6 +404,7 @@ config ARCH_FOOTBRIDGE
 	select FOOTBRIDGE
 	select GENERIC_CLOCKEVENTS
 	select HAVE_IDE
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for systems based on the DC21285 companion chip
 	  ("FootBridge"), such as the Simtec CATS and the Rebel NetWinder.
@@ -434,6 +454,7 @@ config ARCH_IOP13XX
 	select PCI
 	select ARCH_SUPPORTS_MSI
 	select VMSPLIT_1G
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Intel's IOP13XX (XScale) family of processors.
 
@@ -464,6 +485,7 @@ config ARCH_IXP23XX
 	select CPU_XSC3
  	select PCI
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Intel's IXP23xx (XScale) family of processors.
 
@@ -473,6 +495,7 @@ config ARCH_IXP2000
 	select CPU_XSCALE
 	select PCI
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Intel's IXP2400/2800 (XScale) family of processors.
 
@@ -565,6 +588,7 @@ config ARCH_KS8695
 	select CPU_ARM922T
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based
 	  System-on-Chip devices.
@@ -657,6 +681,7 @@ config ARCH_SHMOBILE
 	select SPARSE_IRQ
 	select MULTI_IRQ_HANDLER
 	select PM_GENERIC_DOMAINS if PM
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Renesas's SH-Mobile and R-Mobile ARM platforms.
 
@@ -672,6 +697,7 @@ config ARCH_RPC
 	select ARCH_SPARSEMEM_ENABLE
 	select ARCH_USES_GETTIMEOFFSET
 	select HAVE_IDE
+	select NEED_MACH_MEMORY_H
 	help
 	  On the Acorn Risc-PC, Linux can support the internal IDE disk and
 	  CD-ROM interface, serial and parallel port, and the floppy drive.
@@ -691,6 +717,7 @@ config ARCH_SA1100
 	select TICK_ONESHOT
 	select ARCH_REQUIRE_GPIOLIB
 	select HAVE_IDE
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for StrongARM 11x0 based boards.
 
@@ -782,6 +809,7 @@ config ARCH_S5PV210
 	select HAVE_S3C2410_I2C if I2C
 	select HAVE_S3C_RTC if RTC_CLASS
 	select HAVE_S3C2410_WATCHDOG if WATCHDOG
+	select NEED_MACH_MEMORY_H
 	help
 	  Samsung S5PV210/S5PC110 series based systems
 
@@ -798,6 +826,7 @@ config ARCH_EXYNOS4
 	select HAVE_S3C_RTC if RTC_CLASS
 	select HAVE_S3C2410_I2C if I2C
 	select HAVE_S3C2410_WATCHDOG if WATCHDOG
+	select NEED_MACH_MEMORY_H
 	help
 	  Samsung EXYNOS4 series based systems
 
@@ -809,6 +838,7 @@ config ARCH_SHARK
 	select ZONE_DMA
 	select PCI
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for the StrongARM based Digital DNARD machine, also known
 	  as "Shark" (<http://www.shark-linux.de/shark.html>).
@@ -837,6 +867,7 @@ config ARCH_U300
 	select HAVE_MACH_CLKDEV
 	select GENERIC_GPIO
 	select ARCH_REQUIRE_GPIOLIB
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for ST-Ericsson U300 series mobile platforms.
 
@@ -1835,6 +1866,38 @@ config ZBOOT_ROM_SH_MOBILE_SDHI
 
 endchoice
 
+config ARM_APPENDED_DTB
+	bool "Use appended device tree blob to zImage (EXPERIMENTAL)"
+	depends on OF && !ZBOOT_ROM && EXPERIMENTAL
+	help
+	  With this option, the boot code will look for a device tree binary
+	  (DTB) appended to zImage
+	  (e.g. cat zImage <filename>.dtb > zImage_w_dtb).
+
+	  This is meant as a backward compatibility convenience for those
+	  systems with a bootloader that can't be upgraded to accommodate
+	  the documented boot protocol using a device tree.
+
+	  Beware that there is very little in terms of protection against
+	  this option being confused by leftover garbage in memory that might
+	  look like a DTB header after a reboot if no actual DTB is appended
+	  to zImage.  Do not leave this option active in a production kernel
+	  if you don't intend to always append a DTB.  Proper passing of the
+	  location into r2 of a bootloader provided DTB is always preferable
+	  to this option.
+
+config ARM_ATAG_DTB_COMPAT
+	bool "Supplement the appended DTB with traditional ATAG information"
+	depends on ARM_APPENDED_DTB
+	help
+	  Some old bootloaders can't be updated to a DTB capable one, yet
+	  they provide ATAGs with memory configuration, the ramdisk address,
+	  the kernel cmdline string, etc.  Such information is dynamically
+	  provided by the bootloader and can't always be stored in a static
+	  DTB.  To allow a device tree enabled kernel to be used with such
+	  bootloaders, this option allows zImage to extract the information
+	  from the ATAG list and store it at run time into the appended DTB.
+
 config CMDLINE
 	string "Default kernel command string"
 	default ""

+ 6 - 0
arch/arm/Kconfig.debug

@@ -158,4 +158,10 @@ config DEBUG_S3C_UART
 	  The uncompressor code port configuration is now handled
 	  by CONFIG_S3C_LOWLEVEL_UART_PORT.
 
+config ARM_KPROBES_TEST
+	tristate "Kprobes test module"
+	depends on KPROBES && MODULES
+	help
+	  Perform tests of kprobes API and instruction set simulation.
+
 endmenu

+ 9 - 0
arch/arm/boot/compressed/.gitignore

@@ -5,3 +5,12 @@ piggy.lzo
 piggy.lzma
 vmlinux
 vmlinux.lds
+
+# borrowed libfdt files
+fdt.c
+fdt.h
+fdt_ro.c
+fdt_rw.c
+fdt_wip.c
+libfdt.h
+libfdt_internal.h

+ 28 - 4
arch/arm/boot/compressed/Makefile

@@ -26,6 +26,10 @@ HEAD	= head.o
 OBJS	+= misc.o decompress.o
 FONTC	= $(srctree)/drivers/video/console/font_acorn_8x8.c
 
+# string library code (-Os is enforced to keep it much smaller)
+OBJS		+= string.o
+CFLAGS_string.o	:= -Os
+
 #
 # Architecture dependencies
 #
@@ -89,21 +93,41 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip
 suffix_$(CONFIG_KERNEL_LZO)  = lzo
 suffix_$(CONFIG_KERNEL_LZMA) = lzma
 
+# Borrowed libfdt files for the ATAG compatibility mode
+
+libfdt		:= fdt_rw.c fdt_ro.c fdt_wip.c fdt.c
+libfdt_hdrs	:= fdt.h libfdt.h libfdt_internal.h
+
+libfdt_objs	:= $(addsuffix .o, $(basename $(libfdt)))
+
+$(addprefix $(obj)/,$(libfdt) $(libfdt_hdrs)): $(obj)/%: $(srctree)/scripts/dtc/libfdt/%
+	$(call cmd,shipped)
+
+$(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \
+	$(addprefix $(obj)/,$(libfdt_hdrs))
+
+ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y)
+OBJS	+= $(libfdt_objs) atags_to_fdt.o
+endif
+
 targets       := vmlinux vmlinux.lds \
 		 piggy.$(suffix_y) piggy.$(suffix_y).o \
-		 font.o font.c head.o misc.o $(OBJS)
+		 lib1funcs.o lib1funcs.S font.o font.c head.o misc.o $(OBJS)
 
 # Make sure files are removed during clean
-extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S
+extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S $(libfdt) $(libfdt_hdrs)
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
-ccflags-y := -fpic -fno-builtin
+ccflags-y := -fpic -fno-builtin -I$(obj)
 asflags-y := -Wa,-march=all
 
+# Supply kernel BSS size to the decompressor via a linker symbol.
+KBSS_SZ = $(shell size $(obj)/../../../../vmlinux | awk 'END{print $$3}')
+LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
 LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
@@ -123,7 +147,7 @@ LDFLAGS_vmlinux += -T
 # For __aeabi_uidivmod
 lib1funcs = $(obj)/lib1funcs.o
 
-$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE
+$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S
 	$(call cmd,shipped)
 
 # We need to prevent any GOTOFF relocs being used with references

+ 97 - 0
arch/arm/boot/compressed/atags_to_fdt.c

@@ -0,0 +1,97 @@
+#include <asm/setup.h>
+#include <libfdt.h>
+
+static int node_offset(void *fdt, const char *node_path)
+{
+	int offset = fdt_path_offset(fdt, node_path);
+	if (offset == -FDT_ERR_NOTFOUND)
+		offset = fdt_add_subnode(fdt, 0, node_path);
+	return offset;
+}
+
+static int setprop(void *fdt, const char *node_path, const char *property,
+		   uint32_t *val_array, int size)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop(fdt, offset, property, val_array, size);
+}
+
+static int setprop_string(void *fdt, const char *node_path,
+			  const char *property, const char *string)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_string(fdt, offset, property, string);
+}
+
+static int setprop_cell(void *fdt, const char *node_path,
+			const char *property, uint32_t val)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_cell(fdt, offset, property, val);
+}
+
+/*
+ * Convert and fold provided ATAGs into the provided FDT.
+ *
+ * atag_list:   candidate pointer to the ATAG list; must be 32-bit aligned
+ * fdt:         device tree blob, reopened in place with total_space bytes
+ * total_space: size passed to fdt_open_into() for the expanded blob
+ *
+ * ATAG_CMDLINE becomes the /chosen bootargs property, ATAG_MEM entries are
+ * gathered into the /memory reg property, and ATAG_INITRD2 becomes the
+ * /chosen linux,initrd-start and linux,initrd-end properties.  Failures of
+ * the individual property updates are ignored.
+ *
+ * Return values:
+ *    = 0 -> pretend success
+ *    = 1 -> bad ATAG (may retry with another possible ATAG pointer)
+ *    < 0 -> error from libfdt
+ */
+int atags_to_fdt(void *atag_list, void *fdt, int total_space)
+{
+	struct tag *atag = atag_list;
+	uint32_t mem_reg_property[2 * NR_BANKS];
+	int memcount = 0;
+	int ret;
+
+	/* make sure we've got an aligned pointer */
+	if ((u32)atag_list & 0x3)
+		return 1;
+
+	/* if we get a DTB here we're done already */
+	if (*(u32 *)atag_list == fdt32_to_cpu(FDT_MAGIC))
+	       return 0;
+
+	/* validate the ATAG */
+	if (atag->hdr.tag != ATAG_CORE ||
+	    (atag->hdr.size != tag_size(tag_core) &&
+	     atag->hdr.size != 2))
+		return 1;
+
+	/* let's give it all the room it could need */
+	ret = fdt_open_into(fdt, fdt, total_space);
+	if (ret < 0)
+		return ret;
+
+	for_each_tag(atag, atag_list) {
+		if (atag->hdr.tag == ATAG_CMDLINE) {
+			setprop_string(fdt, "/chosen", "bootargs",
+					atag->u.cmdline.cmdline);
+		} else if (atag->hdr.tag == ATAG_MEM) {
+			if (memcount >= sizeof(mem_reg_property)/4)
+				continue;	/* reg array is full: drop further banks */
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
+		} else if (atag->hdr.tag == ATAG_INITRD2) {
+			uint32_t initrd_start, initrd_size;
+			initrd_start = atag->u.initrd.start;
+			initrd_size = atag->u.initrd.size;
+			setprop_cell(fdt, "/chosen", "linux,initrd-start",
+					initrd_start);
+			setprop_cell(fdt, "/chosen", "linux,initrd-end",
+					initrd_start + initrd_size);
+		}
+	}
+
+	if (memcount)
+		setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
+
+	return fdt_pack(fdt);
+}

+ 115 - 7
arch/arm/boot/compressed/head.S

@@ -216,6 +216,104 @@ restart:	adr	r0, LC0
 		mov	r10, r6
 #endif
 
+		mov	r5, #0			@ init dtb size to 0
+#ifdef CONFIG_ARM_APPENDED_DTB
+/*
+ *   r0  = delta
+ *   r2  = BSS start
+ *   r3  = BSS end
+ *   r4  = final kernel address
+ *   r5  = appended dtb size (still unknown)
+ *   r6  = _edata
+ *   r7  = architecture ID
+ *   r8  = atags/device tree pointer
+ *   r9  = size of decompressed image
+ *   r10 = end of this image, including  bss/stack/malloc space if non XIP
+ *   r11 = GOT start
+ *   r12 = GOT end
+ *   sp  = stack pointer
+ *
+ * if there are device trees (dtb) appended to zImage, advance r10 so that the
+ * dtb data will get relocated along with the kernel if necessary.
+ */
+
+		ldr	lr, [r6, #0]
+#ifndef __ARMEB__
+		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
+#else
+		ldr	r1, =0xd00dfeed
+#endif
+		cmp	lr, r1
+		bne	dtb_check_done		@ not found
+
+#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
+		/*
+		 * OK... Let's do some funky business here.
+		 * If we do have a DTB appended to zImage, and we do have
+		 * an ATAG list around, we want the latter to be translated
+		 * and folded into the former here.  To be on the safe side,
+		 * let's temporarily move  the stack away into the malloc
+		 * area.  No GOT fixup has occurred yet, but none of the
+		 * code we're about to call uses any global variable.
+		*/
+		add	sp, sp, #0x10000
+		stmfd	sp!, {r0-r3, ip, lr}
+		mov	r0, r8
+		mov	r1, r6
+		sub	r2, sp, r6
+		bl	atags_to_fdt
+
+		/*
+		 * If returned value is 1, there is no ATAG at the location
+		 * pointed by r8.  Try the typical 0x100 offset from start
+		 * of RAM and hope for the best.
+		 *
+		 * The sub/add/mov below do not set flags, so the result of
+		 * the cmp is still live when the conditional call is reached.
+		 */
+		cmp	r0, #1
+		sub	r0, r4, #TEXT_OFFSET
+		add	r0, r0, #0x100
+		mov	r1, r6
+		sub	r2, sp, r6
+		bleq	atags_to_fdt		@ retry only if first call returned 1
+
+		ldmfd	sp!, {r0-r3, ip, lr}
+		sub	sp, sp, #0x10000
+#endif
+
+		mov	r8, r6			@ use the appended device tree
+
+		/*
+		 * Make sure that the DTB doesn't end up in the final
+		 * kernel's .bss area. To do so, we adjust the decompressed
+		 * kernel size to compensate if that .bss size is larger
+		 * than the relocated code.
+		 */
+		ldr	r5, =_kernel_bss_size
+		adr	r1, wont_overwrite
+		sub	r1, r6, r1
+		subs	r1, r5, r1
+		addhi	r9, r9, r1
+
+		/* Get the dtb's size */
+		ldr	r5, [r6, #4]
+#ifndef __ARMEB__
+		/* convert r5 (dtb size) to little endian */
+		eor	r1, r5, r5, ror #16
+		bic	r1, r1, #0x00ff0000
+		mov	r5, r5, ror #8
+		eor	r5, r5, r1, lsr #8
+#endif
+
+		/* preserve 64-bit alignment */
+		add	r5, r5, #7
+		bic	r5, r5, #7
+
+		/* relocate some pointers past the appended dtb */
+		add	r6, r6, r5
+		add	r10, r10, r5
+		add	sp, sp, r5
+dtb_check_done:
+#endif
+
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
@@ -223,15 +321,14 @@ restart:	adr	r0, LC0
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
  *   r4 - 16k page directory >= r10 -> OK
- *   r4 + image length <= current position (pc) -> OK
+ *   r4 + image length <= address of wont_overwrite -> OK
  */
 		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-   ARM(		cmp	r10, pc		)
- THUMB(		mov	lr, pc		)
- THUMB(		cmp	r10, lr		)
+		adr	r9, wont_overwrite
+		cmp	r10, r9
 		bls	wont_overwrite
 
 /*
@@ -285,14 +382,16 @@ wont_overwrite:
  *   r2  = BSS start
  *   r3  = BSS end
  *   r4  = kernel execution address
+ *   r5  = appended dtb size (0 if not present)
  *   r7  = architecture ID
  *   r8  = atags pointer
  *   r11 = GOT start
  *   r12 = GOT end
  *   sp  = stack pointer
  */
-		teq	r0, #0
+		orrs	r1, r0, r5
 		beq	not_relocated
+
 		add	r11, r11, r0
 		add	r12, r12, r0
 
@@ -307,12 +406,21 @@ wont_overwrite:
 
 		/*
 		 * Relocate all entries in the GOT table.
+		 * Bump bss entries to _edata + dtb size
 		 */
 1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
-		add	r1, r1, r0		@ table.  This fixes up the
-		str	r1, [r11], #4		@ C references.
+		add	r1, r1, r0		@ This fixes up C references
+		cmp	r1, r2			@ if entry >= bss_start &&
+		cmphs	r3, r1			@       bss_end > entry
+		addhi	r1, r1, r5		@    entry += dtb size
+		str	r1, [r11], #4		@ next entry
 		cmp	r11, r12
 		blo	1b
+
+		/* bump our bss pointers too */
+		add	r2, r2, r5
+		add	r3, r3, r5
+
 #else
 
 		/*

+ 15 - 0
arch/arm/boot/compressed/libfdt_env.h

@@ -0,0 +1,15 @@
+#ifndef _ARM_LIBFDT_ENV_H
+#define _ARM_LIBFDT_ENV_H
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+#define fdt16_to_cpu(x)		be16_to_cpu(x)
+#define cpu_to_fdt16(x)		cpu_to_be16(x)
+#define fdt32_to_cpu(x)		be32_to_cpu(x)
+#define cpu_to_fdt32(x)		cpu_to_be32(x)
+#define fdt64_to_cpu(x)		be64_to_cpu(x)
+#define cpu_to_fdt64(x)		cpu_to_be64(x)
+
+#endif

+ 1 - 41
arch/arm/boot/compressed/misc.c

@@ -18,14 +18,9 @@
 
 unsigned int __machine_arch_type;
 
-#define _LINUX_STRING_H_
-
 #include <linux/compiler.h>	/* for inline */
-#include <linux/types.h>	/* for size_t */
-#include <linux/stddef.h>	/* for NULL */
+#include <linux/types.h>
 #include <linux/linkage.h>
-#include <asm/string.h>
-
 
 static void putstr(const char *ptr);
 extern void error(char *x);
@@ -101,41 +96,6 @@ static void putstr(const char *ptr)
 	flush();
 }
 
-
-void *memcpy(void *__dest, __const void *__src, size_t __n)
-{
-	int i = 0;
-	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
-
-	for (i = __n >> 3; i > 0; i--) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 2) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 1) {
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1)
-		*d++ = *s++;
-
-	return __dest;
-}
-
 /*
  * gzip declarations
  */

+ 127 - 0
arch/arm/boot/compressed/string.c

@@ -0,0 +1,127 @@
+/*
+ * arch/arm/boot/compressed/string.c
+ *
+ * Small subset of simple string routines
+ */
+
+#include <linux/string.h>
+
+void *memcpy(void *__dest, __const void *__src, size_t __n)
+{
+	int i = 0;
+	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
+
+	for (i = __n >> 3; i > 0; i--) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 2) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 1) {
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1)
+		*d++ = *s++;
+
+	return __dest;
+}
+
+void *memmove(void *__dest, __const void *__src, size_t count)	/* copy count bytes; regions may overlap */
+{
+	unsigned char *d = __dest;
+	const unsigned char *s = __src;
+
+	if (__dest == __src)
+		return __dest;	/* same address: nothing to move */
+
+	if (__dest < __src)
+		return memcpy(__dest, __src, count);	/* memcpy here copies low to high, safe when dest is below src */
+
+	while (count--)
+		d[count] = s[count];	/* dest above src: copy from the last byte downwards */
+	return __dest;
+}
+
+size_t strlen(const char *s)
+{
+	const char *sc = s;
+
+	while (*sc != '\0')
+		sc++;
+	return sc - s;
+}
+
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+	const unsigned char *su1 = cs, *su2 = ct, *end = su1 + count;
+	int res = 0;
+
+	while (su1 < end) {
+		res = *su1++ - *su2++;
+		if (res)
+			break;
+	}
+	return res;
+}
+
+int strcmp(const char *cs, const char *ct)
+{
+	unsigned char c1, c2;
+	int res = 0;
+
+	do {
+		c1 = *cs++;
+		c2 = *ct++;
+		res = c1 - c2;
+		if (res)
+			break;
+	} while (c1);
+	return res;
+}
+
+void *memchr(const void *s, int c, size_t count)
+{
+	const unsigned char *p = s;
+
+	while (count--)
+		if ((unsigned char)c == *p++)
+			return (void *)(p - 1);
+	return NULL;
+}
+
+char *strchr(const char *s, int c)
+{
+	while (*s != (char)c)
+		if (*s++ == '\0')
+			return NULL;
+	return (char *)s;
+}
+
+#undef memset
+
+void *memset(void *s, int c, size_t count)
+{
+	char *xs = s;
+	while (count--)
+		*xs++ = c;
+	return s;
+}
+
+void __memzero(void *s, size_t count)
+{
+	memset(s, 0, count);
+}

+ 4 - 0
arch/arm/boot/compressed/vmlinux.lds.in

@@ -51,6 +51,10 @@ SECTIONS
   _got_start = .;
   .got			: { *(.got) }
   _got_end = .;
+
+  /* ensure the zImage file size is always a multiple of 64 bits */
+  /* (without a dummy byte, ld just ignores the empty section) */
+  .pad			: { BYTE(0); . = ALIGN(8); }
   _edata = .;
 
   . = BSS_START;

+ 220 - 11
arch/arm/common/gic.c

@@ -26,8 +26,12 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/smp.h>
+#include <linux/cpu_pm.h>
 #include <linux/cpumask.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
 
 #include <asm/irq.h>
 #include <asm/mach/irq.h>
@@ -262,6 +266,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic,
 	u32 cpumask;
 	void __iomem *base = gic->dist_base;
 	u32 cpu = 0;
+	u32 nrppis = 0, ppi_base = 0;
 
 #ifdef CONFIG_SMP
 	cpu = cpu_logical_map(smp_processor_id());
@@ -282,6 +287,25 @@ static void __init gic_dist_init(struct gic_chip_data *gic,
 	if (gic_irqs > 1020)
 		gic_irqs = 1020;
 
+	gic->gic_irqs = gic_irqs;
+
+	/*
+	 * Nobody would be insane enough to use PPIs on a secondary
+	 * GIC, right?
+	 */
+	if (gic == &gic_data[0]) {
+		nrppis = (32 - irq_start) & 31;
+
+		/* The GIC only supports up to 16 PPIs. */
+		if (nrppis > 16)
+			BUG();
+
+		ppi_base = gic->irq_offset + 32 - nrppis;
+	}
+
+	pr_info("Configuring GIC with %d sources (%d PPIs)\n",
+		gic_irqs, (gic == &gic_data[0]) ? nrppis : 0);
+
 	/*
 	 * Set all global interrupts to be level triggered, active low.
 	 */
@@ -317,7 +341,17 @@ static void __init gic_dist_init(struct gic_chip_data *gic,
 	/*
 	 * Setup the Linux IRQ subsystem.
 	 */
-	for (i = irq_start; i < irq_limit; i++) {
+	for (i = 0; i < nrppis; i++) {
+		int ppi = i + ppi_base;
+
+		irq_set_percpu_devid(ppi);
+		irq_set_chip_and_handler(ppi, &gic_chip,
+					 handle_percpu_devid_irq);
+		irq_set_chip_data(ppi, gic);
+		set_irq_flags(ppi, IRQF_VALID | IRQF_NOAUTOEN);
+	}
+
+	for (i = irq_start + nrppis; i < irq_limit; i++) {
 		irq_set_chip_and_handler(i, &gic_chip, handle_fasteoi_irq);
 		irq_set_chip_data(i, gic);
 		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
@@ -349,6 +383,189 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
+#ifdef CONFIG_CPU_PM
+/*
+ * Saves the GIC distributor registers during suspend or idle.  Must be called
+ * with interrupts disabled but before powering down the GIC.  After calling
+ * this function, no interrupts will be delivered by the GIC, and another
+ * platform-specific wakeup source must be enabled.
+ */
+static void gic_dist_save(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	void __iomem *dist_base;
+	int i;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		gic_data[gic_nr].saved_spi_conf[i] =
+			readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		gic_data[gic_nr].saved_spi_target[i] =
+			readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		gic_data[gic_nr].saved_spi_enable[i] =
+			readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+}
+
+/*
+ * Restores the GIC distributor registers during resume or when coming out of
+ * idle.  Must be called before enabling interrupts.  If a level interrupt
+ * that occurred while the GIC was suspended is still present, it will be
+ * handled normally, but any edge interrupts that occurred will not be seen by
+ * the GIC and need to be handled by the platform-specific wakeup source.
+ */
+static void gic_dist_restore(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	unsigned int i;
+	void __iomem *dist_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	writel_relaxed(0, dist_base + GIC_DIST_CTRL);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_conf[i],
+			dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(0xa0a0a0a0,
+			dist_base + GIC_DIST_PRI + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_target[i],
+			dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_enable[i],
+			dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	writel_relaxed(1, dist_base + GIC_DIST_CTRL);
+}
+
+static void gic_cpu_save(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+}
+
+static void gic_cpu_restore(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(32, 4); i++)
+		writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
+
+	writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
+	writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
+}
+
+static int gic_notifier(struct notifier_block *self, unsigned long cmd,	void *v)
+{
+	int i;
+
+	for (i = 0; i < MAX_GIC_NR; i++) {
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			gic_cpu_save(i);
+			break;
+		case CPU_PM_ENTER_FAILED:
+		case CPU_PM_EXIT:
+			gic_cpu_restore(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER:
+			gic_dist_save(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER_FAILED:
+		case CPU_CLUSTER_PM_EXIT:
+			gic_dist_restore(i);
+			break;
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block gic_notifier_block = {
+	.notifier_call = gic_notifier,
+};
+
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+	gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_enable);
+
+	gic->saved_ppi_conf = __alloc_percpu(DIV_ROUND_UP(32, 16) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_conf);
+
+	cpu_pm_register_notifier(&gic_notifier_block);
+}
+#else
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+}
+#endif
+
 void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
 	void __iomem *dist_base, void __iomem *cpu_base)
 {
@@ -364,8 +581,10 @@ void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
 	if (gic_nr == 0)
 		gic_cpu_base_addr = cpu_base;
 
+	gic_chip.flags |= gic_arch_extn.flags;
 	gic_dist_init(gic, irq_start);
 	gic_cpu_init(gic);
+	gic_pm_init(gic);
 }
 
 void __cpuinit gic_secondary_init(unsigned int gic_nr)
@@ -375,16 +594,6 @@ void __cpuinit gic_secondary_init(unsigned int gic_nr)
 	gic_cpu_init(&gic_data[gic_nr]);
 }
 
-void __cpuinit gic_enable_ppi(unsigned int irq)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	irq_set_status_flags(irq, IRQ_NOPROBE);
-	gic_unmask_irq(irq_get_irq_data(irq));
-	local_irq_restore(flags);
-}
-
 #ifdef CONFIG_SMP
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {

+ 7 - 0
arch/arm/include/asm/dma-mapping.h

@@ -205,6 +205,13 @@ extern void *dma_alloc_writecombine(struct device *, size_t, dma_addr_t *,
 int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
 		void *, dma_addr_t, size_t);
 
+/*
+ * This can be called during boot to increase the size of the consistent
+ * DMA region above its default value of 2MB. It must be called before the
+ * memory allocator is initialised, i.e. before any core_initcall.
+ */
+extern void __init init_consistent_dma_size(unsigned long size);
+
 
 #ifdef CONFIG_DMABOUNCE
 /*

+ 0 - 7
arch/arm/include/asm/entry-macro-multi.S

@@ -25,13 +25,6 @@
 	movne	r1, sp
 	adrne	lr, BSYM(1b)
 	bne	do_IPI
-
-#ifdef CONFIG_LOCAL_TIMERS
-	test_for_ltirq r0, r2, r6, lr
-	movne	r0, sp
-	adrne	lr, BSYM(1b)
-	bne	do_local_timer
-#endif
 #endif
 9997:
 	.endm

+ 0 - 3
arch/arm/include/asm/hardirq.h

@@ -9,9 +9,6 @@
 
 typedef struct {
 	unsigned int __softirq_pending;
-#ifdef CONFIG_LOCAL_TIMERS
-	unsigned int local_timer_irqs;
-#endif
 #ifdef CONFIG_SMP
 	unsigned int ipi_irqs[NR_IPI];
 #endif

+ 2 - 17
arch/arm/include/asm/hardware/entry-macro-gic.S

@@ -22,15 +22,11 @@
  * interrupt controller spec.  To wit:
  *
  * Interrupts 0-15 are IPI
- * 16-28 are reserved
- * 29-31 are local.  We allow 30 to be used for the watchdog.
+ * 16-31 are local.  We allow 30 to be used for the watchdog.
  * 32-1020 are global
  * 1021-1022 are reserved
  * 1023 is "spurious" (no interrupt)
  *
- * For now, we ignore all local interrupts so only return an interrupt if it's
- * between 30 and 1020.  The test_for_ipi routine below will pick up on IPIs.
- *
  * A simple read from the controller will tell us the number of the highest
  * priority enabled interrupt.  We then just need to check whether it is in the
  * valid range for an IRQ (16-1020 inclusive).
@@ -43,7 +39,7 @@
 
 	ldr	\tmp, =1021
 	bic     \irqnr, \irqstat, #0x1c00
-	cmp     \irqnr, #29
+	cmp     \irqnr, #15
 	cmpcc	\irqnr, \irqnr
 	cmpne	\irqnr, \tmp
 	cmpcs	\irqnr, \irqnr
@@ -62,14 +58,3 @@
 	strcc	\irqstat, [\base, #GIC_CPU_EOI]
 	cmpcs	\irqnr, \irqnr
 	.endm
-
-/* As above, this assumes that irqstat and base are preserved.. */
-
-	.macro test_for_ltirq, irqnr, irqstat, base, tmp
-	bic	\irqnr, \irqstat, #0x1c00
-	mov 	\tmp, #0
-	cmp	\irqnr, #29
-	moveq	\tmp, #1
-	streq	\irqstat, [\base, #GIC_CPU_EOI]
-	cmp	\tmp, #0
-	.endm

+ 8 - 1
arch/arm/include/asm/hardware/gic.h

@@ -40,12 +40,19 @@ void gic_init(unsigned int, unsigned int, void __iomem *, void __iomem *);
 void gic_secondary_init(unsigned int);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq);
-void gic_enable_ppi(unsigned int);
 
 struct gic_chip_data {
 	unsigned int irq_offset;
 	void __iomem *dist_base;
 	void __iomem *cpu_base;
+#ifdef CONFIG_CPU_PM
+	u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
+	u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
+	u32 saved_spi_target[DIV_ROUND_UP(1020, 4)];
+	u32 __percpu *saved_ppi_enable;
+	u32 __percpu *saved_ppi_conf;
+#endif
+	unsigned int gic_irqs;
 };
 #endif
 

+ 2 - 0
arch/arm/include/asm/hw_breakpoint.h

@@ -50,6 +50,7 @@ static inline void decode_ctrl_reg(u32 reg,
 #define ARM_DEBUG_ARCH_V6_1	2
 #define ARM_DEBUG_ARCH_V7_ECP14	3
 #define ARM_DEBUG_ARCH_V7_MM	4
+#define ARM_DEBUG_ARCH_V7_1	5
 
 /* Breakpoint */
 #define ARM_BREAKPOINT_EXECUTE	0
@@ -57,6 +58,7 @@ static inline void decode_ctrl_reg(u32 reg,
 /* Watchpoints */
 #define ARM_BREAKPOINT_LOAD	1
 #define ARM_BREAKPOINT_STORE	2
+#define ARM_FSR_ACCESS_MASK	(1 << 11)
 
 /* Privilege Levels */
 #define ARM_BREAKPOINT_PRIV	1

+ 8 - 14
arch/arm/include/asm/localtimer.h

@@ -11,6 +11,7 @@
 #define __ASM_ARM_LOCALTIMER_H
 
 #include <linux/errno.h>
+#include <linux/interrupt.h>
 
 struct clock_event_device;
 
@@ -19,31 +20,20 @@ struct clock_event_device;
  */
 void percpu_timer_setup(void);
 
-/*
- * Called from assembly, this is the local timer IRQ handler
- */
-asmlinkage void do_local_timer(struct pt_regs *);
-
-/*
- * Called from C code
- */
-void handle_local_timer(struct pt_regs *);
-
 #ifdef CONFIG_LOCAL_TIMERS
 
 #ifdef CONFIG_HAVE_ARM_TWD
 
 #include "smp_twd.h"
 
-#define local_timer_ack()	twd_timer_ack()
+#define local_timer_stop(c)	twd_timer_stop((c))
 
 #else
 
 /*
- * Platform provides this to acknowledge a local timer IRQ.
- * Returns true if the local timer IRQ is to be processed.
+ * Stop the local timer
  */
-int local_timer_ack(void);
+void local_timer_stop(struct clock_event_device *);
 
 #endif
 
@@ -58,6 +48,10 @@ static inline int local_timer_setup(struct clock_event_device *evt)
 {
 	return -ENXIO;
 }
+
+static inline void local_timer_stop(struct clock_event_device *evt)
+{
+}
 #endif
 
 #endif

+ 1 - 1
arch/arm/include/asm/mach/arch.h

@@ -17,7 +17,7 @@ struct sys_timer;
 struct machine_desc {
 	unsigned int		nr;		/* architecture number	*/
 	const char		*name;		/* architecture name	*/
-	unsigned long		boot_params;	/* tagged list		*/
+	unsigned long		atag_offset;	/* tagged list (relative) */
 	const char		**dt_compat;	/* array of device tree
 						 * 'compatible' strings	*/
 

+ 1 - 0
arch/arm/include/asm/mach/map.h

@@ -29,6 +29,7 @@ struct map_desc {
 #define MT_MEMORY_NONCACHED	11
 #define MT_MEMORY_DTCM		12
 #define MT_MEMORY_ITCM		13
+#define MT_MEMORY_SO		14
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);

+ 8 - 10
arch/arm/include/asm/memory.h

@@ -16,9 +16,12 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
-#include <mach/memory.h>
 #include <asm/sizes.h>
 
+#ifdef CONFIG_NEED_MACH_MEMORY_H
+#include <mach/memory.h>
+#endif
+
 /*
  * Allow for constants defined here to be used from assembly code
  * by prepending the UL suffix only with actual C code compilation.
@@ -77,16 +80,7 @@
  */
 #define IOREMAP_MAX_ORDER	24
 
-/*
- * Size of DMA-consistent memory region.  Must be multiple of 2M,
- * between 2MB and 14MB inclusive.
- */
-#ifndef CONSISTENT_DMA_SIZE
-#define CONSISTENT_DMA_SIZE 	SZ_2M
-#endif
-
 #define CONSISTENT_END		(0xffe00000UL)
-#define CONSISTENT_BASE		(CONSISTENT_END - CONSISTENT_DMA_SIZE)
 
 #else /* CONFIG_MMU */
 
@@ -193,7 +187,11 @@ static inline unsigned long __phys_to_virt(unsigned long x)
 #endif
 
 #ifndef PHYS_OFFSET
+#ifdef PLAT_PHYS_OFFSET
 #define PHYS_OFFSET	PLAT_PHYS_OFFSET
+#else
+#define PHYS_OFFSET	UL(CONFIG_PHYS_OFFSET)
+#endif
 #endif
 
 /*

+ 3 - 0
arch/arm/include/asm/pgtable.h

@@ -101,6 +101,9 @@ extern pgprot_t		pgprot_kernel;
 #define pgprot_writecombine(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
 
+#define pgprot_stronglyordered(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
+
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 #define pgprot_dmacoherent(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)

+ 74 - 19
arch/arm/include/asm/pmu.h

@@ -13,7 +13,12 @@
 #define __ARM_PMU_H__
 
 #include <linux/interrupt.h>
+#include <linux/perf_event.h>
 
+/*
+ * Types of PMUs that can be accessed directly and require mutual
+ * exclusion between profiling tools.
+ */
 enum arm_pmu_type {
 	ARM_PMU_DEVICE_CPU	= 0,
 	ARM_NUM_PMU_DEVICES,
@@ -37,21 +42,17 @@ struct arm_pmu_platdata {
  * reserve_pmu() - reserve the hardware performance counters
  *
  * Reserve the hardware performance counters in the system for exclusive use.
- * The platform_device for the system is returned on success, ERR_PTR()
- * encoded error on failure.
+ * Returns 0 on success or -EBUSY if the lock is already held.
  */
-extern struct platform_device *
+extern int
 reserve_pmu(enum arm_pmu_type type);
 
 /**
  * release_pmu() - Relinquish control of the performance counters
  *
  * Release the performance counters and allow someone else to use them.
- * Callers must have disabled the counters and released IRQs before calling
- * this. The platform_device returned from reserve_pmu() must be passed as
- * a cookie.
  */
-extern int
+extern void
 release_pmu(enum arm_pmu_type type);
 
 /**
@@ -68,24 +69,78 @@ init_pmu(enum arm_pmu_type type);
 
 #include <linux/err.h>
 
-static inline struct platform_device *
-reserve_pmu(enum arm_pmu_type type)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline int
-release_pmu(enum arm_pmu_type type)
+reserve_pmu(enum arm_pmu_type type)
 {
 	return -ENODEV;
 }
 
-static inline int
-init_pmu(enum arm_pmu_type type)
-{
-	return -ENODEV;
-}
+static inline void
+release_pmu(enum arm_pmu_type type)	{ }
 
 #endif /* CONFIG_CPU_HAS_PMU */
 
+#ifdef CONFIG_HW_PERF_EVENTS
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event	**events;
+
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long           *used_mask;
+
+	/*
+	 * Hardware lock to serialize accesses to PMU registers. Needed for the
+	 * read/modify/write sequences.
+	 */
+	raw_spinlock_t		pmu_lock;
+};
+
+struct arm_pmu {
+	struct pmu	pmu;
+	enum arm_perf_pmu_ids id;
+	enum arm_pmu_type type;
+	cpumask_t	active_irqs;
+	const char	*name;
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	void		(*enable)(struct hw_perf_event *evt, int idx);
+	void		(*disable)(struct hw_perf_event *evt, int idx);
+	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
+					 struct hw_perf_event *hwc);
+	int		(*set_event_filter)(struct hw_perf_event *evt,
+					    struct perf_event_attr *attr);
+	u32		(*read_counter)(int idx);
+	void		(*write_counter)(int idx, u32 val);
+	void		(*start)(void);
+	void		(*stop)(void);
+	void		(*reset)(void *);
+	int		(*map_event)(struct perf_event *event);
+	int		num_events;
+	atomic_t	active_events;
+	struct mutex	reserve_mutex;
+	u64		max_period;
+	struct platform_device	*plat_device;
+	struct pmu_hw_events	*(*get_hw_events)(void);
+};
+
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
+
+u64 armpmu_event_update(struct perf_event *event,
+			struct hw_perf_event *hwc,
+			int idx, int overflow);
+
+int armpmu_event_set_period(struct perf_event *event,
+			    struct hw_perf_event *hwc,
+			    int idx);
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
 #endif /* __ARM_PMU_H__ */

+ 8 - 0
arch/arm/include/asm/proc-fns.h

@@ -81,6 +81,10 @@ extern void cpu_dcache_clean_area(void *, int);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+
+/* These are private to arch/arm/kernel/suspend.c */
+extern void cpu_do_suspend(void *);
+extern void cpu_do_resume(void *);
 #else
 #define cpu_proc_init			processor._proc_init
 #define cpu_proc_fin			processor._proc_fin
@@ -89,6 +93,10 @@ extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
 #define cpu_dcache_clean_area		processor.dcache_clean_area
 #define cpu_set_pte_ext			processor.set_pte_ext
 #define cpu_do_switch_mm		processor.switch_mm
+
+/* These are private to arch/arm/kernel/suspend.c */
+#define cpu_do_suspend			processor.do_suspend
+#define cpu_do_resume			processor.do_resume
 #endif
 
 extern void cpu_resume(void);

+ 0 - 5
arch/arm/include/asm/smp.h

@@ -99,9 +99,4 @@ extern void platform_cpu_enable(unsigned int cpu);
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
-/*
- * show local interrupt info
- */
-extern void show_local_irqs(struct seq_file *, int);
-
 #endif /* ifndef __ASM_ARM_SMP_H */

+ 1 - 1
arch/arm/include/asm/smp_twd.h

@@ -22,7 +22,7 @@ struct clock_event_device;
 
 extern void __iomem *twd_base;
 
-int twd_timer_ack(void);
 void twd_timer_setup(struct clock_event_device *);
+void twd_timer_stop(struct clock_event_device *);
 
 #endif

+ 1 - 16
arch/arm/include/asm/suspend.h

@@ -1,22 +1,7 @@
 #ifndef __ASM_ARM_SUSPEND_H
 #define __ASM_ARM_SUSPEND_H
 
-#include <asm/memory.h>
-#include <asm/tlbflush.h>
-
 extern void cpu_resume(void);
-
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
-static inline int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
-{
-	extern int __cpu_suspend(int, long, unsigned long,
-				 int (*)(unsigned long));
-	int ret = __cpu_suspend(0, PHYS_OFFSET - PAGE_OFFSET, arg, fn);
-	flush_tlb_all();
-	return ret;
-}
+extern int cpu_suspend(unsigned long, int (*)(unsigned long));
 
 #endif

+ 8 - 1
arch/arm/kernel/Makefile

@@ -29,7 +29,7 @@ obj-$(CONFIG_MODULES)		+= armksyms.o module.o
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
-obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o
+obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
 obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
@@ -43,6 +43,13 @@ obj-$(CONFIG_KPROBES)		+= kprobes-thumb.o
 else
 obj-$(CONFIG_KPROBES)		+= kprobes-arm.o
 endif
+obj-$(CONFIG_ARM_KPROBES_TEST)	+= test-kprobes.o
+test-kprobes-objs		:= kprobes-test.o
+ifdef CONFIG_THUMB2_KERNEL
+test-kprobes-objs		+= kprobes-test-thumb.o
+else
+test-kprobes-objs		+= kprobes-test-arm.o
+endif
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o

+ 2 - 2
arch/arm/kernel/debug.S

@@ -22,7 +22,7 @@
 #if defined(CONFIG_DEBUG_ICEDCC)
 		@@ debug using ARM EmbeddedICE DCC channel
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		.endm
 
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
@@ -106,7 +106,7 @@
 
 #ifdef CONFIG_MMU
 		.macro	addruart_current, rx, tmp1, tmp2
-		addruart	\tmp1, \tmp2
+		addruart	\tmp1, \tmp2, \rx
 		mrc		p15, 0, \rx, c1, c0
 		tst		\rx, #1
 		moveq		\rx, \tmp1

+ 2 - 2
arch/arm/kernel/head.S

@@ -99,7 +99,7 @@ ENTRY(stext)
 	sub	r4, r3, r4			@ (PHYS_OFFSET - PAGE_OFFSET)
 	add	r8, r8, r4			@ PHYS_OFFSET
 #else
-	ldr	r8, =PLAT_PHYS_OFFSET
+	ldr	r8, =PHYS_OFFSET		@ always constant in this case
 #endif
 
 	/*
@@ -238,7 +238,7 @@ __create_page_tables:
 	 * This allows debug messages to be output
 	 * via a serial console before paging_init.
 	 */
-	addruart r7, r3
+	addruart r7, r3, r0
 
 	mov	r3, r3, lsr #SECTION_SHIFT
 	mov	r3, r3, lsl #PMD_ORDER

+ 170 - 105
arch/arm/kernel/hw_breakpoint.c

@@ -45,7 +45,6 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
-static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -137,10 +136,11 @@ static u8 get_debug_arch(void)
 	u32 didr;
 
 	/* Do we implement the extended CPUID interface? */
-	if (WARN_ONCE((((read_cpuid_id() >> 16) & 0xf) != 0xf),
-	    "CPUID feature registers not supported. "
-	    "Assuming v6 debug is present.\n"))
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warning("CPUID feature registers not supported. "
+			   "Assuming v6 debug is present.\n");
 		return ARM_DEBUG_ARCH_V6;
+	}
 
 	ARM_DBG_READ(c0, 0, didr);
 	return (didr >> 16) & 0xf;
@@ -154,10 +154,21 @@ u8 arch_get_debug_arch(void)
 static int debug_arch_supported(void)
 {
 	u8 arch = get_debug_arch();
-	return arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14;
+
+	/* We don't support the memory-mapped interface. */
+	return (arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14) ||
+		arch >= ARM_DEBUG_ARCH_V7_1;
+}
+
+/* Determine number of WRP registers available. */
+static int get_num_wrp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 28) & 0xf) + 1;
 }
 
-/* Determine number of BRP register available. */
+/* Determine number of BRP registers available. */
 static int get_num_brp_resources(void)
 {
 	u32 didr;
@@ -176,9 +187,10 @@ static int core_has_mismatch_brps(void)
 static int get_num_wrps(void)
 {
 	/*
-	 * FIXME: When a watchpoint fires, the only way to work out which
-	 * watchpoint it was is by disassembling the faulting instruction
-	 * and working out the address of the memory access.
+	 * On debug architectures prior to 7.1, when a watchpoint fires, the
+	 * only way to work out which watchpoint it was is by disassembling
+	 * the faulting instruction and working out the address of the memory
+	 * access.
 	 *
 	 * Furthermore, we can only do this if the watchpoint was precise
 	 * since imprecise watchpoints prevent us from calculating register
@@ -192,36 +204,17 @@ static int get_num_wrps(void)
 	 * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
 	 * that it is set on some implementations].
 	 */
+	if (get_debug_arch() < ARM_DEBUG_ARCH_V7_1)
+		return 1;
 
-#if 0
-	int wrps;
-	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
-	wrps = ((didr >> 28) & 0xf) + 1;
-#endif
-	int wrps = 1;
-
-	if (core_has_mismatch_brps() && wrps >= get_num_brp_resources())
-		wrps = get_num_brp_resources() - 1;
-
-	return wrps;
-}
-
-/* We reserve one breakpoint for each watchpoint. */
-static int get_num_reserved_brps(void)
-{
-	if (core_has_mismatch_brps())
-		return get_num_wrps();
-	return 0;
+	return get_num_wrp_resources();
 }
 
 /* Determine number of usable BRPs available. */
 static int get_num_brps(void)
 {
 	int brps = get_num_brp_resources();
-	if (core_has_mismatch_brps())
-		brps -= get_num_reserved_brps();
-	return brps;
+	return core_has_mismatch_brps() ? brps - 1 : brps;
 }
 
 /*
@@ -239,7 +232,7 @@ static int enable_monitor_mode(void)
 
 	/* Ensure that halting mode is disabled. */
 	if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN,
-			"halting debug mode enabled. Unable to access hardware resources.\n")) {
+		"halting debug mode enabled. Unable to access hardware resources.\n")) {
 		ret = -EPERM;
 		goto out;
 	}
@@ -255,6 +248,7 @@ static int enable_monitor_mode(void)
 		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	case ARM_DEBUG_ARCH_V7_ECP14:
+	case ARM_DEBUG_ARCH_V7_1:
 		ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	default:
@@ -346,24 +340,10 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		val_base = ARM_BASE_BVR;
 		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
 		max_slots = core_num_brps;
-		if (info->step_ctrl.enabled) {
-			/* Override the breakpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		}
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled) {
-			/* Install into the reserved breakpoint region. */
-			ctrl_base = ARM_BASE_BCR + core_num_brps;
-			val_base = ARM_BASE_BVR + core_num_brps;
-			/* Override the watchpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		} else {
-			ctrl_base = ARM_BASE_WCR;
-			val_base = ARM_BASE_WVR;
-		}
+		ctrl_base = ARM_BASE_WCR;
+		val_base = ARM_BASE_WVR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -382,6 +362,17 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		goto out;
 	}
 
+	/* Override the breakpoint data with the step data. */
+	if (info->step_ctrl.enabled) {
+		addr = info->trigger & ~0x3;
+		ctrl = encode_ctrl_reg(info->step_ctrl);
+		if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE) {
+			i = 0;
+			ctrl_base = ARM_BASE_BCR + core_num_brps;
+			val_base = ARM_BASE_BVR + core_num_brps;
+		}
+	}
+
 	/* Setup the address register. */
 	write_wb_reg(val_base + i, addr);
 
@@ -405,10 +396,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 		max_slots = core_num_brps;
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled)
-			base = ARM_BASE_BCR + core_num_brps;
-		else
-			base = ARM_BASE_WCR;
+		base = ARM_BASE_WCR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -426,6 +414,13 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n"))
 		return;
 
+	/* Ensure that we disable the mismatch breakpoint. */
+	if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE &&
+	    info->step_ctrl.enabled) {
+		i = 0;
+		base = ARM_BASE_BCR + core_num_brps;
+	}
+
 	/* Reset the control register. */
 	write_wb_reg(base + i, 0);
 }
@@ -632,10 +627,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * we can use the mismatch feature as a poor-man's hardware
 	 * single-step, but this only works for per-task breakpoints.
 	 */
-	if (WARN_ONCE(!bp->overflow_handler &&
-		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()
-		 || !bp->hw.bp_target),
-			"overflow handler required but none found\n")) {
+	if (!bp->overflow_handler && (arch_check_bp_in_kernelspace(bp) ||
+	    !core_has_mismatch_brps() || !bp->hw.bp_target)) {
+		pr_warning("overflow handler required but none found\n");
 		ret = -EINVAL;
 	}
 out:
@@ -666,34 +660,62 @@ static void disable_single_step(struct perf_event *bp)
 	arch_install_hw_breakpoint(bp);
 }
 
-static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
+static void watchpoint_handler(unsigned long addr, unsigned int fsr,
+			       struct pt_regs *regs)
 {
-	int i;
+	int i, access;
+	u32 val, ctrl_reg, alignment_mask;
 	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	/* Without a disassembler, we can only handle 1 watchpoint. */
-	BUG_ON(core_num_wrps > 1);
-
 	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
 
-		if (wp == NULL) {
-			rcu_read_unlock();
-			continue;
-		}
+		if (wp == NULL)
+			goto unlock;
 
+		info = counter_arch_bp(wp);
 		/*
-		 * The DFAR is an unknown value. Since we only allow a
-		 * single watchpoint, we can set the trigger to the lowest
-		 * possible faulting address.
+		 * The DFAR is an unknown value on debug architectures prior
+		 * to 7.1. Since we only allow a single watchpoint on these
+		 * older CPUs, we can set the trigger to the lowest possible
+		 * faulting address.
 		 */
-		info = counter_arch_bp(wp);
-		info->trigger = wp->attr.bp_addr;
+		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
+			BUG_ON(i > 0);
+			info->trigger = wp->attr.bp_addr;
+		} else {
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+				alignment_mask = 0x7;
+			else
+				alignment_mask = 0x3;
+
+			/* Check if the watchpoint value matches. */
+			val = read_wb_reg(ARM_BASE_WVR + i);
+			if (val != (addr & ~alignment_mask))
+				goto unlock;
+
+			/* Possible match, check the byte address select. */
+			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+			decode_ctrl_reg(ctrl_reg, &ctrl);
+			if (!((1 << (addr & alignment_mask)) & ctrl.len))
+				goto unlock;
+
+			/* Check that the access type matches. */
+			access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+				 HW_BREAKPOINT_R;
+			if (!(access & hw_breakpoint_type(wp)))
+				goto unlock;
+
+			/* We have a winner. */
+			info->trigger = addr;
+		}
+
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
 		perf_bp_event(wp, regs);
 
@@ -705,6 +727,7 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 		if (!wp->overflow_handler)
 			enable_single_step(wp, instruction_pointer(regs));
 
+unlock:
 		rcu_read_unlock();
 	}
 }
@@ -717,7 +740,7 @@ static void watchpoint_single_step_handler(unsigned long pc)
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	for (i = 0; i < core_num_reserved_brps; ++i) {
+	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
@@ -820,7 +843,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 	case ARM_ENTRY_ASYNC_WATCHPOINT:
 		WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n");
 	case ARM_ENTRY_SYNC_WATCHPOINT:
-		watchpoint_handler(addr, regs);
+		watchpoint_handler(addr, fsr, regs);
 		break;
 	default:
 		ret = 1; /* Unhandled fault. */
@@ -834,11 +857,31 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 /*
  * One-time initialisation.
  */
-static void reset_ctrl_regs(void *info)
+static cpumask_t debug_err_mask;
+
+static int debug_reg_trap(struct pt_regs *regs, unsigned int instr)
 {
-	int i, cpu = smp_processor_id();
+	int cpu = smp_processor_id();
+
+	pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
+		   instr, cpu);
+
+	/* Set the error flag for this CPU and skip the faulting instruction. */
+	cpumask_set_cpu(cpu, &debug_err_mask);
+	instruction_pointer(regs) += 4;
+	return 0;
+}
+
+static struct undef_hook debug_reg_hook = {
+	.instr_mask	= 0x0fe80f10,
+	.instr_val	= 0x0e000e10,
+	.fn		= debug_reg_trap,
+};
+
+static void reset_ctrl_regs(void *unused)
+{
+	int i, raw_num_brps, err = 0, cpu = smp_processor_id();
 	u32 dbg_power;
-	cpumask_t *cpumask = info;
 
 	/*
 	 * v7 debug contains save and restore registers so that debug state
@@ -848,38 +891,57 @@ static void reset_ctrl_regs(void *info)
 	 * Access Register to avoid taking undefined instruction exceptions
 	 * later on.
 	 */
-	if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
+	switch (debug_arch) {
+	case ARM_DEBUG_ARCH_V6:
+	case ARM_DEBUG_ARCH_V6_1:
+		/* ARMv6 cores just need to reset the registers. */
+		goto reset_regs;
+	case ARM_DEBUG_ARCH_V7_ECP14:
 		/*
 		 * Ensure sticky power-down is clear (i.e. debug logic is
 		 * powered up).
 		 */
 		asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
-		if ((dbg_power & 0x1) == 0) {
-			pr_warning("CPU %d debug is powered down!\n", cpu);
-			cpumask_or(cpumask, cpumask, cpumask_of(cpu));
-			return;
-		}
-
+		if ((dbg_power & 0x1) == 0)
+			err = -EPERM;
+		break;
+	case ARM_DEBUG_ARCH_V7_1:
 		/*
-		 * Unconditionally clear the lock by writing a value
-		 * other than 0xC5ACCE55 to the access register.
+		 * Ensure the OS double lock is clear.
 		 */
-		asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
-		isb();
+		asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power));
+		if ((dbg_power & 0x1) == 1)
+			err = -EPERM;
+		break;
+	}
 
-		/*
-		 * Clear any configured vector-catch events before
-		 * enabling monitor mode.
-		 */
-		asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
-		isb();
+	if (err) {
+		pr_warning("CPU %d debug is powered down!\n", cpu);
+		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
+		return;
 	}
 
+	/*
+	 * Unconditionally clear the lock by writing a value
+	 * other than 0xC5ACCE55 to the access register.
+	 */
+	asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+	isb();
+
+	/*
+	 * Clear any configured vector-catch events before
+	 * enabling monitor mode.
+	 */
+	asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
+	isb();
+
+reset_regs:
 	if (enable_monitor_mode())
 		return;
 
 	/* We must also reset any reserved registers. */
-	for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) {
+	raw_num_brps = get_num_brp_resources();
+	for (i = 0; i < raw_num_brps; ++i) {
 		write_wb_reg(ARM_BASE_BCR + i, 0UL);
 		write_wb_reg(ARM_BASE_BVR + i, 0UL);
 	}
@@ -895,6 +957,7 @@ static int __cpuinit dbg_reset_notify(struct notifier_block *self,
 {
 	if (action == CPU_ONLINE)
 		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+
 	return NOTIFY_OK;
 }
 
@@ -905,7 +968,6 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = {
 static int __init arch_hw_breakpoint_init(void)
 {
 	u32 dscr;
-	cpumask_t cpumask = { CPU_BITS_NONE };
 
 	debug_arch = get_debug_arch();
 
@@ -916,28 +978,31 @@ static int __init arch_hw_breakpoint_init(void)
 
 	/* Determine how many BRPs/WRPs are available. */
 	core_num_brps = get_num_brps();
-	core_num_reserved_brps = get_num_reserved_brps();
 	core_num_wrps = get_num_wrps();
 
-	pr_info("found %d breakpoint and %d watchpoint registers.\n",
-		core_num_brps + core_num_reserved_brps, core_num_wrps);
-
-	if (core_num_reserved_brps)
-		pr_info("%d breakpoint(s) reserved for watchpoint "
-				"single-step.\n", core_num_reserved_brps);
+	/*
+	 * We need to tread carefully here because DBGSWENABLE may be
+	 * driven low on this core and there isn't an architected way to
+	 * determine that.
+	 */
+	register_undef_hook(&debug_reg_hook);
 
 	/*
 	 * Reset the breakpoint resources. We assume that a halting
 	 * debugger will leave the world in a nice state for us.
 	 */
-	on_each_cpu(reset_ctrl_regs, &cpumask, 1);
-	if (!cpumask_empty(&cpumask)) {
+	on_each_cpu(reset_ctrl_regs, NULL, 1);
+	unregister_undef_hook(&debug_reg_hook);
+	if (!cpumask_empty(&debug_err_mask)) {
 		core_num_brps = 0;
-		core_num_reserved_brps = 0;
 		core_num_wrps = 0;
 		return 0;
 	}
 
+	pr_info("found %d " "%s" "breakpoint and %d watchpoint registers.\n",
+		core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " :
+		"", core_num_wrps);
+
 	ARM_DBG_READ(c1, 0, dscr);
 	if (dscr & ARM_DSCR_HDBGEN) {
 		max_watchpoint_len = 4;

+ 0 - 3
arch/arm/kernel/irq.c

@@ -58,9 +58,6 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 #endif
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
-#endif
-#ifdef CONFIG_LOCAL_TIMERS
-	show_local_irqs(p, prec);
 #endif
 	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
 	return 0;

+ 4 - 0
arch/arm/kernel/kprobes-arm.c

@@ -60,6 +60,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -971,6 +972,9 @@ const union decode_item kprobe_decode_arm_table[] = {
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_arm_table);
+#endif
 
 static void __kprobes arm_singlestep(struct kprobe *p, struct pt_regs *regs)
 {

+ 1323 - 0
arch/arm/kernel/kprobes-test-arm.c

@@ -0,0 +1,1323 @@
+/*
+ * arch/arm/kernel/kprobes-test-arm.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+#define TEST_ISA "32"
+
+#define TEST_ARM_TO_THUMB_INTERWORK_R(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_REG(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+#define TEST_ARM_TO_THUMB_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_MEM(15, 3f+1)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+
+void kprobe_arm_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Data-processing (register), (register-shifted register), (immediate)")
+
+#define _DATA_PROCESSING_DNM(op,s,val)						\
+	TEST_RR(  op "eq" s "	r0,  r",1, VAL1,", r",2, val, "")		\
+	TEST_RR(  op "ne" s "	r1,  r",1, VAL1,", r",2, val, ", lsl #3")	\
+	TEST_RR(  op "cs" s "	r2,  r",3, VAL1,", r",2, val, ", lsr #4")	\
+	TEST_RR(  op "cc" s "	r3,  r",3, VAL1,", r",2, val, ", asr #5")	\
+	TEST_RR(  op "mi" s "	r4,  r",5, VAL1,", r",2, N(val),", asr #6")	\
+	TEST_RR(  op "pl" s "	r5,  r",5, VAL1,", r",2, val, ", ror #7")	\
+	TEST_RR(  op "vs" s "	r6,  r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", pc, lsl #3")			\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vc" s "	r6,  pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vc" s "	r6,  sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "hi" s "	r8,  r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")\
+	TEST_RRR( op "ls" s "	r9,  r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")\
+	TEST_RRR( op "ge" s "	r10, r",11,VAL1,", r",14,val, ", asr r",7, 5,"")\
+	TEST_RRR( op "lt" s "	r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\
+	TEST_RR(  op "gt" s "	r12, r13"       ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op "le" s "	r14, r",0, val, ", r13"       ", lsl r",14,8,"")\
+	TEST_RR(  op s "	r12, pc"        ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op s "	r14, r",0, val, ", pc"        ", lsl r",14,8,"")\
+	TEST_R(   op "eq" s "	r0,  r",11,VAL1,", #0xf5")			\
+	TEST_R(   op "ne" s "	r11, r",0, VAL1,", #0xf5000000")		\
+	TEST_R(   op s "	r7,  r",8, VAL2,", #0x000af000")		\
+	TEST(     op s "	r4,  pc"        ", #0x00005a00")
+
+#define DATA_PROCESSING_DNM(op,val)		\
+	_DATA_PROCESSING_DNM(op,"",val)		\
+	_DATA_PROCESSING_DNM(op,"s",val)
+
+#define DATA_PROCESSING_NM(op,val)						\
+	TEST_RR(  op "ne	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(  op "eq	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(  op "cc	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(  op "cs	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(  op "pl	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(  op "mi	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(  op "vc	r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R (  op "vs	r",7, VAL1,", pc, lsl #3")			\
+	TEST_R (  op "vs	r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vs	pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vs	sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "ls	r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")	\
+	TEST_RRR( op "hi	r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")	\
+	TEST_RRR( op "lt	r",11,VAL1,", r",14,val, ", asr r",7, 5,"")	\
+	TEST_RRR( op "ge	r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "le	r13"       ", r",14,val, ", ror r",14,7,"")	\
+	TEST_RR(  op "gt	r",0, val, ", r13"       ", lsl r",14,8,"")	\
+	TEST_RR(  op "	pc"        ", r",14,val, ", ror r",14,7,"")		\
+	TEST_RR(  op "	r",0, val, ", pc"        ", lsl r",14,8,"")		\
+	TEST_R(   op "eq	r",11,VAL1,", #0xf5")				\
+	TEST_R(   op "ne	r",0, VAL1,", #0xf5000000")			\
+	TEST_R(   op "	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING_DM(op,s,val)					\
+	TEST_R(   op "eq" s "	r0,  r",1, val, "")			\
+	TEST_R(   op "ne" s "	r1,  r",1, val, ", lsl #3")		\
+	TEST_R(   op "cs" s "	r2,  r",3, val, ", lsr #4")		\
+	TEST_R(   op "cc" s "	r3,  r",3, val, ", asr #5")		\
+	TEST_R(   op "mi" s "	r4,  r",5, N(val),", asr #6")		\
+	TEST_R(   op "pl" s "	r5,  r",5, val, ", ror #7")		\
+	TEST_R(   op "vs" s "	r6,  r",10,val, ", rrx")		\
+	TEST(     op "vs" s "	r7,  pc, lsl #3")			\
+	TEST(     op "vs" s "	r7,  sp, lsr #4")			\
+	TEST_RR(  op "vc" s "	r8,  r",7, val, ", lsl r",0, 3,"")	\
+	TEST_RR(  op "hi" s "	r9,  r",9, val, ", lsr r",7, 4,"")	\
+	TEST_RR(  op "ls" s "	r10, r",9, val, ", asr r",7, 5,"")	\
+	TEST_RR(  op "ge" s "	r11, r",11,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "lt" s "	r12, r",11,val, ", ror r",14,7,"")	\
+	TEST_R(   op "gt" s "	r14, r13"       ", lsl r",14,8,"")	\
+	TEST_R(   op "le" s "	r14, pc"        ", lsl r",14,8,"")	\
+	TEST(     op "eq" s "	r0,  #0xf5")				\
+	TEST(     op "ne" s "	r11, #0xf5000000")			\
+	TEST(     op s "	r7,  #0x000af000")			\
+	TEST(     op s "	r4,  #0x00005a00")
+
+#define DATA_PROCESSING_DM(op,val)		\
+	_DATA_PROCESSING_DM(op,"",val)		\
+	_DATA_PROCESSING_DM(op,"s",val)
+
+	DATA_PROCESSING_DNM("and",0xf00f00ff)
+	DATA_PROCESSING_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING_DNM("sub",VAL2)
+	DATA_PROCESSING_DNM("rsb",VAL2)
+	DATA_PROCESSING_DNM("add",VAL2)
+	DATA_PROCESSING_DNM("adc",VAL2)
+	DATA_PROCESSING_DNM("sbc",VAL2)
+	DATA_PROCESSING_DNM("rsc",VAL2)
+	DATA_PROCESSING_NM("tst",0xf00f00ff)
+	DATA_PROCESSING_NM("teq",0xf00f00ff)
+	DATA_PROCESSING_NM("cmp",VAL2)
+	DATA_PROCESSING_NM("cmn",VAL2)
+	DATA_PROCESSING_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING_DM("mov",VAL2)
+	DATA_PROCESSING_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING_DM("mvn",VAL2)
+
+	TEST("mov	ip, sp") /* This has special case emulation code */
+
+	TEST_SUPPORTED("mov	pc, #0x1000");
+	TEST_SUPPORTED("mov	sp, #0x1000");
+	TEST_SUPPORTED("cmp	pc, #0x1000");
+	TEST_SUPPORTED("cmp	sp, #0x1000");
+
+	/* Data-processing with PC as the shift count register */
+	TEST_UNSUPPORTED(".word 0xe15c0f1e	@ cmp	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe1a0cf1e	@ mov	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe08caf1e	@ add	r10, r12, r14, asl pc")
+
+	/* Data-processing with PC as a target and status registers updated */
+	TEST_UNSUPPORTED("movs	pc, r1")
+	TEST_UNSUPPORTED("movs	pc, r1, lsl r2")
+	TEST_UNSUPPORTED("movs	pc, #0x10000")
+	TEST_UNSUPPORTED("adds	pc, lr, r1")
+	TEST_UNSUPPORTED("adds	pc, lr, r1, lsl r2")
+	TEST_UNSUPPORTED("adds	pc, lr, #4")
+
+	/* Data-processing with SP as target */
+	TEST("add	sp, sp, #16")
+	TEST("sub	sp, sp, #8")
+	TEST("bic	sp, sp, #0x20")
+	TEST("orr	sp, sp, #0x20")
+	TEST_PR( "add	sp, r",10,0,", r",11,4,"")
+	TEST_PRR("add	sp, r",10,0,", r",11,4,", asl r",12,1,"")
+	TEST_P(  "mov	sp, r",10,0,"")
+	TEST_PR( "mov	sp, r",10,0,", asl r",12,0,"")
+
+	/* Data-processing with PC as target */
+	TEST_BF(   "add	pc, pc, #2f-1b-8")
+	TEST_BF_R ("add	pc, pc, r",14,2f-1f-8,"")
+	TEST_BF_R ("add	pc, r",14,2f-1f-8,", pc")
+	TEST_BF_R ("mov	pc, r",0,2f,"")
+	TEST_BF_RR("mov	pc, r",0,2f,", asl r",1,0,"")
+	TEST_BB(   "sub	pc, pc, #1b-2b+8")
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_BB(   "sub	pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before ARMv6 */
+#endif
+	TEST_BB_R( "sub	pc, pc, r",14, 1f-2f+8,"")
+	TEST_BB_R( "rsb	pc, r",14,1f-2f+8,", pc")
+	TEST_RR(   "add	pc, pc, r",10,-2,", asl r",11,1,"")
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_R("add	pc, pc, r",0,3f-1f-8+1,"")
+	TEST_ARM_TO_THUMB_INTERWORK_R("sub	pc, r",0,3f+8+1,", #8")
+#endif
+	TEST_GROUP("Miscellaneous instructions")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrspl	r7, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".word 0xe10ff000	@ mrs r15, cpsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, lr")
+	TEST_UNSUPPORTED("msr	spsr, r0")
+
+	TEST_BF_R("bx	r",0,2f,"")
+	TEST_BB_R("bx	r",7,2f,"")
+	TEST_BF_R("bxeq	r",14,2f,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clzeq	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+	TEST(  "clz	r4, sp")
+	TEST_UNSUPPORTED(".word 0x016fff10	@ clz pc, r0")
+	TEST_UNSUPPORTED(".word 0x016f0f1f	@ clz r0, pc")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("bxj	r0")
+#endif
+
+	TEST_BF_R("blx	r",0,2f,"")
+	TEST_BB_R("blx	r",7,2f,"")
+	TEST_BF_R("blxeq	r",14,2f,"")
+	TEST_UNSUPPORTED(".word 0x0120003f	@ blx pc")
+
+	TEST_RR(   "qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qsub	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdsub	lr, r",9, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe101f050	@ qadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe121f050	@ qsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe141f050	@ qdadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe161f050	@ qdsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe16f2050	@ qdsub r2, r0, pc")
+	TEST_UNSUPPORTED(".word 0xe161205f	@ qdsub r2, pc, r1")
+
+	TEST_UNSUPPORTED("bkpt	0xffff")
+	TEST_UNSUPPORTED("bkpt	0x0000")
+
+	TEST_UNSUPPORTED(".word 0xe1600070 @ smc #0")
+
+	TEST_GROUP("Halfword multiply and multiply-accumulate")
+
+	TEST_RRR(    "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f3281 @ smlabb pc, r1, r2, r3")
+	TEST_RRR(    "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlatbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32a1 @ smlatb pc, r1, r2, r3")
+	TEST_RRR(    "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32c1 @ smlabt pc, r1, r2, r3")
+	TEST_RRR(    "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlattge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32e1 @ smlatt pc, r1, r2, r3")
+
+	TEST_RRR(    "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f3281 @ smlawb pc, r1, r2, r3")
+	TEST_RRR(    "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f32c1 @ smlawt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe12032cf @ smlawt r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe1203fc1 @ smlawt r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe120f2c1 @ smlawt r0, r1, r2, pc")
+
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02a1 @ smulwb pc, r1, r2")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02e1 @ smulwt pc, r1, r2")
+
+	TEST_RRRR(  "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f1382 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f382 @ smlalbb r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlaltble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13a2 @ smlaltb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3a2 @ smlaltb r1, pc, r2, r3")
+	TEST_RRRR(  "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbtle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13c2 @ smlalbt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3c2 @ smlalbt r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalttle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13e2 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe140f3e2 @ smlalbb r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe14013ef @ smlalbb r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe1401fe2 @ smlalbb r0, r1, r2, pc")
+
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f0281 @ smulbb pc, r1, r2")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02a1 @ smultb pc, r1, r2")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02c1 @ smultb pc, r1, r2")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulttge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02e1 @ smultt pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe16002ef @ smultt r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe1600fe1 @ smultt r0, r1, pc")
+
+	TEST_GROUP("Multiply and multiply-accumulate")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mulls	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "mul	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe00f0291 @ mul pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe000029f @ mul r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe0000f91 @ mul r0, r1, pc")
+	TEST_RR(    "muls	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mullss	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "muls	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe01f0291 @ muls pc, r1, r2")
+
+	TEST_RRR(    "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mla	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe02f3291 @ mla pc, r1, r2, r3")
+	TEST_RRR(    "mlas	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahis	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mlas	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe03f3291 @ mlas pc, r1, r2, r3")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_RR(  "umaal	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umaalls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umaal	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe041f392 @ umaal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe04f0392 @ umaal r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0500090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe05fff9f @ undef")
+
+	TEST_RRR(  "mls		r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(  "mlshi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(   "mls		lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe06f3291 @ mls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe060329f @ mls r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0603f91 @ mls r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe060f291 @ mls r0, r1, r2, pc")
+#endif
+
+	TEST_UNSUPPORTED(".word 0xe0700090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe07fff9f @ undef")
+
+	TEST_RR(  "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe081f392 @ umull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe08f1392 @ umull r1, pc, r2, r3")
+	TEST_RR(  "umulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe091f392 @ umulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe09f1392 @ umulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0af1392 @ umlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0a1f392 @ umlal r1, pc, r2, r3")
+	TEST_RRRR(  "umlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0bf1392 @ umlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0b1f392 @ umlals r1, pc, r2, r3")
+
+	TEST_RR(  "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0c1f392 @ smull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0cf1392 @ smull r1, pc, r2, r3")
+	TEST_RR(  "smulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0d1f392 @ smulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0df1392 @ smulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ef1392 @ smlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0e1f392 @ smlal r1, pc, r2, r3")
+	TEST_RRRR(  "smlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ff1392 @ smlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0f392 @ smlals r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0139f @ smlals r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe0f01f92 @ smlals r0, r1, r2, pc")
+
+	TEST_GROUP("Synchronization primitives")
+
+	/*
+	 * Use hard coded constants for SWP instructions to avoid warnings
+	 * about deprecated instructions.
+	 */
+	TEST_RP( ".word 0xe108e097 @ swp	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x610d0091 @ swpvs	r0, r",1,VAL1,", [sp]")
+	TEST_RP( ".word 0xe10cd09e @ swp	sp, r",14,VAL2,", [r",12,13*4,"]")
+	TEST_UNSUPPORTED(".word 0xe102f091 @ swp pc, r1, [r2]")
+	TEST_UNSUPPORTED(".word 0xe102009f @ swp r0, pc, [r2]")
+	TEST_UNSUPPORTED(".word 0xe10f0091 @ swp r0, r1, [pc]")
+	TEST_RP( ".word 0xe148e097 @ swpb	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x614d0091 @ swpvsb	r0, r",1,VAL1,", [sp]")
+	TEST_UNSUPPORTED(".word 0xe142f091 @ swpb pc, r1, [r2]")
+
+	TEST_UNSUPPORTED(".word	0xe1100090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1200090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1300090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1500090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1600090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1700090") /* Unallocated space */
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("ldrex	r2, [sp]")
+	TEST_UNSUPPORTED("strexd	r0, r2, r3, [sp]")
+	TEST_UNSUPPORTED("ldrexd	r2, r3, [sp]")
+	TEST_UNSUPPORTED("strexb	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexb	r2, [sp]")
+	TEST_UNSUPPORTED("strexh	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexh	r2, [sp]")
+#endif
+	TEST_GROUP("Extra load/store instructions")
+
+	TEST_RPR(  "strh	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")
+	TEST_RPR(  "streqh	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")
+	TEST_RPR(  "strh	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")
+	TEST_RPR(  "strneh	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strh	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")
+	TEST_RPR(  "strh	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0ba	@ strh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0bb	@ strh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089a0bf	@ strh r10, [r9], pc")
+
+	TEST_PR(   "ldrh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrcsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrcch	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0ba	@ ldrh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0bb	@ ldrh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe099a0bf	@ ldrh r10, [r9], pc")
+
+	TEST_RP(   "strh	r",0, VAL1,", [r",1, 24,", #-2]")
+	TEST_RP(   "strmih	r",14,VAL2,", [r",13,0, ", #2]")
+	TEST_RP(   "strh	r",1, VAL1,", [r",2, 24,", #4]!")
+	TEST_RP(   "strplh	r",12,VAL2,", [r",11,24,", #-4]!")
+	TEST_RP(   "strh	r",2, VAL1,", [r",3, 24,"], #48")
+	TEST_RP(   "strh	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3b0	@ strh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3b0	@ strh pc, [r9], #48")
+
+	TEST_P(	   "ldrh	r0, [r",0,  24,", #-2]")
+	TEST_P(	   "ldrvsh	r14, [r",13,0, ", #2]")
+	TEST_P(	   "ldrh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrvch	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3b0	@ ldrh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3b0	@ ldrh pc, [r9], #48")
+
+	TEST_PR(   "ldrsb	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrhisb	r14, [r",13,0,", r",12,  48,"]")
+	TEST_PR(   "ldrsb	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlssb	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsb	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsb	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0da	@ ldrsb r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0db	@ ldrsb pc, [r9], r11")
+
+	TEST_P(	   "ldrsb	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldrgesb	r14, [r",13,0, ", #1]")
+	TEST_P(	   "ldrsb	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrltsb	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsb	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsb	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsb	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3d0	@ ldrsb r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3d0	@ ldrsb pc, [r9], #48")
+
+	TEST_PR(   "ldrsh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrgtsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrsh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlesh	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0fa	@ ldrsh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0fb	@ ldrsh pc, [r9], r11")
+
+	TEST_P(	   "ldrsh	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldreqsh	r14, [r",13,0 ,", #1]")
+	TEST_P(	   "ldrsh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrnesh	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3f0	@ ldrsh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3f0	@ ldrsh pc, [r9], #48")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("strht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], r3")
+	TEST_UNSUPPORTED("strht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], #48")
+#endif
+
+	TEST_RPR(  "strd	r",0, VAL1,", [r",1, 48,", -r",2,24,"]")
+	TEST_RPR(  "strccd	r",8, VAL2,", [r",13,0, ", r",12,48,"]")
+	TEST_RPR(  "strd	r",4, VAL1,", [r",2, 24,", r",3, 48,"]!")
+	TEST_RPR(  "strcsd	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strd	r",2, VAL1,", [r",3, 24,"], r",4,48,"")
+	TEST_RPR(  "strd	r",10,VAL2,", [r",9, 48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0fa	@ strd r12, [pc, r10]!")
+
+	TEST_PR(   "ldrd	r0, [r",0, 48,", -r",2,24,"]")
+	TEST_PR(   "ldrmid	r8, [r",13,0, ", r",12,48,"]")
+	TEST_PR(   "ldrd	r4, [r",2, 24,", r",3, 48,"]!")
+	TEST_PR(   "ldrpld	r6, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrd	r2, [r",5, 24,"], r",4,48,"")
+	TEST_PR(   "ldrd	r10, [r",9,48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0da	@ ldrd r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0db	@ ldrd pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089e0db	@ ldrd lr, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089c0df	@ ldrd r12, [r9], pc")
+
+	TEST_RP(   "strd	r",0, VAL1,", [r",1, 24,", #-8]")
+	TEST_RP(   "strvsd	r",8, VAL2,", [r",13,0, ", #8]")
+	TEST_RP(   "strd	r",4, VAL1,", [r",2, 24,", #16]!")
+	TEST_RP(   "strvcd	r",12,VAL2,", [r",11,24,", #-16]!")
+	TEST_RP(   "strd	r",2, VAL1,", [r",4, 24,"], #48")
+	TEST_RP(   "strd	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3f0	@ strd r12, [pc, #48]!")
+
+	TEST_P(	   "ldrd	r0, [r",0, 24,", #-8]")
+	TEST_P(	   "ldrhid	r8, [r",13,0, ", #8]")
+	TEST_P(	   "ldrd	r4, [r",2, 24,", #16]!")
+	TEST_P(	   "ldrlsd	r6, [r",11,24,", #-16]!")
+	TEST_P(	   "ldrd	r2, [r",5, 24,"], #48")
+	TEST_P(	   "ldrd	r10, [r",9,6,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3d0	@ ldrd r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3d0	@ ldrd pc, [r9], #48")
+	TEST_UNSUPPORTED(".word 0xe0c9e3d0	@ ldrd lr, [r9], #48")
+
+	TEST_GROUP("Miscellaneous")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".word 0xe300f000	@ movw pc, #0")
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".word 0xe340f000	@ movt pc, #0")
+#endif
+
+	TEST_UNSUPPORTED("msr	cpsr, 0x13")
+	TEST_UNSUPPORTED("msr	cpsr_f, 0xf0000000")
+	TEST_UNSUPPORTED("msr	spsr, 0x13")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED("dbg #0")
+#endif
+
+	TEST_GROUP("Load/store word and unsigned byte")
+
+#define LOAD_STORE(byte)							\
+	TEST_RP( "str"byte"	r",0, VAL1,", [r",1, 24,", #-2]")		\
+	TEST_RP( "str"byte"	r",14,VAL2,", [r",13,0, ", #2]")		\
+	TEST_RP( "str"byte"	r",1, VAL1,", [r",2, 24,", #4]!")		\
+	TEST_RP( "str"byte"	r",12,VAL2,", [r",11,24,", #-4]!")		\
+	TEST_RP( "str"byte"	r",2, VAL1,", [r",3, 24,"], #48")		\
+	TEST_RP( "str"byte"	r",10,VAL2,", [r",9, 64,"], #-48")		\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")	\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")	\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")	\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")	\
+	TEST_RPR("str"byte"	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")	\
+	TEST_RPR("str"byte"	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")	\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 24,", r",2,  32,", asl #1]")\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 32,", lsr #2]")\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  32,", asr #3]!")\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\
+	TEST_P(  "ldr"byte"	r0, [r",0,  24,", #-2]")			\
+	TEST_P(  "ldr"byte"	r14, [r",13,0, ", #2]")				\
+	TEST_P(  "ldr"byte"	r1, [r",2,  24,", #4]!")			\
+	TEST_P(  "ldr"byte"	r12, [r",11,24,", #-4]!")			\
+	TEST_P(  "ldr"byte"	r2, [r",3,  24,"], #48")			\
+	TEST_P(  "ldr"byte"	r10, [r",9, 64,"], #-48")			\
+	TEST_PR( "ldr"byte"	r0, [r",0,  48,", -r",2, 24,"]")		\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 48,"]")		\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3, 48,"]!")		\
+	TEST_PR( "ldr"byte"	r12, [r",11,48,", -r",10,24,"]!")		\
+	TEST_PR( "ldr"byte"	r2, [r",3,  24,"], r",4, 48,"")			\
+	TEST_PR( "ldr"byte"	r10, [r",9, 48,"], -r",11,24,"")		\
+	TEST_PR( "ldr"byte"	r0, [r",0,  24,", r",2,  32,", asl #1]")	\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 32,", lsr #2]")	\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3,  32,", asr #3]!")	\
+	TEST_PR( "ldr"byte"	r12, [r",11,24,", r",10, 4,", ror #31]!")	\
+	TEST(    "ldr"byte"	r0, [pc, #0]")					\
+	TEST_R(  "ldr"byte"	r12, [pc, r",14,0,"]")
+
+	LOAD_STORE("")
+	TEST_P(   "str	pc, [r",0,0,", #15*4]")
+	TEST_R(   "str	pc, [sp, r",2,15*4,"]")
+	TEST_BF(  "ldr	pc, [sp, #15*4]")
+	TEST_BF_R("ldr	pc, [sp, r",2,15*4,"]")
+
+	TEST_P(   "str	sp, [r",0,0,", #13*4]")
+	TEST_R(   "str	sp, [sp, r",2,13*4,"]")
+	TEST_BF(  "ldr	sp, [sp, #13*4]")
+	TEST_BF_R("ldr	sp, [sp, r",2,13*4,"]")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+#endif
+	TEST_UNSUPPORTED(".word 0xe5af6008	@ str r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7af6008	@ str r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5bf6008	@ ldr r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7bf6008	@ ldr r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe788600f	@ str r6, [r8, pc]")
+	TEST_UNSUPPORTED(".word 0xe798600f	@ ldr r6, [r8, pc]")
+
+	LOAD_STORE("b")
+	TEST_UNSUPPORTED(".word 0xe5f7f008	@ ldrb pc, [r7, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7f7f008	@ ldrb pc, [r7, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5ef6008	@ strb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ef6008	@ strb r6, [pc, r3]!")
+	TEST_UNSUPPORTED(".word 0xe5ff6008	@ ldrb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ff6008	@ ldrb r6, [pc, r3]!")
+
+	TEST_UNSUPPORTED("ldrt	r0, [r1], #4")
+	TEST_UNSUPPORTED("ldrt	r1, [r2], r3")
+	TEST_UNSUPPORTED("strt	r2, [r3], #4")
+	TEST_UNSUPPORTED("strt	r3, [r4], r5")
+	TEST_UNSUPPORTED("ldrbt	r4, [r5], #4")
+	TEST_UNSUPPORTED("ldrbt	r5, [r6], r7")
+	TEST_UNSUPPORTED("strbt	r6, [r7], #4")
+	TEST_UNSUPPORTED("strbt	r7, [r8], r9")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	TEST_UNSUPPORTED(".word 0xe6000010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe60fffff") /* Unallocated space */
+
+	TEST_RR(    "sadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff1a	@ sadd16	pc, r12, r10")
+	TEST_RR(    "sasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff3a	@ sasx	pc, r12, r10")
+	TEST_RR(    "ssax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff5a	@ ssax	pc, r12, r10")
+	TEST_RR(    "ssub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff7a	@ ssub16	pc, r12, r10")
+	TEST_RR(    "sadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff9a	@ sadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe61000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffdf") /* Unallocated space */
+	TEST_RR(    "ssub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cfffa	@ ssub8	pc, r12, r10")
+
+	TEST_RR(    "qadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff1a	@ qadd16	pc, r12, r10")
+	TEST_RR(    "qasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff3a	@ qasx	pc, r12, r10")
+	TEST_RR(    "qsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff5a	@ qsax	pc, r12, r10")
+	TEST_RR(    "qsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff7a	@ qsub16	pc, r12, r10")
+	TEST_RR(    "qadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff9a	@ qadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe62000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffdf") /* Unallocated space */
+	TEST_RR(    "qsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cfffa	@ qsub8	pc, r12, r10")
+
+	TEST_RR(    "shadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff1a	@ shadd16	pc, r12, r10")
+	TEST_RR(    "shasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff3a	@ shasx	pc, r12, r10")
+	TEST_RR(    "shsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff5a	@ shsax	pc, r12, r10")
+	TEST_RR(    "shsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff7a	@ shsub16	pc, r12, r10")
+	TEST_RR(    "shadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff9a	@ shadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe63000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffdf") /* Unallocated space */
+	TEST_RR(    "shsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cfffa	@ shsub8	pc, r12, r10")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	TEST_UNSUPPORTED(".word 0xe6400010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe64fffff") /* Unallocated space */
+
+	TEST_RR(    "uadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff1a	@ uadd16	pc, r12, r10")
+	TEST_RR(    "uasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff3a	@ uasx	pc, r12, r10")
+	TEST_RR(    "usax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff5a	@ usax	pc, r12, r10")
+	TEST_RR(    "usub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff7a	@ usub16	pc, r12, r10")
+	TEST_RR(    "uadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff9a	@ uadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe65000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffdf") /* Unallocated space */
+	TEST_RR(    "usub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cfffa	@ usub8	pc, r12, r10")
+
+	TEST_RR(    "uqadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff1a	@ uqadd16	pc, r12, r10")
+	TEST_RR(    "uqasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff3a	@ uqasx	pc, r12, r10")
+	TEST_RR(    "uqsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff5a	@ uqsax	pc, r12, r10")
+	TEST_RR(    "uqsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff7a	@ uqsub16	pc, r12, r10")
+	TEST_RR(    "uqadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff9a	@ uqadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe66000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffdf") /* Unallocated space */
+	TEST_RR(    "uqsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cfffa	@ uqsub8	pc, r12, r10")
+
+	TEST_RR(    "uhadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff1a	@ uhadd16	pc, r12, r10")
+	TEST_RR(    "uhasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff3a	@ uhasx	pc, r12, r10")
+	TEST_RR(    "uhsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff5a	@ uhsax	pc, r12, r10")
+	TEST_RR(    "uhsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff7a	@ uhsub16	pc, r12, r10")
+	TEST_RR(    "uhadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff9a	@ uhadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffdf") /* Unallocated space */
+	TEST_RR(    "uhsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cfffa	@ uhsub8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67feffa	@ uhsub8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe67cefff	@ uhsub8	r14, r12, pc")
+#endif /* __LINUX_ARM_ARCH__ >= 7 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Packing, unpacking, saturation, and reversal")
+
+	TEST_RR(    "pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_UNSUPPORTED(".word 0xe68cf11a	@ pkhbt	pc, r12, r10, lsl #2")
+	TEST_RR(    "pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+	TEST_UNSUPPORTED(".word 0xe68cf15a	@ pkhtb	pc, r12, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68fe15a	@ pkhtb	r14, pc, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68ce15f	@ pkhtb	r14, r12, pc, asr #2")
+	TEST_UNSUPPORTED(".word 0xe6900010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fffdf") /* Unallocated space */
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6b7f01c	@ ssat	pc, #24, r12")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6f7f01c	@ usat	pc, #24, r12")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe68cf47a	@ sxtab16	pc,r12, r10, ror #8")
+
+	TEST_RR(    "sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "sel	r14, r",12,VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe68cffba	@ sel	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe68fefba	@ sel	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe68cefbf	@ sel	r14, r12, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6abff3c	@ ssat16	pc, #12, r12")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6acf47a	@ sxtab	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev	r0, r",0,   VAL1,"")
+	TEST_R(     "rev	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfff3c	@ rev	pc, r12")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6bcf47a	@ sxtah	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev16	r0, r",0,   VAL1,"")
+	TEST_R(     "rev16	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfffbc	@ rev16	pc, r12")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ccf47a	@ uxtab16	pc,r12, r10, ror #8")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6ecff3c	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".word 0xe6ecef3f	@ usat16	r14, #12, pc")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ecf47a	@ uxtab	pc,r12, r10, ror #8")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_R(     "rbit	r0, r",0,   VAL1,"")
+	TEST_R(     "rbit	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ rbit	pc, r12")
+#endif
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6fff077	@ uxth	pc, r7")
+	TEST_UNSUPPORTED(".word 0xe6ff807f	@ uxth	r8, pc")
+	TEST_UNSUPPORTED(".word 0xe6fcf47a	@ uxtah	pc, r12, r10, ror #8")
+	TEST_UNSUPPORTED(".word 0xe6fce47f	@ uxtah	r14, r12, pc, ror #8")
+
+	TEST_R(     "revsh	r0, r",0,   VAL1,"")
+	TEST_R(     "revsh	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ revsh	pc, r12")
+	TEST_UNSUPPORTED(".word 0xe6ffef3f	@ revsh	r14, pc")
+
+	TEST_UNSUPPORTED(".word 0xe6900070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fff7f") /* Unallocated space */
+
+	TEST_UNSUPPORTED(".word 0xe6d00070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe6dfff7f") /* Unallocated space */
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Signed multiplies")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a1c	@ smlad	pc, r12, r10, r8")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a3c	@ smladx	pc, r12, r10, r8")
+
+	TEST_RR(   "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa1c	@ smuad	pc, r12, r10")
+	TEST_RR(   "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa3c	@ smuadx	pc, r12, r10")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a5c	@ smlsd	pc, r12, r10, r8")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a7c	@ smlsdx	pc, r12, r10, r8")
+
+	TEST_RR(   "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa5c	@ smusd	pc, r12, r10")
+	TEST_RR(   "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa7c	@ smusdx	pc, r12, r10")
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af819	@ smlald	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb819	@ smlald	r11, pc, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74ab81f	@ smlald	r11, r10, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe74abf19	@ smlald	r11, r10, r9, pc")
+
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af839	@ smlaldx	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb839	@ smlaldx	r11, pc, r9, r8")
+
+	TEST_RRR(  "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a1c	@ smmla	pc, r12, r10, r8")
+	TEST_RRR(  "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a3c	@ smmlar	pc, r12, r10, r8")
+
+	TEST_RR(   "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ smmul	pc, r12, r10")
+	TEST_RR(   "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa3c	@ smmulr	pc, r12, r10")
+
+	TEST_RRR(  "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8adc	@ smmls	pc, r12, r10, r8")
+	TEST_RRR(  "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8afc	@ smmlsr	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8aff	@ smmlsr	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8ffc	@ smmlsr	r14, r12, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe75efafc	@ smmlsr	r14, r12, r10, pc")
+
+	TEST_RR(   "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ usad8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe75efa1f	@ usad8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe75eff1c	@ usad8	r14, r12, pc")
+
+	TEST_RRR(  "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(  "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_UNSUPPORTED(".word 0xe78f8a1c	@ usada8	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8a1f	@ usada8	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8f1c	@ usada8	r14, r12, pc, r8")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Bit Field")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfxeq	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7aff45c	@ sbfx	pc, r12, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfxcs	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7eff45c	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".word 0xe7efc45f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfcvs	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".word 0xe7def01f	@ bfc	pc, #0, #31");
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfipl	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".word 0xe7d7f21e	@ bfi	pc, r14, #4, #20")
+
+	TEST_UNSUPPORTED(".word 0x07f000f0")  /* Permanently UNDEFINED */
+	TEST_UNSUPPORTED(".word 0x07ffffff")  /* Permanently UNDEFINED */
+#endif /* __LINUX_ARM_ARCH__ >= 7 */
+
+	TEST_GROUP("Branch, branch with link, and block data transfer")
+
+	TEST_P(   "stmda	r",0, 16*4,", {r0}")
+	TEST_P(   "stmeqda	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmneda	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmda	r",13,0,   "!, {pc}")
+
+	TEST_P(   "ldmda	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmcsda	r",4, 15*4,", {r0-r15}")
+	TEST_BF_P("ldmccda	r",7, 15*4,"!, {r8-r15}")
+	TEST_P(   "ldmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmda	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0}")
+	TEST_P(   "stmmiia	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmplia	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmia	r",14,0,   "!, {pc}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmvsia	r",4, 0,   ", {r0-r15}")
+	TEST_BF_P("ldmvcia	r",7, 8*4, "!, {r8-r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0}")
+	TEST_P(   "stmhidb	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmlsdb	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmdb	r",13,4,   "!, {pc}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmgedb	r",4, 16*4,", {r0-r15}")
+	TEST_BF_P("ldmltdb	r",7, 16*4,"!, {r8-r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {pc}")
+
+	TEST_P(   "stmib	r",0, 16*4,", {r0}")
+	TEST_P(   "stmgtib	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmleib	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmib	r",13,-4,  "!, {pc}")
+
+	TEST_P(   "ldmib	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmeqib	r",4, -4,", {r0-r15}")
+	TEST_BF_P("ldmneib	r",7, 7*4,"!, {r8-r15}")
+	TEST_P(   "ldmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmib	r",14,14*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmeqdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmnedb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmccia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmcsia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmplia	r",0,15*4,", {pc}")
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmmiia	r",13,0,", {r0-r15}")
+#endif
+	TEST_BF("b	2f")
+	TEST_BF("bl	2f")
+	TEST_BB("b	2b")
+	TEST_BB("bl	2b")
+
+	TEST_BF("beq	2f")
+	TEST_BF("bleq	2f")
+	TEST_BB("bne	2b")
+	TEST_BB("blne	2b")
+
+	TEST_BF("bgt	2f")
+	TEST_BF("blgt	2f")
+	TEST_BB("blt	2b")
+	TEST_BB("bllt	2b")
+
+	TEST_GROUP("Supervisor Call, and coprocessor instructions")
+
+	/*
+	 * We can't really test these by executing them, so all
+	 * we can do is check that probes are, or are not allowed.
+	 * At the moment none are allowed, which is why TEST_COPROCESSOR
+	 * is simply an alias for TEST_UNSUPPORTED.
+	 */
+#define TEST_COPROCESSOR(code) TEST_UNSUPPORTED(code)
+	/*
+	 * Probe checks for the coprocessor store/load family (stc, stcl, ldc,
+	 * ldcl) in each addressing form.
+	 *
+	 *   two - string spliced into every mnemonic right after "stc"/"ldc";
+	 *         this function passes "" and "2".
+	 *   cc  - string spliced in as the leading hex digit of the
+	 *         hand-encoded ".word" values; this function passes "e" and "f".
+	 *
+	 * The first half uses r13 as the base register.  The second half uses
+	 * r15; there the writeback and post-indexed immediate forms are given
+	 * as raw ".word" encodings (with the intended instruction in an
+	 * assembler comment) and go straight to TEST_UNSUPPORTED rather than
+	 * through TEST_COPROCESSOR.  NOTE(review): presumably the raw words are
+	 * used because the assembler rejects those forms - confirm.
+	 */
+#define COPROCESSOR_INSTRUCTIONS_ST_LD(two,cc)					\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], {1}")			\
+										\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"daf0001	@ stc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d2f0001	@ stc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"caf0001	@ stc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c2f0001	@ stc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"def0001	@ stc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d6f0001	@ stc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cef0001	@ stc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c6f0001	@ stc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dbf0001	@ ldc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d3f0001	@ ldc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cbf0001	@ ldc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c3f0001	@ ldc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dff0001	@ ldc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d7f0001	@ ldc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cff0001	@ ldc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c7f0001	@ ldc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15], {1}")
+	/*
+	 * Probe checks for the coprocessor register-transfer and data
+	 * processing instructions: mcrr, mrrc, cdp, mcr and mrc.
+	 *
+	 *   two - string spliced into every mnemonic right after its base
+	 *         name; this function passes "" and "2".
+	 *   cc  - string spliced in as the leading hex digit of the
+	 *         hand-encoded ".word" values; this function passes "e" and "f".
+	 *
+	 * The mcrr/mrrc cases that name r15 as a transfer register are given
+	 * as raw ".word" encodings (intended instruction in the assembler
+	 * comment) and go straight to TEST_UNSUPPORTED.
+	 */
+#define COPROCESSOR_INSTRUCTIONS_MC_MR(two,cc)					\
+										\
+	TEST_COPROCESSOR( "mcrr"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mcrr"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c4f00f0	@ mcrr"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c40ff0f	@ mcrr"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "mrrc"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mrrc"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c5f00f0	@ mrrc"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c50ff0f	@ mrrc"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "cdp"two"	15, 15, cr15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "cdp"two"	0, 0, cr0, cr0, cr0, 0")		\
+	TEST_COPROCESSOR( "mcr"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mcr"two"	0, 0, r0, cr0, cr0, 0")			\
+	TEST_COPROCESSOR( "mrc"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mrc"two"	0, 0, r0, cr0, cr0, 0")
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("","e")
+	COPROCESSOR_INSTRUCTIONS_MC_MR("","e")
+	TEST_UNSUPPORTED("svc	0")
+	TEST_UNSUPPORTED("svc	0xffffff")
+
+	TEST_UNSUPPORTED("svc	0")
+
+	TEST_GROUP("Unconditional instruction")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("srsda	sp, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp, 0x13")
+	TEST_UNSUPPORTED("srsia	sp, 0x13")
+	TEST_UNSUPPORTED("srsib	sp, 0x13")
+	TEST_UNSUPPORTED("srsda	sp!, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp!, 0x13")
+	TEST_UNSUPPORTED("srsia	sp!, 0x13")
+	TEST_UNSUPPORTED("srsib	sp!, 0x13")
+
+	TEST_UNSUPPORTED("rfeda	sp")
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfeib	sp")
+	TEST_UNSUPPORTED("rfeda	sp!")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+	TEST_UNSUPPORTED("rfeib	sp!")
+	TEST_UNSUPPORTED(".word 0xf81d0a00	@ rfeda	pc")
+	TEST_UNSUPPORTED(".word 0xf91d0a00	@ rfedb	pc")
+	TEST_UNSUPPORTED(".word 0xf89d0a00	@ rfeia	pc")
+	TEST_UNSUPPORTED(".word 0xf99d0a00	@ rfeib	pc")
+	TEST_UNSUPPORTED(".word 0xf83d0a00	@ rfeda	pc!")
+	TEST_UNSUPPORTED(".word 0xf93d0a00	@ rfedb	pc!")
+	TEST_UNSUPPORTED(".word 0xf8bd0a00	@ rfeia	pc!")
+	TEST_UNSUPPORTED(".word 0xf9bd0a00	@ rfeib	pc!")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_X(	"blx	__dummy_thumb_subroutine_even",
+		".thumb				\n\t"
+		".space 4			\n\t"
+		".type __dummy_thumb_subroutine_even, %%function \n\t"
+		"__dummy_thumb_subroutine_even:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_even")
+
+	TEST_X(	"blx	__dummy_thumb_subroutine_odd",
+		".thumb				\n\t"
+		".space 2			\n\t"
+		".type __dummy_thumb_subroutine_odd, %%function	\n\t"
+		"__dummy_thumb_subroutine_odd:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_odd")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("2","f")
+#if __LINUX_ARM_ARCH__ >= 6
+	COPROCESSOR_INSTRUCTIONS_MC_MR("2","f")
+#endif
+
+	TEST_GROUP("Miscellaneous instructions, memory hints, and Advanced SIMD instructions")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("cps	0x13")
+	TEST_UNSUPPORTED("cpsie	i")
+	TEST_UNSUPPORTED("cpsid	i")
+	TEST_UNSUPPORTED("cpsie	i,0x13")
+	TEST_UNSUPPORTED("cpsid	i,0x13")
+	TEST_UNSUPPORTED("setend	le")
+	TEST_UNSUPPORTED("setend	be")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_P("pli	[r",0,0b,", #16]")
+	TEST(  "pli	[pc, #0]")
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 5
+	TEST_P("pld	[r",0,32,", #-16]")
+	TEST(  "pld	[pc, #0]")
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED(  ".word 0xf590f000	@ pldw [r0, #0]")
+	TEST_SUPPORTED(  ".word 0xf797f000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(  ".word 0xf798f18c	@ pldw	[r8, r12, lsl #3]");
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+#endif
+
+	verbose("\n");
+}
+

+ 1187 - 0
arch/arm/kernel/kprobes-test-thumb.c

@@ -0,0 +1,1187 @@
+/*
+ * arch/arm/kernel/kprobes-test-thumb.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+/*
+ * Instruction-set tag for the tests in this file.
+ * NOTE(review): not referenced in the visible part of this file; presumably
+ * consumed by the macros in kprobes-test.h - confirm.
+ */
+#define TEST_ISA "16"
+
+/*
+ * Expand 'tests' with TEST_FLAG_NO_ITBLOCK set in kprobe_test_flags, then
+ * clear the flag again. 'tests' is a sequence of TEST*() invocations.
+ * Expands to several statements, so it must be used at statement level.
+ * NOTE(review): going by the flag name, this stops the framework from also
+ * running these cases inside an IT block - confirm in kprobes-test.c.
+ */
+#define DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_flags |= TEST_FLAG_NO_ITBLOCK;	\
+	tests						\
+	kprobe_test_flags &= ~TEST_FLAG_NO_ITBLOCK;
+
+/*
+ * Expand 'tests' with kprobe_test_cc_position set to 'cc_pos' and with
+ * TEST_FLAG_NO_ITBLOCK set (via DONT_TEST_IN_ITBLOCK); afterwards
+ * kprobe_test_cc_position is reset to 0.
+ * NOTE(review): cc_pos looks like the bit position of the condition field
+ * in the instruction encoding (callers in this file pass 8 and 22) -
+ * confirm against the code that reads kprobe_test_cc_position.
+ */
+#define CONDITION_INSTRUCTIONS(cc_pos, tests)		\
+	kprobe_test_cc_position = cc_pos;		\
+	DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_cc_position = 0;
+
+/*
+ * Emit one test case for 'code' while TEST_FLAG_FULL_ITBLOCK is set in
+ * kprobe_test_flags; the flag is cleared again afterwards.
+ * The generated assembly is: a nop at label 50, 'code' at label 1, three
+ * mov-immediate instructions writing r1, r2 and r3, and a nop at label 2.
+ * NOTE(review): the three trailing movs presumably fill the remaining slots
+ * of a four-instruction IT block set up by the framework - confirm.
+ */
+#define TEST_ITBLOCK(code)				\
+	kprobe_test_flags |= TEST_FLAG_FULL_ITBLOCK;	\
+	TESTCASE_START(code)				\
+	TEST_ARG_END("")				\
+	"50:	nop			\n\t"		\
+	"1:	"code"			\n\t"		\
+	"	mov r1, #0x11		\n\t"		\
+	"	mov r2, #0x22		\n\t"		\
+	"	mov r3, #0x33		\n\t"		\
+	"2:	nop			\n\t"		\
+	TESTCASE_END					\
+	kprobe_test_flags &= ~TEST_FLAG_FULL_ITBLOCK;
+
+/*
+ * Emit a test case for an instruction that loads pc from memory with the
+ * address of ARM code. The instruction text is built as code1 #reg code2.
+ *
+ * Arguments handed to the framework:
+ *   - register 'reg' as a pointer argument with value 'val' (TEST_ARG_PTR)
+ *   - r14 = 99f+1 (TEST_ARG_REG)
+ *   - memory slot 15 = address of label 3 (TEST_ARG_MEM)
+ *
+ * Label 3 is assembled as ARM code (".arm"); it loads lr with the address of
+ * label 2 plus 1 and executes "bx lr", which lands on the Thumb nop at
+ * label 2. The "bx lr" placed directly after the tested instruction is only
+ * reached if the tested instruction does not branch.
+ * NOTE(review): 'val' presumably positions 'reg' so that the pc element the
+ * instruction loads comes from memory slot 15 - confirm against the
+ * TEST_ARG_PTR/TEST_ARG_MEM definitions in kprobes-test.h.
+ */
+#define TEST_THUMB_TO_ARM_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f+1)					\
+	TEST_ARG_MEM(15, 3f)					\
+	TEST_ARG_END("")					\
+	"	nop			\n\t" /* To align 1f */	\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"3:	adr	lr, 2f+1	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+
+/*
+ * Test cases for the 16-bit Thumb instruction encodings.
+ *
+ * Sets kprobe_test_flags to TEST_FLAG_NARROW_INSTR and then expands one
+ * TEST*() invocation per instruction form, organised into TEST_GROUP()s.
+ * The TEST*() macros are provided by kprobes-test.h; each takes the
+ * instruction text split around the register numbers and initial values to
+ * use. The order of the invocations matters because some of them change
+ * kprobe_test_flags / kprobe_test_cc_position around a group of tests.
+ * NOTE(review): forms given as TEST_UNSUPPORTED are presumably expected to
+ * be rejected when a probe is placed on them - confirm in kprobes-test.h.
+ */
+void kprobe_thumb16_test_cases(void)
+{
+	kprobe_test_flags = TEST_FLAG_NARROW_INSTR;
+
+	TEST_GROUP("Shift (immediate), add, subtract, move, and compare")
+
+	TEST_R(    "lsls	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsls	r0, r",7,VAL2,", #11")
+	TEST_R(    "lsrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsrs	r0, r",7,VAL2,", #11")
+	TEST_R(    "asrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "asrs	r0, r",7,VAL2,", #11")
+	TEST_RR(   "adds	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "adds	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_RR(   "subs	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "subs	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_R(    "adds	r7, r",0,VAL1,", #5")
+	TEST_R(    "adds	r0, r",7,VAL2,", #2")
+	TEST_R(    "subs	r7, r",0,VAL1,", #5")
+	TEST_R(    "subs	r0, r",7,VAL2,", #2")
+	TEST(      "movs.n	r0, #0x5f")
+	TEST(      "movs.n	r7, #0xa0")
+	TEST_R(    "cmp.n	r",0,0x5e, ", #0x5f")
+	TEST_R(    "cmp.n	r",5,0x15f,", #0x5f")
+	TEST_R(    "cmp.n	r",7,0xa0, ", #0xa0")
+	TEST_R(    "adds.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "adds.n	r",7,VAL2,", #0xa0")
+	TEST_R(    "subs.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "subs.n	r",7,VAL2,", #0xa0")
+
+	TEST_GROUP("16-bit Thumb data-processing instructions")
+
+	/* Two tests of 'op' on r0 and r7, with the register roles swapped. */
+#define DATA_PROCESSING16(op,val)			\
+	TEST_RR(   op"	r",0,VAL1,", r",7,val,"")	\
+	TEST_RR(   op"	r",7,VAL2,", r",0,val,"")
+
+	DATA_PROCESSING16("ands",0xf00f00ff)
+	DATA_PROCESSING16("eors",0xf00f00ff)
+	DATA_PROCESSING16("lsls",11)
+	DATA_PROCESSING16("lsrs",11)
+	DATA_PROCESSING16("asrs",11)
+	DATA_PROCESSING16("adcs",VAL2)
+	DATA_PROCESSING16("sbcs",VAL2)
+	DATA_PROCESSING16("rors",11)
+	DATA_PROCESSING16("tst",0xf00f00ff)
+	TEST_R("rsbs	r",0,VAL1,", #0")
+	TEST_R("rsbs	r",7,VAL2,", #0")
+	DATA_PROCESSING16("cmp",0xf00f00ff)
+	DATA_PROCESSING16("cmn",0xf00f00ff)
+	DATA_PROCESSING16("orrs",0xf00f00ff)
+	DATA_PROCESSING16("muls",VAL2)
+	DATA_PROCESSING16("bics",0xf00f00ff)
+	DATA_PROCESSING16("mvns",VAL2)
+
+	TEST_GROUP("Special data instructions and branch and exchange")
+
+	TEST_RR(  "add	r",0, VAL1,", r",7,VAL2,"")
+	TEST_RR(  "add	r",3, VAL2,", r",8,VAL3,"")
+	TEST_RR(  "add	r",8, VAL3,", r",0,VAL1,"")
+	TEST_R(   "add	sp"        ", r",8,-8,  "")
+	TEST_R(   "add	r",14,VAL1,", pc")
+	TEST_BF_R("add	pc"        ", r",0,2f-1f-8,"")
+	TEST_UNSUPPORTED(".short 0x44ff	@ add pc, pc")
+
+	TEST_RR(  "cmp	r",3,VAL1,", r",8,VAL2,"")
+	TEST_RR(  "cmp	r",8,VAL2,", r",0,VAL1,"")
+	TEST_R(   "cmp	sp"       ", r",8,-8,  "")
+
+	TEST_R(   "mov	r0, r",7,VAL2,"")
+	TEST_R(   "mov	r3, r",8,VAL3,"")
+	TEST_R(   "mov	r8, r",0,VAL1,"")
+	TEST_P(   "mov	sp, r",8,-8,  "")
+	TEST(     "mov	lr, pc")
+	TEST_BF_R("mov	pc, r",0,2f,  "")
+
+	TEST_BF_R("bx	r",0, 2f+1,"")
+	TEST_BF_R("bx	r",14,2f+1,"")
+	/*
+	 * Hand-written case for "bx pc": the code placed after the tested
+	 * instruction is assembled as ARM (".arm") and returns to the Thumb
+	 * nop at label 2 through "adr lr, 2f+1; bx lr".
+	 */
+	TESTCASE_START("bx	pc")
+		TEST_ARG_REG(14, 99f+1)
+		TEST_ARG_END("")
+		"	nop			\n\t" /* To align the bx pc*/
+		"50:	nop			\n\t"
+		"1:	bx	pc		\n\t"
+		"	bx	lr		\n\t"
+		".arm				\n\t"
+		"	adr	lr, 2f+1	\n\t"
+		"	bx	lr		\n\t"
+		".thumb				\n\t"
+		"2:	nop			\n\t"
+	TESTCASE_END
+
+	TEST_BF_R("blx	r",0, 2f+1,"")
+	TEST_BB_R("blx	r",14,2f+1,"")
+	TEST_UNSUPPORTED(".short 0x47f8	@ blx pc")
+
+	TEST_GROUP("Load from Literal Pool")
+
+	TEST_X( "ldr	r0, 3f",
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL1))
+	TEST_X( "ldr	r7, 3f",
+		".space 128				\n\t"
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL2))
+
+	TEST_GROUP("16-bit Thumb Load/store instructions")
+
+	TEST_RPR("str	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("str	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strh	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strh	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strb	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strb	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrsb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldr	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldr	r7, [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrh	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrsh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsh	r7, [r",6, 24,", r",5,  50,"]")
+
+	TEST_RP("str	r",0, VAL1,", [r",1, 24,", #120]")
+	TEST_RP("str	r",7, VAL2,", [r",6, 24,", #120]")
+	TEST_P( "ldr	r0, [r",1, 24,", #120]")
+	TEST_P( "ldr	r7, [r",6, 24,", #120]")
+	TEST_RP("strb	r",0, VAL1,", [r",1, 24,", #30]")
+	TEST_RP("strb	r",7, VAL2,", [r",6, 24,", #30]")
+	TEST_P( "ldrb	r0, [r",1, 24,", #30]")
+	TEST_P( "ldrb	r7, [r",6, 24,", #30]")
+	TEST_RP("strh	r",0, VAL1,", [r",1, 24,", #60]")
+	TEST_RP("strh	r",7, VAL2,", [r",6, 24,", #60]")
+	TEST_P( "ldrh	r0, [r",1, 24,", #60]")
+	TEST_P( "ldrh	r7, [r",6, 24,", #60]")
+
+	TEST_R( "str	r",0, VAL1,", [sp, #0]")
+	TEST_R( "str	r",7, VAL2,", [sp, #160]")
+	TEST(   "ldr	r0, [sp, #0]")
+	TEST(   "ldr	r7, [sp, #160]")
+
+	TEST_RP("str	r",0, VAL1,", [r",0, 24,"]")
+	TEST_P( "ldr	r0, [r",0, 24,"]")
+
+	TEST_GROUP("Generate PC-/SP-relative address")
+
+	TEST("add	r0, pc, #4")
+	TEST("add	r7, pc, #1020")
+	TEST("add	r0, sp, #4")
+	TEST("add	r7, sp, #1020")
+
+	TEST_GROUP("Miscellaneous 16-bit instructions")
+
+	TEST_UNSUPPORTED( "cpsie	i")
+	TEST_UNSUPPORTED( "cpsid	i")
+	TEST_UNSUPPORTED( "setend	le")
+	TEST_UNSUPPORTED( "setend	be")
+
+	TEST("add	sp, #"__stringify(TEST_MEMORY_SIZE)) /* Assumes TEST_MEMORY_SIZE < 0x400 */
+	TEST("sub	sp, #0x7f*4")
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_BF_R(  "cbnz	r",0,0, ", 2f")
+	TEST_BF_R(  "cbz	r",2,-1,", 2f")
+	TEST_BF_RX( "cbnz	r",4,1, ", 2f",0x20)
+	TEST_BF_RX( "cbz	r",7,0, ", 2f",0x40)
+)
+	TEST_R("sxth	r0, r",7, HH1,"")
+	TEST_R("sxth	r7, r",0, HH2,"")
+	TEST_R("sxtb	r0, r",7, HH1,"")
+	TEST_R("sxtb	r7, r",0, HH2,"")
+	TEST_R("uxth	r0, r",7, HH1,"")
+	TEST_R("uxth	r7, r",0, HH2,"")
+	TEST_R("uxtb	r0, r",7, HH1,"")
+	TEST_R("uxtb	r7, r",0, HH2,"")
+	TEST_R("rev	r0, r",7, VAL1,"")
+	TEST_R("rev	r7, r",0, VAL2,"")
+	TEST_R("rev16	r0, r",7, VAL1,"")
+	TEST_R("rev16	r7, r",0, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xba80")
+	TEST_UNSUPPORTED(".short 0xbabf")
+	TEST_R("revsh	r0, r",7, VAL1,"")
+	TEST_R("revsh	r7, r",0, VAL2,"")
+
+	/*
+	 * Test case for a pop that loads pc: r13 is passed as a pointer
+	 * argument with 'offset' and the instruction is emitted through
+	 * TEST_BRANCH_F, i.e. it is treated as a branch.
+	 */
+#define TEST_POPPC(code, offset)	\
+	TESTCASE_START(code)		\
+	TEST_ARG_PTR(13, offset)	\
+	TEST_ARG_END("")		\
+	TEST_BRANCH_F(code,0)		\
+	TESTCASE_END
+
+	TEST("push	{r0}")
+	TEST("push	{r7}")
+	TEST("push	{r14}")
+	TEST("push	{r0-r7,r14}")
+	TEST("push	{r0,r2,r4,r6,r14}")
+	TEST("push	{r1,r3,r5,r7}")
+	TEST("pop	{r0}")
+	TEST("pop	{r7}")
+	TEST("pop	{r0,r2,r4,r6}")
+	TEST_POPPC("pop	{pc}",15*4)
+	TEST_POPPC("pop	{r0-r7,pc}",7*4)
+	TEST_POPPC("pop	{r1,r3,r5,r7,pc}",11*4)
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{pc}	@ ",13,15*4,"")
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{r0-r7,pc}	@ ",13,7*4,"")
+
+	TEST_UNSUPPORTED("bkpt.n	0")
+	TEST_UNSUPPORTED("bkpt.n	255")
+
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED(".short 0xbf50") /* Unassigned hints */
+	TEST_UNSUPPORTED(".short 0xbff0") /* Unassigned hints */
+
+	/*
+	 * Test case for an IT instruction ('code') immediately followed by
+	 * the instruction(s) it makes conditional ('code2').
+	 */
+#define TEST_IT(code, code2)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	"50:	nop			\n\t"	\
+	"1:	"code"			\n\t"	\
+	"	"code2"			\n\t"	\
+	"2:	nop			\n\t"	\
+	TESTCASE_END
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_IT("it	eq","moveq r0,#0")
+	TEST_IT("it	vc","movvc r0,#0")
+	TEST_IT("it	le","movle r0,#0")
+	TEST_IT("ite	eq","moveq r0,#0\n\t  movne r1,#1")
+	TEST_IT("itet	vc","movvc r0,#0\n\t  movvs r1,#1\n\t  movvc r2,#2")
+	TEST_IT("itete	le","movle r0,#0\n\t  movgt r1,#1\n\t  movle r2,#2\n\t  movgt r3,#3")
+	TEST_IT("itttt	le","movle r0,#0\n\t  movle r1,#1\n\t  movle r2,#2\n\t  movle r3,#3")
+	TEST_IT("iteee	le","movle r0,#0\n\t  movgt r1,#1\n\t  movgt r2,#2\n\t  movgt r3,#3")
+)
+
+	TEST_GROUP("Load and store multiple")
+
+	TEST_P("ldmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("ldmia	r",7, 16*4,"!, {r0-r6}")
+	TEST_P("stmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("stmia	r",0, 16*4,"!, {r0-r7}")
+
+	TEST_GROUP("Conditional branch and Supervisor Call instructions")
+
+CONDITION_INSTRUCTIONS(8,
+	TEST_BF("beq	2f")
+	TEST_BB("bne	2b")
+	TEST_BF("bgt	2f")
+	TEST_BB("blt	2b")
+)
+	TEST_UNSUPPORTED(".short 0xde00")
+	TEST_UNSUPPORTED(".short 0xdeff")
+	TEST_UNSUPPORTED("svc	#0x00")
+	TEST_UNSUPPORTED("svc	#0xff")
+
+	TEST_GROUP("Unconditional branch")
+
+	TEST_BF(  "b	2f")
+	TEST_BB(  "b	2b")
+	TEST_BF_X("b	2f", 0x400)
+	TEST_BB_X("b	2b", 0x400)
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("subs.n r0, r0")
+
+	verbose("\n");
+}
+
+
+void kprobe_thumb32_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Load/store multiple")
+
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmia	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmia	r",7, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmia	r",4, 0,   ", {r0-r12,r14}")
+	TEST_BF_P("ldmia	r",5, 8*4, "!, {r6-r12,r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,14*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmdb	r",5, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_BF_P("ldmdb	r",5, 16*4,"!, {r6-r12,r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmdb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",0,14*4,", {r12,pc}")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",13,2*4,", {r0-r12,pc}")
+
+	TEST_UNSUPPORTED(".short 0xe88f,0x0101	@ stmia	pc, {r0,r8}")
+	TEST_UNSUPPORTED(".short 0xe92f,0x5f00	@ stmdb	pc!, {r8-r12,r14}")
+	TEST_UNSUPPORTED(".short 0xe8bd,0xc000	@ ldmia	r13!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0xc000	@ ldmdb	r14!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x3f00	@ stmia	r7!, {r8-r12,sp}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x9f00	@ stmia	r7!, {r8-r12,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0x2010	@ ldmdb	r14!, {r4,sp}")
+
+	TEST_GROUP("Load/store double or exclusive, table branch")
+
+	TEST_P(  "ldrd	r0, r1, [r",1, 24,", #-16]")
+	TEST(    "ldrd	r12, r14, [sp, #16]")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,", #-16]!")
+	TEST(    "ldrd	r14, r12, [sp, #16]!")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,"], #16")
+	TEST(    "ldrd	r7, r8, [sp], #-16")
+
+	TEST_X( "ldrd	r12, r14, 3f",
+		".align 3				\n\t"
+		"3:	.word	"__stringify(VAL1)"	\n\t"
+		"	.word	"__stringify(VAL2))
+
+	TEST_UNSUPPORTED(".short 0xe9ff,0xec04	@ ldrd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ff,0xec04	@ ldrd	r14, r12, [pc], #16")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xd800	@ ldrd	sp, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xf800	@ ldrd	pc, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7d00	@ ldrd	r7, sp, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7f00	@ ldrd	r7, pc, [r4]")
+
+	TEST_RRP("strd	r",0, VAL1,", r",1, VAL2,", [r",1, 24,", #-16]")
+	TEST_RR( "strd	r",12,VAL2,", r",14,VAL1,", [sp, #16]")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,", #-16]!")
+	TEST_RR( "strd	r",14,VAL2,", r",12,VAL1,", [sp, #16]!")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,"], #16")
+	TEST_RR( "strd	r",7, VAL2,", r",8, VAL1,", [sp], #-16")
+	TEST_UNSUPPORTED(".short 0xe9ef,0xec04	@ strd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ef,0xec04	@ strd	r14, r12, [pc], #16")
+
+	TEST_RX("tbb	[pc, r",0, (9f-(1f+4)),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbb	[pc, r",4, (9f-(1f+4)+1),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbb	[r",1,9f,", r",2,0,"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",7, (9f-(1f+4))>>1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",12, ((9f-(1f+4))>>1)+1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbh	[r",1,9f, ", r",14,1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01f	@ tbh [r1, pc]")
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01d	@ tbh [r1, sp]")
+	TEST_UNSUPPORTED(".short 0xe8dd,0xf012	@ tbh [sp, r2]")
+
+	TEST_UNSUPPORTED("strexb	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexh	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexd	r0, r1, [r2]")
+	TEST_UNSUPPORTED("ldrexb	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexh	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexd	r0, [r1]")
+
+	TEST_GROUP("Data-processing (shifted register) and (modified immediate)")
+
+#define _DATA_PROCESSING32_DNM(op,s,val)					\
+	TEST_RR(op s".w	r0,  r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op s"	r1,  r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op s"	r2,  r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op s"	r3,  r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op s"	r4,  r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op s"	r5,  r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op s"	r8,  r",9, VAL1,", r",10,val, ", rrx")			\
+	TEST_R( op s"	r0,  r",11,VAL1,", #0x00010001")			\
+	TEST_R( op s"	r11, r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op s"	r7,  r",8, VAL2,", #0x000af000")
+
+#define DATA_PROCESSING32_DNM(op,val)		\
+	_DATA_PROCESSING32_DNM(op,"",val)	\
+	_DATA_PROCESSING32_DNM(op,"s",val)
+
+#define DATA_PROCESSING32_NM(op,val)					\
+	TEST_RR(op".w	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op"	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op"	r",9, VAL1,", r",10,val, ", rrx")		\
+	TEST_R( op"	r",11,VAL1,", #0x00010001")			\
+	TEST_R( op"	r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op"	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING32_DM(op,s,val)				\
+	TEST_R( op s".w	r0,  r",14, val, "")			\
+	TEST_R( op s"	r1,  r",12, val, ", lsl #3")		\
+	TEST_R( op s"	r2,  r",11, val, ", lsr #4")		\
+	TEST_R( op s"	r3,  r",10, val, ", asr #5")		\
+	TEST_R( op s"	r4,  r",9, N(val),", asr #6")		\
+	TEST_R( op s"	r5,  r",8, val, ", ror #7")		\
+	TEST_R( op s"	r8,  r",7,val, ", rrx")			\
+	TEST(   op s"	r0,  #0x00010001")			\
+	TEST(   op s"	r11, #0xf5000000")			\
+	TEST(   op s"	r7,  #0x000af000")			\
+	TEST(   op s"	r4,  #0x00005a00")
+
+#define DATA_PROCESSING32_DM(op,val)		\
+	_DATA_PROCESSING32_DM(op,"",val)	\
+	_DATA_PROCESSING32_DM(op,"s",val)
+
+	DATA_PROCESSING32_DNM("and",0xf00f00ff)
+	DATA_PROCESSING32_NM("tst",0xf00f00ff)
+	DATA_PROCESSING32_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING32_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING32_DM("mov",VAL2)
+	DATA_PROCESSING32_DNM("orn",0xf00f00ff)
+	DATA_PROCESSING32_DM("mvn",VAL2)
+	DATA_PROCESSING32_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING32_NM("teq",0xf00f00ff)
+	DATA_PROCESSING32_DNM("add",VAL2)
+	DATA_PROCESSING32_NM("cmn",VAL2)
+	DATA_PROCESSING32_DNM("adc",VAL2)
+	DATA_PROCESSING32_DNM("sbc",VAL2)
+	DATA_PROCESSING32_DNM("sub",VAL2)
+	DATA_PROCESSING32_NM("cmp",VAL2)
+	DATA_PROCESSING32_DNM("rsb",VAL2)
+
+	TEST_RR("pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_RR("pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0d	@ tst.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0f	@ tst.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea1d,0x0f07	@ tst.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea1f,0x0f07	@ tst.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf01d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf01f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0d	@ teq.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0f	@ teq.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea9d,0x0f07	@ teq.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea9f,0x0f07	@ teq.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf09d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf09f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0d	@ cmn.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0f	@ cmn.w r7, pc")
+	TEST_P("cmn.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xeb1f,0x0f07	@ cmn.w pc, r7")
+	TEST(  "cmn	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf11f,0x1f08	@ cmn pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0d	@ cmp.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0f	@ cmp.w r7, pc")
+	TEST_P("cmp.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xebbf,0x0f07	@ cmp.w pc, r7")
+	TEST(  "cmp	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf1bf,0x1f08	@ cmp pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea5f,0x070d	@ movs.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea5f,0x070f	@ movs.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea5f,0x0d07	@ movs.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea4f,0x0f07	@ mov.w  pc, r7")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1d08	@ mov sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1f08	@ mov pc, #0x00080008")
+
+	TEST_R("add.w	r0, sp, r",1, 4,"")
+	TEST_R("adds	r0, sp, r",1, 4,", asl #3")
+	TEST_R("add	r0, sp, r",1, 4,", asl #4")
+	TEST_R("add	r0, sp, r",1, 16,", ror #1")
+	TEST_R("add.w	sp, sp, r",1, 4,"")
+	TEST_R("add	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x1d01	@ add sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d71	@ add sp, sp, r1, ror #1")
+	TEST(  "add.w	r0, sp, #24")
+	TEST(  "add.w	sp, sp, #24")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0f01	@ add pc, sp, r1")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000f	@ add r0, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000d	@ add r0, sp, sp")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0f	@ add sp, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0d	@ add sp, sp, sp")
+
+	TEST_R("sub.w	r0, sp, r",1, 4,"")
+	TEST_R("subs	r0, sp, r",1, 4,", asl #3")
+	TEST_R("sub	r0, sp, r",1, 4,", asl #4")
+	TEST_R("sub	r0, sp, r",1, 16,", ror #1")
+	TEST_R("sub.w	sp, sp, r",1, 4,"")
+	TEST_R("sub	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xebad,0x1d01	@ sub sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xebad,0x0d71	@ sub sp, sp, r1, ror #1")
+	TEST_UNSUPPORTED(".short 0xebad,0x0f01	@ sub pc, sp, r1")
+	TEST(  "sub.w	r0, sp, #24")
+	TEST(  "sub.w	sp, sp, #24")
+
+	TEST_UNSUPPORTED(".short 0xea02,0x010f	@ and r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xea0f,0x0103	@ and r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0f03	@ and pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x010d	@ and r1, r2, sp")
+	TEST_UNSUPPORTED(".short 0xea0d,0x0103	@ and r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0d03	@ and sp, r2, r3")
+	TEST_UNSUPPORTED(".short 0xf00d,0x1108	@ and r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf00f,0x1108	@ and r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1d08	@ and sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1f08	@ and pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb02,0x010f	@ add r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xeb0f,0x0103	@ add r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0f03	@ add pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x010d	@ add r1, r2, sp")
+	TEST_SUPPORTED(  ".short 0xeb0d,0x0103	@ add r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0d03	@ add sp, r2, r3")
+	TEST_SUPPORTED(  ".short 0xf10d,0x1108	@ add r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10d,0x1f08	@ add pc, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10f,0x1108	@ add r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1d08	@ add sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1f08	@ add pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeaa0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeaf0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb20,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb80,0x0000")
+	TEST_UNSUPPORTED(".short 0xebe0,0x0000")
+
+	TEST_UNSUPPORTED(".short 0xf0a0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0c0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0f0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf120,0x0000")
+	TEST_UNSUPPORTED(".short 0xf180,0x0000")
+	TEST_UNSUPPORTED(".short 0xf1e0,0x0000")
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xec00,0x0000")
+	TEST_UNSUPPORTED(".short 0xeff0,0x0000")
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xfff0,0x0000")
+
+	TEST_GROUP("Data-processing (plain binary immediate)")
+
+	TEST_R("addw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "addw	r14, sp, #0xf5a")
+	TEST(  "addw	sp, sp, #0x20")
+	TEST(  "addw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1f20	@ addw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20d,0x1f20	@ addw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1d20	@ addw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf200,0x1d20	@ addw sp, r0, #0x120")
+
+	TEST_R("subw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "subw	r14, sp, #0xf5a")
+	TEST(  "subw	sp, sp, #0x20")
+	TEST(  "subw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1f20	@ subw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2ad,0x1f20	@ subw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1d20	@ subw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2a0,0x1d20	@ subw sp, r0, #0x120")
+
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".short 0xf240,0x0d00	@ movw sp, #0")
+	TEST_UNSUPPORTED(".short 0xf240,0x0f00	@ movw pc, #0")
+
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0d00	@ movt sp, #0")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0f00	@ movt pc, #0")
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0d17	@ ssat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0f17	@ ssat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30d,0x0c17	@ ssat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf30f,0x0c17	@ ssat	r12, #24, pc")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0d17	@ usat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0f17	@ usat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38d,0x0c17	@ usat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf38f,0x0c17	@ usat	r12, #24, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0d0b	@ ssat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0f0b	@ ssat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32d,0x0c0b	@ ssat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf32f,0x0c0b	@ ssat16	r12, #12, pc")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0d0b	@ usat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0f0b	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ad,0x0c0b	@ usat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf3af,0x0c0b	@ usat16	r12, #12, pc")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2d0f	@ sbfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2f0f	@ sbfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34d,0x2c0f	@ sbfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34f,0x2c0f	@ sbfx	r12, pc, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2d0f	@ ubfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2f0f	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cd,0x2c0f	@ ubfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cf,0x2c0f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfc	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0d1e	@ bfc	sp, #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0f1e	@ bfc	pc, #0, #31")
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfi	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1d17	@ bfi	sp, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1f17	@ bfi	pc, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36d,0x1e17	@ bfi	r14, sp, #4, #20")
+
+	TEST_GROUP("Branches and miscellaneous control")
+
+CONDITION_INSTRUCTIONS(22,
+	TEST_BF("beq.w	2f")
+	TEST_BB("bne.w	2b")
+	TEST_BF("bgt.w	2f")
+	TEST_BB("blt.w	2b")
+	TEST_BF_X("bpl.w	2f",0x1000)
+)
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, r1")
+	TEST_UNSUPPORTED("msr	spsr, r2")
+
+	TEST_UNSUPPORTED("cpsie.w	i")
+	TEST_UNSUPPORTED("cpsid.w	i")
+	TEST_UNSUPPORTED("cps	0x13")
+
+	TEST_SUPPORTED("yield.w")
+	TEST("sev.w")
+	TEST("nop.w")
+	TEST("wfi.w")
+	TEST_SUPPORTED("wfe.w")
+	TEST_UNSUPPORTED("dbg.w	#0")
+
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+
+	TEST_UNSUPPORTED("bxj	r0")
+
+	TEST_UNSUPPORTED("subs	pc, lr, #4")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8d00	@ mrs	sp, spsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8f00	@ mrs	pc, spsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0x8000 @ smc #0")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0xa000 @ undefeined")
+
+	TEST_BF(  "b.w	2f")
+	TEST_BB(  "b.w	2b")
+	TEST_BF_X("b.w	2f", 0x1000)
+
+	TEST_BF(  "bl.w	2f")
+	TEST_BB(  "bl.w	2b")
+	TEST_BB_X("bl.w	2b", 0x1000)
+
+	TEST_X(	"blx	__dummy_arm_subroutine",
+		".arm				\n\t"
+		".align				\n\t"
+		".type __dummy_arm_subroutine, %%function \n\t"
+		"__dummy_arm_subroutine:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".thumb				\n\t"
+	)
+	TEST(	"blx	__dummy_arm_subroutine")
+
+	TEST_GROUP("Store single data item")
+
+#define SINGLE_STORE(size)							\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,-1024,", #1024]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, -1024,", #1080]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #-120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #-128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,   ", #120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,   ", #128]!")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]!")		\
+	TEST_RPR("str"size".w	r",0, VAL1,", [r",1, 0,", r",2, 4,"]")		\
+	TEST_RPR("str"size"	r",14,VAL2,", [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_R(  "str"size".w	r",7, VAL1,", [sp, #24]")			\
+	TEST_RP( "str"size".w	r",0, VAL2,", [r",0,0, "]")			\
+	TEST_UNSUPPORTED("str"size"t	r0, [r1, #4]")
+
+	SINGLE_STORE("b")
+	SINGLE_STORE("h")
+	SINGLE_STORE("")
+
+	TEST("str	sp, [sp]")
+	TEST_UNSUPPORTED(".short 0xf8cf,0xe000	@ str	r14, [pc]")
+	TEST_UNSUPPORTED(".short 0xf8ce,0xf000	@ str	pc, [r14]")
+
+	TEST_GROUP("Advanced SIMD element or structure load/store instructions")
+
+	TEST_UNSUPPORTED(".short 0xf900,0x0000")
+	TEST_UNSUPPORTED(".short 0xf92f,0xffff")
+	TEST_UNSUPPORTED(".short 0xf980,0x0000")
+	TEST_UNSUPPORTED(".short 0xf9ef,0xffff")
+
+	TEST_GROUP("Load single data item and memory hints")
+
+#define SINGLE_LOAD(size)						\
+	TEST_P( "ldr"size"	r0, [r",11,-1024, ", #1024]")		\
+	TEST_P( "ldr"size"	r14, [r",1, -1024,", #1080]")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #120")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,  "], #128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #-120")		\
+	TEST_P( "ldr"size"	r14, [r",1,24,   "], #-128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,    ", #120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,   ", #128]!")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]!")		\
+	TEST_PR("ldr"size".w	r0, [r",1, 0,", r",2, 4,"]")		\
+	TEST_PR("ldr"size"	r14, [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_X( "ldr"size".w	r0, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL1))			\
+	TEST_X( "ldr"size".w	r14, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL2))			\
+	TEST(   "ldr"size".w	r7, 3b")				\
+	TEST(   "ldr"size".w	r7, [sp, #24]")				\
+	TEST_P( "ldr"size".w	r0, [r",0,0, "]")			\
+	TEST_UNSUPPORTED("ldr"size"t	r0, [r1, #4]")
+
+	SINGLE_LOAD("b")
+	SINGLE_LOAD("sb")
+	SINGLE_LOAD("h")
+	SINGLE_LOAD("sh")
+	SINGLE_LOAD("")
+
+	TEST_BF_P("ldr	pc, [r",14, 15*4,"]")
+	TEST_P(   "ldr	sp, [r",14, 13*4,"]")
+	TEST_BF_R("ldr	pc, [sp, r",14, 15*4,"]")
+	TEST_R(   "ldr	sp, [sp, r",14, 13*4,"]")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+	TEST_SUPPORTED("ldr	sp, 99f")
+	TEST_SUPPORTED("ldr	pc, 99f")
+
+	TEST_UNSUPPORTED(".short 0xf854,0x700d	@ ldr	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf854,0x700f	@ ldr	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700d	@ ldrb	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700f	@ ldrb	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf89f,0xd004	@ ldrb	sp, 99f")
+	TEST_UNSUPPORTED(".short 0xf814,0xd008	@ ldrb	sp, [r4, r8]")
+	TEST_UNSUPPORTED(".short 0xf894,0xd000	@ ldrb	sp, [r4]")
+
+	TEST_UNSUPPORTED(".short 0xf860,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf9ff,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf950,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf95f,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf800,0x0800") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf97f,0xfaff") /* Unallocated space */
+
+	TEST(   "pli	[pc, #4]")
+	TEST(   "pli	[pc, #-4]")
+	TEST(   "pld	[pc, #4]")
+	TEST(   "pld	[pc, #-4]")
+
+	TEST_P( "pld	[r",0,-1024,", #1024]")
+	TEST(   ".short 0xf8b0,0xf400	@ pldw	[r0, #1024]")
+	TEST_P( "pli	[r",4, 0b,", #1024]")
+	TEST_P( "pld	[r",7, 120,", #-120]")
+	TEST(   ".short 0xf837,0xfc78	@ pldw	[r7, #-120]")
+	TEST_P( "pli	[r",11,120,", #-120]")
+	TEST(   "pld	[sp, #0]")
+
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", r",12,16,", lsl #3]")
+	TEST_SUPPORTED(".short 0xf837,0xf000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(".short 0xf838,0xf03c	@ pldw	[r8, r12, lsl #3]");
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", r",12,16,", lsl #3]")
+	TEST_R( "pld	[sp, r",1, 16,"]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00d  @pld	[r7, sp]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00f  @pld	[r7, pc]")
+
+	TEST_GROUP("Data-processing (register)")
+
+#define SHIFTS32(op)					\
+	TEST_RR(op"	r0,  r",1, VAL1,", r",2, 3, "")	\
+	TEST_RR(op"	r14, r",12,VAL2,", r",11,10,"")
+
+	SHIFTS32("lsl")
+	SHIFTS32("lsls")
+	SHIFTS32("lsr")
+	SHIFTS32("lsrs")
+	SHIFTS32("asr")
+	SHIFTS32("asrs")
+	SHIFTS32("ror")
+	SHIFTS32("rors")
+
+	TEST_UNSUPPORTED(".short 0xfa01,0xff02	@ lsl	pc, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xfd02	@ lsl	sp, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf002	@ lsl	r0, pc, r2")
+	TEST_UNSUPPORTED(".short 0xfa0d,0xf002	@ lsl	r0, sp, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00f	@ lsl	r0, r1, pc")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00d	@ lsl	r0, r1, sp")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa0f,0xff87	@ sxth	pc, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xfd87	@ sxth	sp, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88f	@ sxth	r8, pc");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88d	@ sxth	r8, sp");
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa60,0x00f0")
+	TEST_UNSUPPORTED(".short 0xfa7f,0xffff")
+
+#define PARALLEL_ADD_SUB(op)					\
+	TEST_RR(  op"add16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"asx	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"asx	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sax	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sax	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"add8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add8	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub8	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	PARALLEL_ADD_SUB("s")
+	PARALLEL_ADD_SUB("q")
+	PARALLEL_ADD_SUB("sh")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	PARALLEL_ADD_SUB("u")
+	PARALLEL_ADD_SUB("uq")
+	PARALLEL_ADD_SUB("uh")
+
+	TEST_GROUP("Miscellaneous operations")
+
+	TEST_RR("qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qsub	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdsub	lr, r",9, VAL2,", r",8, VAL1,"")
+
+	TEST_R("rev.w	r0, r",0,   VAL1,"")
+	TEST_R("rev	r14, r",12, VAL2,"")
+	TEST_R("rev16.w	r0, r",0,   VAL1,"")
+	TEST_R("rev16	r14, r",12, VAL2,"")
+	TEST_R("rbit	r0, r",0,   VAL1,"")
+	TEST_R("rbit	r14, r",12, VAL2,"")
+	TEST_R("revsh.w	r0, r",0,   VAL1,"")
+	TEST_R("revsh	r14, r",12, VAL2,"")
+
+	TEST_UNSUPPORTED(".short 0xfa9c,0xff8c	@ rev	pc, r12");
+	TEST_UNSUPPORTED(".short 0xfa9c,0xfd8c	@ rev	sp, r12");
+	TEST_UNSUPPORTED(".short 0xfa9f,0xfe8f	@ rev	r14, pc");
+	TEST_UNSUPPORTED(".short 0xfa9d,0xfe8d	@ rev	r14, sp");
+
+	TEST_RR("sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR("sel	r14, r",12,VAL1,", r",10, VAL2,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clz	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+
+	TEST_UNSUPPORTED(".short 0xfa80,0xf030") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfab0,0xf000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+
+	TEST_GROUP("Multiply, multiply accumulate, and absolute difference operations")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mul	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xff09	@ mul	pc, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xfd09	@ mul	sp, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb0f,0xf709	@ mul	r7, pc, r9")
+	TEST_UNSUPPORTED(".short 0xfb0d,0xf709	@ mul	r7, sp, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70f	@ mul	r7, r8, pc")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70d	@ mul	r7, r8, sp")
+
+	TEST_RRR(   "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mla	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xaf09	@ mla	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xad09	@ mla	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0f,0xa709	@ mla	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0d,0xa709	@ mla	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70f	@ mla	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70d	@ mla	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xd709	@ mla	r7, r8, r9, sp");
+
+	TEST_RRR(   "mls	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mls	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+
+	TEST_RRR(   "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbt	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RR(    "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_RR(    "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_RRR(   "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+
+	TEST_RRR(   "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(   "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_RR(    "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_UNSUPPORTED(".short 0xfb00,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb0f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0x0010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+
+	TEST_GROUP("Long multiply, long multiply accumulate, and divide")
+
+	TEST_RR(   "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "smull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_UNSUPPORTED(".short 0xfb89,0xf80a	@ smull	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0xd80a	@ smull	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7f0a	@ smull	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7d0a	@ smull	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb8f,0x780a	@ smull	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb8d,0x780a	@ smull	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780f	@ smull	r7, r8, r9, pc");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780d	@ smull	r7, r8, r9, sp");
+
+	TEST_RR(   "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "umull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+
+	TEST_RRRR( "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "smlsld	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsld	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlsldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "umaal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umaal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xffff,0xffff")
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("sub.w	r0, r0")
+
+	verbose("\n");
+}
+

+ 1748 - 0
arch/arm/kernel/kprobes-test.c

@@ -0,0 +1,1748 @@
+/*
+ * arch/arm/kernel/kprobes-test.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * This file contains test code for ARM kprobes.
+ *
+ * The top level function run_all_tests() executes tests for all of the
+ * supported instruction sets: ARM, 16-bit Thumb, and 32-bit Thumb. These tests
+ * fall into two categories; run_api_tests() checks basic functionality of the
+ * kprobes API, and run_test_cases() is a comprehensive test for kprobes
+ * instruction decoding and simulation.
+ *
+ * run_test_cases() first checks the kprobes decoding table for self consistency
+ * (using table_test()) then executes a series of test cases for each of the CPU
+ * instruction forms. coverage_start() and coverage_end() are used to verify
+ * that these test cases cover all of the possible combinations of instructions
+ * described by the kprobes decoding tables.
+ *
+ * The individual test cases are in kprobes-test-arm.c and kprobes-test-thumb.c
+ * which use the macros defined in kprobes-test.h. The rest of this
+ * documentation will describe the operation of the framework used by these
+ * test cases.
+ */
+
+/*
+ * TESTING METHODOLOGY
+ * -------------------
+ *
+ * The methodology used to test an ARM instruction 'test_insn' is to use
+ * inline assembler like:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *
+ * When the test case is run a kprobe is placed on each nop. The
+ * post-handler of the test_before probe is used to modify the saved CPU
+ * register context to that which we require for the test case. The
+ * pre-handler of the test_after probe saves a copy of the CPU
+ * register context. In this way we can execute test_insn with a specific
+ * register context and see the results afterwards.
+ *
+ * To actually test the kprobes instruction emulation we perform the above
+ * step a second time but with an additional kprobe on the test_case
+ * instruction itself. If the emulation is accurate then the results seen
+ * by the test_after probe will be identical to the first run which didn't
+ * have a probe on test_case.
+ *
+ * Each test case is run several times with a variety of variations in the
+ * flags value stored in CPSR, and for Thumb code, different ITState.
+ *
+ * For instructions which can modify PC, a second test_after probe is used
+ * like this:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *		b test_done
+ * test_after2: nop
+ * test_done:
+ *
+ * The test case is constructed such that test_insn branches to
+ * test_after2, or, if testing a conditional instruction, it may just
+ * continue to test_after. The probes inserted at both locations let us
+ * determine which happened. A similar approach is used for testing
+ * backwards branches...
+ *
+ *		b test_before
+ *		b test_done  @ helps to cope with off by 1 branches
+ * test_after2: nop
+ *		b test_done
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ * test_done:
+ *
+ * The macros used to generate the assembler instructions described above
+ * are TEST_INSTRUCTION, TEST_BRANCH_F (branch forwards) and TEST_BRANCH_B
+ * (branch backwards). In these, the local labels numbered 50, 1, 2 and
+ * 99 represent: test_before, test_case, test_after2 and test_done.
+ *
+ * FRAMEWORK
+ * ---------
+ *
+ * Each test case is wrapped between the pair of macros TESTCASE_START and
+ * TESTCASE_END. As well as performing the inline assembler boilerplate,
+ * these call out to the kprobes_test_case_start() and
+ * kprobes_test_case_end() functions which drive the execution of the test
+ * case. The specific arguments to use for each test case are stored as
+ * inline data constructed using the various TEST_ARG_* macros. Putting
+ * this all together, a simple test case may look like:
+ *
+ *	TESTCASE_START("Testing mov r0, r7")
+ *	TEST_ARG_REG(7, 0x12345678) // Set r7=0x12345678
+ *	TEST_ARG_END("")
+ *	TEST_INSTRUCTION("mov r0, r7")
+ *	TESTCASE_END
+ *
+ * Note, in practice the single convenience macro TEST_R would be used for this
+ * instead.
+ *
+ * The above would expand to assembler looking something like:
+ *
+ *	@ TESTCASE_START
+ *	bl	__kprobes_test_case_start
+ *	@ start of inline data...
+ *	.ascii "mov r0, r7"	@ text title for test case
+ *	.byte	0
+ *	.align	2
+ *
+ *	@ TEST_ARG_REG
+ *	.byte	ARG_TYPE_REG
+ *	.byte	7
+ *	.short	0
+ *	.word	0x12345678
+ *
+ *	@ TEST_ARG_END
+ *	.byte	ARG_TYPE_END
+ *	.byte	TEST_ISA	@ flags, including ISA being tested
+ *	.short	50f-0f		@ offset of 'test_before'
+ *	.short	2f-0f		@ offset of 'test_after2' (if relevant)
+ *	.short	99f-0f		@ offset of 'test_done'
+ *	@ start of test case code...
+ *	0:
+ *	.code	TEST_ISA	@ switch to ISA being tested
+ *
+ *	@ TEST_INSTRUCTION
+ *	50:	nop		@ location for 'test_before' probe
+ *	1:	mov r0, r7	@ the test case instruction 'test_insn'
+ *		nop		@ location for 'test_after' probe
+ *
+ *	// TESTCASE_END
+ *	2:
+ *	99:	bl __kprobes_test_case_end_##TEST_ISA
+ *	.code	NORMAL_ISA
+ *
+ * When the above is executed the following happens...
+ *
+ * __kprobes_test_case_start() is an assembler wrapper which sets up space
+ * for a stack buffer and calls the C function kprobes_test_case_start().
+ * This C function will do some initial processing of the inline data and
+ * setup some global state. It then inserts the test_before and test_after
+ * kprobes and returns a value which causes the assembler wrapper to jump
+ * to the start of the test case code, (local label '0').
+ *
+ * When the test case code executes, the test_before probe will be hit and
+ * test_before_post_handler will call setup_test_context(). This fills the
+ * stack buffer and CPU registers with a test pattern and then processes
+ * the test case arguments. In our example there is one TEST_ARG_REG which
+ * indicates that R7 should be loaded with the value 0x12345678.
+ *
+ * When the test_before probe ends, the test case continues and executes
+ * the "mov r0, r7" instruction. It then hits the test_after probe and the
+ * pre-handler for this (test_after_pre_handler) will save a copy of the
+ * CPU register context. This should now have R0 holding the same value as
+ * R7.
+ *
+ * Finally we get to the call to __kprobes_test_case_end_{32,16}. This is
+ * an assembler wrapper which switches back to the ISA used by the test
+ * code and calls the C function kprobes_test_case_end().
+ *
+ * For each run through the test case, test_case_run_count is incremented
+ * by one. For even runs, kprobes_test_case_end() saves a copy of the
+ * register and stack buffer contents from the test case just run. It then
+ * inserts a kprobe on the test case instruction 'test_insn' and returns a
+ * value to cause the test case code to be re-run.
+ *
+ * For odd numbered runs, kprobes_test_case_end() compares the register and
+ * stack buffer contents to those that were saved on the previous even
+ * numbered run (the one without the kprobe on test_insn). These should be
+ * the same if the kprobe instruction simulation routine is correct.
+ *
+ * The pair of test case runs is repeated with different combinations of
+ * flag values in CPSR and, for Thumb, different ITState. This is
+ * controlled by test_context_cpsr().
+ *
+ * BUILDING TEST CASES
+ * -------------------
+ *
+ *
+ * As an aid to building test cases, the stack buffer is initialised with
+ * some special values:
+ *
+ *   [SP+13*4]	Contains SP+120. This can be used to test instructions
+ *		which load a value into SP.
+ *
+ *   [SP+15*4]	When testing branching instructions using TEST_BRANCH_{F,B},
+ *		this holds the target address of the branch, 'test_after2'.
+ *		This can be used to test instructions which load a PC value
+ *		from memory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kprobes.h>
+
+#include "kprobes.h"
+#include "kprobes-test.h"
+
+
+#define BENCHMARKING	1
+
+
+/*
+ * Test basic API
+ */
+
+static bool test_regs_ok;
+static int test_func_instance;
+static int pre_handler_called;
+static int post_handler_called;
+static int jprobe_func_called;
+static int kretprobe_handler_called;
+
+#define FUNC_ARG1 0x12345678
+#define FUNC_ARG2 0xabcdef
+
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+/*
+ * arm_func() is implemented in assembler inside the naked container function
+ * below. It adds its two arguments with a flag-setting "adds" and returns the
+ * sum in r0 via "bx lr".
+ */
+long arm_func(long r0, long r1);
+
+/*
+ * Container for the arm_func assembler. The ".arm" directive selects ARM
+ * encoding for the routine and ".code NORMAL_ISA" switches back afterwards.
+ */
+static void __used __naked __arm_kprobes_test_func(void)
+{
+	__asm__ __volatile__ (
+		".arm					\n\t"
+		".type arm_func, %%function		\n\t"
+		"arm_func:				\n\t"
+		"adds	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+		".code "NORMAL_ISA	 /* Back to Thumb if necessary */
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+/*
+ * Three Thumb test functions implemented in assembler inside the naked
+ * container function below. Each adds its two arguments with a flag-setting
+ * "adds" and returns the sum in r0 via "bx lr". They differ only in the
+ * encoding and placement of the first instruction:
+ *
+ *   thumb16_func	"adds.n" (narrow encoding)
+ *   thumb32even_func	"adds.w" (wide encoding) directly after ".align"
+ *   thumb32odd_func	"adds.w" after ".align" followed by a "nop.n";
+ *			presumably this puts the wide instruction on a halfword
+ *			boundary that is not word aligned - TODO confirm
+ */
+long thumb16_func(long r0, long r1);
+long thumb32even_func(long r0, long r1);
+long thumb32odd_func(long r0, long r1);
+
+/* Container for the assembler bodies of the three functions declared above. */
+static void __used __naked __thumb_kprobes_test_funcs(void)
+{
+	__asm__ __volatile__ (
+		".type thumb16_func, %%function		\n\t"
+		"thumb16_func:				\n\t"
+		"adds.n	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		".type thumb32even_func, %%function	\n\t"
+		"thumb32even_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		"nop.n					\n\t"
+		".type thumb32odd_func, %%function	\n\t"
+		"thumb32odd_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#endif /* CONFIG_THUMB2_KERNEL */
+
+
+/*
+ * Call func with the fixed arguments FUNC_ARG1 and FUNC_ARG2 and check that it
+ * returns their sum.
+ *
+ * Every call increments test_func_instance and clears test_regs_ok first, so
+ * probe handlers can record which call they ran for. When check_test_regs is
+ * set, test_regs_ok must have been set again by the time func returns.
+ *
+ * Returns true on success, or false after logging the failure.
+ */
+static int call_test_func(long (*func)(long, long), bool check_test_regs)
+{
+	long result;
+
+	test_regs_ok = false;
+	++test_func_instance;
+
+	result = (*func)(FUNC_ARG1, FUNC_ARG2);
+	if (result != FUNC_ARG1 + FUNC_ARG2) {
+		pr_err("FAIL: call_test_func: func returned %lx\n", result);
+		return false;
+	}
+
+	/* Nothing more to verify unless the caller asked for the regs check. */
+	if (!check_test_regs || test_regs_ok)
+		return true;
+
+	pr_err("FAIL: test regs not OK\n");
+	return false;
+}
+
+/*
+ * kprobe pre-handler: note which test call we ran for and flag the registers
+ * as OK if r0 and r1 still hold the arguments passed by call_test_func().
+ * Always returns 0.
+ */
+static int __kprobes pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	bool args_intact = regs->ARM_r0 == FUNC_ARG1 &&
+			   regs->ARM_r1 == FUNC_ARG2;
+
+	pre_handler_called = test_func_instance;
+	if (args_intact)
+		test_regs_ok = true;
+
+	return 0;
+}
+
+/*
+ * kprobe post-handler: note which test call we ran for and withdraw the
+ * "registers OK" verdict unless r0 now holds the sum of the two arguments and
+ * r1 is still the second argument.
+ */
+static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs,
+				unsigned long flags)
+{
+	bool sum_in_r0 = regs->ARM_r0 == FUNC_ARG1 + FUNC_ARG2;
+	bool r1_intact = regs->ARM_r1 == FUNC_ARG2;
+
+	post_handler_called = test_func_instance;
+	if (!(sum_in_r0 && r1_intact))
+		test_regs_ok = false;
+}
+
+/*
+ * Probe object reused by test_kprobe(); the target address is filled in
+ * there before each registration.
+ */
+static struct kprobe the_kprobe = {
+	.pre_handler	= pre_handler,
+	.post_handler	= post_handler,
+	.addr		= NULL,
+};
+
+/*
+ * Check the basic kprobe API against func.
+ *
+ * A probe is registered on func, func is called once with the register check
+ * enabled, and the probe is unregistered again. Both handlers must have run
+ * for that call. func is then called a second time to confirm that neither
+ * handler runs once the probe has been removed.
+ *
+ * Returns 0 on success, the register_kprobe() error if registration fails,
+ * or -EINVAL if any check fails.
+ */
+static int test_kprobe(long (*func)(long, long))
+{
+	int err;
+	int call_ok;
+
+	the_kprobe.addr = (kprobe_opcode_t *)func;
+	err = register_kprobe(&the_kprobe);
+	if (err < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", err);
+		return err;
+	}
+
+	call_ok = call_test_func(func, true);
+
+	/* Remove the probe before looking at the outcome of the call. */
+	unregister_kprobe(&the_kprobe);
+	the_kprobe.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!call_ok)
+		return -EINVAL;
+
+	if (pre_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe pre_handler not called\n");
+		return -EINVAL;
+	}
+
+	if (post_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe post_handler not called\n");
+		return -EINVAL;
+	}
+
+	/* Second call: the handlers must stay silent now. */
+	if (!call_test_func(func, false))
+		return -EINVAL;
+
+	if (pre_handler_called == test_func_instance ||
+	    post_handler_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * jprobe entry function: note which test call we ran for, flag the registers
+ * as OK if both arguments arrived unchanged, then hand control back with
+ * jprobe_return().
+ */
+static void __kprobes jprobe_func(long r0, long r1)
+{
+	bool args_intact = (r0 == FUNC_ARG1) && (r1 == FUNC_ARG2);
+
+	jprobe_func_called = test_func_instance;
+	if (args_intact)
+		test_regs_ok = true;
+
+	jprobe_return();
+}
+
+/*
+ * Probe object reused by test_jprobe(); kp.addr is filled in there before
+ * each registration.
+ */
+static struct jprobe the_jprobe = {
+	.entry		= jprobe_func,
+};
+
+/*
+ * Check the jprobe API against func.
+ *
+ * A jprobe is registered on func, func is called once with the register
+ * check enabled, and the jprobe is unregistered again. jprobe_func() must
+ * have run for that call. func is then called a second time to confirm that
+ * jprobe_func() no longer runs once the probe has been removed.
+ *
+ * Returns 0 on success, the register_jprobe() error if registration fails,
+ * or -EINVAL if any check fails.
+ */
+static int test_jprobe(long (*func)(long, long))
+{
+	int err;
+	int call_ok;
+
+	the_jprobe.kp.addr = (kprobe_opcode_t *)func;
+	err = register_jprobe(&the_jprobe);
+	if (err < 0) {
+		pr_err("FAIL: register_jprobe failed with %d\n", err);
+		return err;
+	}
+
+	call_ok = call_test_func(func, true);
+
+	/* Remove the probe before looking at the outcome of the call. */
+	unregister_jprobe(&the_jprobe);
+	the_jprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!call_ok)
+		return -EINVAL;
+
+	if (jprobe_func_called != test_func_instance) {
+		pr_err("FAIL: jprobe handler function not called\n");
+		return -EINVAL;
+	}
+
+	/* Second call: the handler must stay silent now. */
+	if (!call_test_func(func, false))
+		return -EINVAL;
+
+	if (jprobe_func_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * kretprobe handler: note which test call we ran for and flag the registers
+ * as OK if the probed function's return value is the sum of the two test
+ * arguments. Always returns 0.
+ */
+static int __kprobes
+kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	bool sum_returned = regs_return_value(regs) == FUNC_ARG1 + FUNC_ARG2;
+
+	kretprobe_handler_called = test_func_instance;
+	if (sum_returned)
+		test_regs_ok = true;
+
+	return 0;
+}
+
+/*
+ * Probe object reused by test_kretprobe(); kp.addr is filled in there before
+ * each registration.
+ */
+static struct kretprobe the_kretprobe = {
+	.handler	= kretprobe_handler,
+};
+
+/*
+ * Check the kretprobe API against func.
+ *
+ * A kretprobe is registered on func, func is called once with the register
+ * check enabled, and the kretprobe is unregistered again.
+ * kretprobe_handler() must have run for that call. func is then called a
+ * second time to confirm that the handler no longer runs once the probe has
+ * been removed.
+ *
+ * Returns 0 on success, the register_kretprobe() error if registration
+ * fails, or -EINVAL if any check fails.
+ */
+static int test_kretprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_kretprobe.kp.addr = (kprobe_opcode_t *)func;
+	ret = register_kretprobe(&the_kretprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_kretprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	/* Remove the probe before looking at the outcome of the call. */
+	unregister_kretprobe(&the_kretprobe);
+	the_kretprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (kretprobe_handler_called != test_func_instance) {
+		pr_err("FAIL: kretprobe handler not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	/*
+	 * Check the kretprobe's own marker here; the jprobe marker says
+	 * nothing about whether the kretprobe handler ran.
+	 */
+	if (kretprobe_handler_called == test_func_instance) {
+		pr_err("FAIL: kretprobe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Run the kprobe, jprobe and kretprobe API tests against func, in that
+ * order, announcing each one first. Stops at the first test that returns a
+ * negative value and passes that value on; returns 0 otherwise.
+ */
+static int run_api_tests(long (*func)(long, long))
+{
+	int err;
+
+	pr_info("    kprobe\n");
+	err = test_kprobe(func);
+
+	if (err >= 0) {
+		pr_info("    jprobe\n");
+		err = test_jprobe(func);
+	}
+
+	if (err >= 0) {
+		pr_info("    kretprobe\n");
+		err = test_kretprobe(func);
+	}
+
+	return err < 0 ? err : 0;
+}
+
+
+/*
+ * Benchmarking
+ */
+
+#if BENCHMARKING
+
+/*
+ * Benchmark target consisting of a single "nop" followed by a return
+ * ("bx lr"). run_benchmarks() probes the nop at offset 0.
+ */
+static void __naked benchmark_nop(void)
+{
+	__asm__ __volatile__ (
+		"nop		\n\t"
+		"bx	lr"
+	);
+}
+
+/*
+ * Suffix pasted onto the stmdb/ldmia mnemonics of the benchmark functions:
+ * ".w" on Thumb-2 kernels, nothing otherwise. Presumably this keeps the
+ * second instruction at byte offset 4, which is the offset run_benchmarks()
+ * probes - TODO confirm.
+ */
+#ifdef CONFIG_THUMB2_KERNEL
+#define wide ".w"
+#else
+#define wide
+#endif
+
+/*
+ * Benchmark target: store {r3-r11,lr} below sp, then load them back with the
+ * saved lr going into pc, which returns to the caller. The comment in
+ * run_benchmarks() says this register list gets the optimised emulation.
+ */
+static void __naked benchmark_pushpop1(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r3-r11,lr}  \n\t"
+		"ldmia"wide"	sp!, {r3-r11,pc}"
+	);
+}
+
+/*
+ * Benchmark target: store {r0-r8,lr} below sp, then load them back with the
+ * saved lr going into pc, which returns to the caller. The comment in
+ * run_benchmarks() describes this as the unoptimised equivalent of
+ * benchmark_pushpop1().
+ */
+static void __naked benchmark_pushpop2(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0-r8,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0-r8,pc}"
+	);
+}
+
+/*
+ * Benchmark target: store {r4,lr} below sp, then load them back with the
+ * saved lr going into pc, which returns to the caller. The comment in
+ * run_benchmarks() says this register list gets the optimised emulation.
+ */
+static void __naked benchmark_pushpop3(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r4,lr}  \n\t"
+		"ldmia"wide"	sp!, {r4,pc}"
+	);
+}
+
+/*
+ * Benchmark target: store {r0,lr} below sp, then load them back with the
+ * saved lr going into pc, which returns to the caller. The comment in
+ * run_benchmarks() describes this as the unoptimised equivalent of
+ * benchmark_pushpop3().
+ */
+static void __naked benchmark_pushpop4(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0,pc}"
+	);
+}
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+
+/*
+ * Benchmark target using the narrow Thumb encodings: push {r0-r7,lr}, then
+ * pop them back with the saved lr going into pc, which returns to the
+ * caller. run_benchmarks() probes the push at offset 0 and the pop at
+ * offset 2.
+ */
+static void __naked benchmark_pushpop_thumb(void)
+{
+	__asm__ __volatile__ (
+		"push.n	{r0-r7,lr}  \n\t"
+		"pop.n	{r0-r7,pc}"
+	);
+}
+
+#endif
+
+/*
+ * Pre-handler for the benchmark probes. It does nothing and returns 0, so
+ * the handler itself adds as little as possible to the measured time.
+ */
+static int __kprobes
+benchmark_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	return 0;
+}
+
+/*
+ * Measure the average time of one call to fn.
+ *
+ * fn is called n times in a row, timed with sched_clock(). n starts at 1000
+ * and doubles until a whole batch takes at least 0.25 seconds; the duration
+ * of that final batch divided by n is returned.
+ *
+ * NOTE(review): the sched_clock() values are kept in 'unsigned' variables,
+ * so only the low bits of the timestamps are used. That looks sufficient
+ * for the sub-second intervals measured here - confirm if the threshold is
+ * ever raised.
+ */
+static int benchmark(void(*fn)(void))
+{
+	unsigned n, i, t, t0;
+
+	for (n = 1000; ; n *= 2) {
+		t0 = sched_clock();
+		for (i = n; i > 0; --i)
+			fn();
+		t = sched_clock() - t0;
+		if (t >= 250000000)
+			break; /* Stop once we took more than 0.25 seconds */
+	}
+	return t / n; /* Time for one iteration in nanoseconds */
+}
+
+/*
+ * Benchmark fn with a kprobe installed on one of its instructions.
+ *
+ * @fn:     function to call repeatedly
+ * @offset: byte offset from fn of the instruction to probe
+ *
+ * The probe uses benchmark_pre_handler() and is removed again before
+ * returning. Returns the result of benchmark(fn), or the negative
+ * register_kprobe() error if the probe could not be installed.
+ */
+static int kprobe_benchmark(void(*fn)(void), unsigned offset)
+{
+	struct kprobe k = {
+		.addr		= (kprobe_opcode_t *)((uintptr_t)fn + offset),
+		.pre_handler	= benchmark_pre_handler,
+	};
+	int ret;
+
+	ret = register_kprobe(&k);
+	if (ret < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = benchmark(fn);
+
+	unregister_kprobe(&k);
+	return ret;
+}
+
+/* One entry in the list of benchmarks executed by run_benchmarks(). */
+struct benchmarks {
+	void		(*fn)(void);	/* function to call; NULL ends the list */
+	unsigned	offset;		/* byte offset from fn of the probed insn */
+	const char	*title;		/* text printed alongside the result */
+};
+
+/*
+ * Run kprobe_benchmark() for every entry in the list below and print the
+ * time reported for each. Stops at the first negative result and returns
+ * it; returns 0 once all entries have been reported.
+ */
+static int run_benchmarks(void)
+{
+	struct benchmarks list[] = {
+		{&benchmark_nop, 0, "nop"},
+		/*
+		 * benchmark_pushpop{1,3} will have the optimised
+		 * instruction emulation, whilst benchmark_pushpop{2,4} will
+		 * be the equivalent unoptimised instructions.
+		 */
+		{&benchmark_pushpop1, 0, "stmdb	sp!, {r3-r11,lr}"},
+		{&benchmark_pushpop1, 4, "ldmia	sp!, {r3-r11,pc}"},
+		{&benchmark_pushpop2, 0, "stmdb	sp!, {r0-r8,lr}"},
+		{&benchmark_pushpop2, 4, "ldmia	sp!, {r0-r8,pc}"},
+		{&benchmark_pushpop3, 0, "stmdb	sp!, {r4,lr}"},
+		{&benchmark_pushpop3, 4, "ldmia	sp!, {r4,pc}"},
+		{&benchmark_pushpop4, 0, "stmdb	sp!, {r0,lr}"},
+		{&benchmark_pushpop4, 4, "ldmia	sp!, {r0,pc}"},
+#ifdef CONFIG_THUMB2_KERNEL
+		{&benchmark_pushpop_thumb, 0, "push.n	{r0-r7,lr}"},
+		{&benchmark_pushpop_thumb, 2, "pop.n	{r0-r7,pc}"},
+#endif
+		{0}
+	};
+	struct benchmarks *entry = list;
+	int ns;
+
+	/* The list is terminated by an entry with a NULL fn. */
+	while (entry->fn) {
+		ns = kprobe_benchmark(entry->fn, entry->offset);
+		if (ns < 0)
+			return ns;
+		pr_info("    %dns for kprobe %s\n", ns, entry->title);
+		++entry;
+	}
+
+	pr_info("\n");
+	return 0;
+}
+
+#endif /* BENCHMARKING */
+
+
+/*
+ * Decoding table self-consistency tests
+ */
+
+/*
+ * Size in bytes of the table entry structure used for each decode type.
+ * table_iter() adds the size for the current entry's type to step to the
+ * next entry.
+ */
+static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
+	[DECODE_TYPE_TABLE]	= sizeof(struct decode_table),
+	[DECODE_TYPE_CUSTOM]	= sizeof(struct decode_custom),
+	[DECODE_TYPE_SIMULATE]	= sizeof(struct decode_simulate),
+	[DECODE_TYPE_EMULATE]	= sizeof(struct decode_emulate),
+	[DECODE_TYPE_OR]	= sizeof(struct decode_or),
+	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
+};
+
+/*
+ * Walk a decoding table, calling fn(entry, args) for each entry until an
+ * entry of type DECODE_TYPE_END is reached.
+ *
+ * Entries vary in size, so the step to the next one is looked up in
+ * decode_struct_sizes[] using the current entry's type.
+ *
+ * Returns the first non-zero value returned by fn, or 0 if the end of the
+ * table was reached.
+ */
+static int table_iter(const union decode_item *table,
+			int (*fn)(const struct decode_header *, void *),
+			void *args)
+{
+	const struct decode_header *entry = (struct decode_header *)table;
+	enum decode_type type = entry->type_regs.bits & DECODE_TYPE_MASK;
+
+	while (type != DECODE_TYPE_END) {
+		int err = fn(entry, args);
+
+		if (err)
+			return err;
+
+		entry = (struct decode_header *)
+			((uintptr_t)entry + decode_struct_sizes[type]);
+		type = entry->type_regs.bits & DECODE_TYPE_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * Report a decoding table consistency failure for entry 'h', logging
+ * 'message' together with the entry's mask and value.
+ *
+ * Always returns -EINVAL so callers can 'return table_test_fail(...)'.
+ */
+static int table_test_fail(const struct decode_header *h, const char* message)
+{
+
+	pr_err("FAIL: kprobes test failure \"%s\" (mask %08x, value %08x)\n",
+					message, h->mask.bits, h->value.bits);
+	return -EINVAL;
+}
+
+/* State passed through table_iter() to table_test_fn() */
+struct table_test_args {
+	/* Top-level table given to table_test() */
+	const union decode_item *root_table;
+	/* Mask and value of the enclosing table entry (both 0 at the root) */
+	u32			parent_mask;
+	u32			parent_value;
+};
+
+/*
+ * table_iter() callback which checks one decoding table entry for
+ * self-consistency and for consistency with the entry of the table which
+ * contains it, then descends into the sub-table of a DECODE_TYPE_TABLE entry.
+ *
+ * Returns 0 if all checks pass, otherwise the error from table_test_fail().
+ */
+static int table_test_fn(const struct decode_header *h, void *args)
+{
+	struct table_test_args *parent = (struct table_test_args *)args;
+	u32 mask = h->mask.bits;
+	u32 value = h->value.bits;
+	struct table_test_args child;
+
+	if (value & ~mask)
+		return table_test_fail(h, "Match value has bits not in mask");
+
+	if ((mask & parent->parent_mask) != parent->parent_mask)
+		return table_test_fail(h, "Mask has bits not in parent mask");
+
+	if ((value ^ parent->parent_value) & parent->parent_mask)
+		return table_test_fail(h, "Value is inconsistent with parent");
+
+	if ((h->type_regs.bits & DECODE_TYPE_MASK) != DECODE_TYPE_TABLE)
+		return 0;
+
+	/* Entries of the sub-table are checked against this entry */
+	child = *parent;
+	child.parent_mask = mask;
+	child.parent_value = value;
+	return table_iter(((struct decode_table *)h)->table.table,
+				table_test_fn, &child);
+}
+
+/*
+ * Run the self-consistency checks over a complete decoding table.
+ *
+ * Returns 0 if the table is consistent, otherwise a negative error code.
+ */
+static int table_test(const union decode_item *table)
+{
+	struct table_test_args root;
+
+	/* The top level has no parent, so nothing is constrained yet */
+	root.root_table = table;
+	root.parent_mask = 0;
+	root.parent_value = 0;
+
+	return table_iter(table, table_test_fn, &root);
+}
+
+
+/*
+ * Decoding table test coverage analysis
+ *
+ * coverage_start() builds a coverage_table which contains a list of
+ * coverage_entry's to match each entry in the specified kprobes instruction
+ * decoding table.
+ *
+ * When test cases are run, coverage_add() is called to process each case.
+ * This looks up the corresponding entry in the coverage_table and sets it as
+ * being matched, as well as clearing the regs flag appropriate for the test.
+ *
+ * After all test cases have been run, coverage_end() is called to check that
+ * all entries in coverage_table have been matched and that all regs flags are
+ * cleared. I.e. that all possible combinations of instructions described by
+ * the kprobes decoding tables have had a test case executed for them.
+ */
+
+/* Set by coverage_end() when any table entry lacks test coverage */
+bool coverage_fail;
+
+/* Number of coverage_entry slots allocated by coverage_start() */
+#define MAX_COVERAGE_ENTRIES 256
+
+/* Coverage state for one entry of a decoding table */
+struct coverage_entry {
+	/* The decoding table entry this record tracks */
+	const struct decode_header	*header;
+	/* COVERAGE_* flags still to be tested, 4 bits per register field */
+	unsigned			regs;
+	/* Depth of the (sub-)table containing the entry, 0 for the root */
+	unsigned			nesting;
+	/* Set once a test instruction has matched this entry */
+	char				matched;
+};
+
+/* The list of coverage entries built by coverage_start() */
+struct coverage_table {
+	struct coverage_entry	*base;		/* Allocated array of entries */
+	unsigned		num_entries;	/* Entries in use */
+	unsigned		nesting;	/* Current depth while building */
+};
+
+struct coverage_table coverage;
+
+/*
+ * Per-register-field coverage flags. coverage_add_registers() clears
+ * COVERAGE_SP when a test uses r13, COVERAGE_PC when it uses r15,
+ * COVERAGE_ANY_REG for any other register, and COVERAGE_PCWB when r15 is
+ * used with writeback in a REG_TYPE_NOPCWB field.
+ */
+#define COVERAGE_ANY_REG	(1<<0)
+#define COVERAGE_SP		(1<<1)
+#define COVERAGE_PC		(1<<2)
+#define COVERAGE_PCWB		(1<<3)
+
+/*
+ * Flags initially set for each register type, i.e. the register classes
+ * which must each be exercised by some test case before coverage_end()
+ * considers a field of that type covered. Types without an initialiser
+ * start with no flags set.
+ */
+static const char coverage_register_lookup[16] = {
+	[REG_TYPE_ANY]		= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_SAMEAS16]	= COVERAGE_ANY_REG,
+	[REG_TYPE_SP]		= COVERAGE_SP,
+	[REG_TYPE_PC]		= COVERAGE_PC,
+	[REG_TYPE_NOSP]		= COVERAGE_ANY_REG | COVERAGE_SP,
+	[REG_TYPE_NOSPPC]	= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_NOPC]		= COVERAGE_ANY_REG | COVERAGE_PC,
+	[REG_TYPE_NOPCWB]	= COVERAGE_ANY_REG | COVERAGE_PC | COVERAGE_PCWB,
+	[REG_TYPE_NOPCX]	= COVERAGE_ANY_REG,
+	[REG_TYPE_NOSPPCX]	= COVERAGE_ANY_REG | COVERAGE_SP,
+};
+
+/*
+ * Build the initial register coverage flags for decoding table entry 'h'.
+ *
+ * Each of the five 4-bit register type fields above the decode type bits is
+ * translated through coverage_register_lookup and placed at the same field
+ * position in the result.
+ */
+unsigned coverage_start_registers(const struct decode_header *h)
+{
+	unsigned pending = 0;
+	int shift = 0;
+
+	while (shift < 20) {
+		int reg_type =
+			(h->type_regs.bits >> (DECODE_TYPE_BITS + shift)) & 0xf;
+
+		pending |= coverage_register_lookup[reg_type] << shift;
+		shift += 4;
+	}
+
+	return pending;
+}
+
+/*
+ * table_iter() callback which appends a coverage_entry for decoding table
+ * entry 'h' to the coverage_table passed in 'args', then recurses into the
+ * sub-table if 'h' is a DECODE_TYPE_TABLE entry.
+ *
+ * Returns 0 on success, -ENOMEM if the coverage table is full, or the result
+ * of iterating the sub-table.
+ */
+static int coverage_start_fn(const struct decode_header *h, void *args)
+{
+	struct coverage_table *coverage = (struct coverage_table *)args;
+	enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+	struct coverage_entry *entry = coverage->base + coverage->num_entries;
+
+	if (coverage->num_entries == MAX_COVERAGE_ENTRIES - 1) {
+		/* Newline terminated so later messages start on a fresh line */
+		pr_err("FAIL: Out of space for test coverage data\n");
+		return -ENOMEM;
+	}
+
+	++coverage->num_entries;
+
+	entry->header = h;
+	entry->regs = coverage_start_registers(h);
+	entry->nesting = coverage->nesting;
+	entry->matched = false;
+
+	if (type == DECODE_TYPE_TABLE) {
+		struct decode_table *d = (struct decode_table *)h;
+		int ret;
+
+		/* Entries of the sub-table are recorded one level deeper */
+		++coverage->nesting;
+		ret = table_iter(d->table.table, coverage_start_fn, coverage);
+		--coverage->nesting;
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the global coverage table and fill it with one entry for every
+ * entry in decoding table 'table', including those in nested sub-tables.
+ *
+ * Returns 0 on success, in which case coverage_end() must be called to
+ * release the table. On failure a negative error code is returned and
+ * nothing is left allocated.
+ */
+static int coverage_start(const union decode_item *table)
+{
+	int ret;
+
+	coverage.num_entries = 0;
+	coverage.nesting = 0;
+	coverage.base = kmalloc(MAX_COVERAGE_ENTRIES *
+				sizeof(struct coverage_entry), GFP_KERNEL);
+	if (!coverage.base)
+		return -ENOMEM;
+
+	ret = table_iter(table, coverage_start_fn, &coverage);
+	if (ret) {
+		/* Callers don't call coverage_end() on error, so free here */
+		kfree(coverage.base);
+		coverage.base = NULL;
+		coverage.num_entries = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Update the register coverage flags of 'entry' for test instruction 'insn'.
+ *
+ * Each of the five 4-bit register type fields described by the entry's
+ * header is paired with the 4 bits at the same position in 'insn', which are
+ * taken to be the register number used by the test case.
+ */
+static void
+coverage_add_registers(struct coverage_entry *entry, kprobe_opcode_t insn)
+{
+	int regs = entry->header->type_regs.bits >> DECODE_TYPE_BITS;
+	int i;
+	for (i = 0; i < 20; i += 4) {
+		enum decode_reg_type reg_type = (regs >> i) & 0xf;
+		int reg = (insn >> i) & 0xf;
+		int flag;
+
+		/* Nothing to track for a field whose register type is zero */
+		if (!reg_type)
+			continue;
+
+		/* Mark the class of register used in this field as covered */
+		if (reg == 13)
+			flag = COVERAGE_SP;
+		else if (reg == 15)
+			flag = COVERAGE_PC;
+		else
+			flag = COVERAGE_ANY_REG;
+		entry->regs &= ~(flag << i);
+
+		/*
+		 * Each 'return' below is taken when the register used is not
+		 * one the field's type permits; the remaining fields of the
+		 * instruction are then left unprocessed.
+		 */
+		switch (reg_type) {
+
+		case REG_TYPE_NONE:
+		case REG_TYPE_ANY:
+		case REG_TYPE_SAMEAS16:
+			break;
+
+		case REG_TYPE_SP:
+			if (reg != 13)
+				return;
+			break;
+
+		case REG_TYPE_PC:
+			if (reg != 15)
+				return;
+			break;
+
+		case REG_TYPE_NOSP:
+			if (reg == 13)
+				return;
+			break;
+
+		case REG_TYPE_NOSPPC:
+		case REG_TYPE_NOSPPCX:
+			if (reg == 13 || reg == 15)
+				return;
+			break;
+
+		case REG_TYPE_NOPCWB:
+			/* PC is only a problem here when writeback is used */
+			if (!is_writeback(insn))
+				break;
+			if (reg == 15) {
+				entry->regs &= ~(COVERAGE_PCWB << i);
+				return;
+			}
+			break;
+
+		case REG_TYPE_NOPC:
+		case REG_TYPE_NOPCX:
+			if (reg == 15)
+				return;
+			break;
+		}
+
+	}
+}
+
+/*
+ * Record test coverage for instruction 'insn'.
+ *
+ * Walks the flattened coverage table, following the same path through the
+ * decoding (sub-)tables that 'insn' matches. Every entry matched on the way
+ * is flagged as matched and, when a CUSTOM, SIMULATE or EMULATE entry is
+ * reached, its register coverage flags are updated.
+ */
+static void coverage_add(kprobe_opcode_t insn)
+{
+	struct coverage_entry *entry = coverage.base;
+	struct coverage_entry *end = coverage.base + coverage.num_entries;
+	/* True when the previous entry was a matching DECODE_TYPE_OR */
+	bool matched = false;
+	/* Depth of the table currently being scanned */
+	unsigned nesting = 0;
+
+	for (; entry < end; ++entry) {
+		const struct decode_header *h = entry->header;
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+		if (entry->nesting > nesting)
+			continue; /* Skip sub-table we didn't match */
+
+		if (entry->nesting < nesting)
+			break; /* End of sub-table we were scanning */
+
+		/*
+		 * After a matching OR entry the following entry is used
+		 * without testing its own mask and value (and without
+		 * flagging it as matched).
+		 */
+		if (!matched) {
+			if ((insn & h->mask.bits) != h->value.bits)
+				continue;
+			entry->matched = true;
+		}
+
+		switch (type) {
+
+		case DECODE_TYPE_TABLE:
+			/* Continue the scan in the entries of the sub-table */
+			++nesting;
+			break;
+
+		case DECODE_TYPE_CUSTOM:
+		case DECODE_TYPE_SIMULATE:
+		case DECODE_TYPE_EMULATE:
+			coverage_add_registers(entry, insn);
+			return;
+
+		case DECODE_TYPE_OR:
+			matched = true;
+			break;
+
+		case DECODE_TYPE_REJECT:
+		default:
+			return;
+		}
+
+	}
+}
+
+/*
+ * Report every coverage table entry which was never matched by a test case
+ * or which still has register coverage flags outstanding, setting
+ * coverage_fail if any are found, then free the coverage table.
+ */
+static void coverage_end(void)
+{
+	unsigned n;
+
+	for (n = 0; n < coverage.num_entries; ++n) {
+		struct coverage_entry *entry = &coverage.base[n];
+		u32 mask = entry->header->mask.bits;
+		u32 value = entry->header->value.bits;
+
+		if (entry->regs) {
+			pr_err("FAIL: Register test coverage missing for %08x %08x (%05x)\n",
+				mask, value, entry->regs);
+			coverage_fail = true;
+		}
+
+		if (entry->matched)
+			continue;
+
+		pr_err("FAIL: Test coverage entry missing for %08x %08x\n",
+			mask, value);
+		coverage_fail = true;
+	}
+
+	kfree(coverage.base);
+}
+
+
+/*
+ * Framework for instruction set test cases
+ */
+
+/*
+ * Assembly entry stub for the start of a test case.
+ *
+ * Saves r4-r11, reserves TEST_MEMORY_SIZE bytes of stack for test memory and
+ * calls kprobes_test_case_start() with r0 = LR with bit 0 cleared (the
+ * inline title string) and r1 = the new SP. It then branches to the address
+ * that function returns.
+ */
+void __naked __kprobes_test_case_start(void)
+{
+	__asm__ __volatile__ (
+		"stmdb	sp!, {r4-r11}				\n\t"
+		"sub	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"bic	r0, lr, #1  @ r0 = inline title string	\n\t"
+		"mov	r1, sp					\n\t"
+		"bl	kprobes_test_case_start			\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+/*
+ * Assembly stub for the end of a test case (ARM kernel build).
+ *
+ * Calls kprobes_test_case_end() with the return address kept in r4. A
+ * non-zero result is an address to jump to for another run of the test
+ * case; otherwise the test memory is released, r4-r11 are restored and
+ * execution continues at the saved return address.
+ */
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"movne	pc, r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"mov	pc, r0					\n\t"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+/*
+ * Assembly stub for the end of a test case (Thumb-2 kernel build).
+ *
+ * Calls kprobes_test_case_end() with the return address kept in r4. A
+ * non-zero result is an address to branch to for another run of the test
+ * case; otherwise the test memory is released, r4-r11 are restored and
+ * execution continues at the saved return address.
+ */
+void __naked __kprobes_test_case_end_16(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"bxne	r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+/*
+ * ARM-state variant of the end-of-test stub for a Thumb-2 kernel build. It
+ * sets bit 0 of LR so the eventual return is to Thumb code, then loads the
+ * address of __kprobes_test_case_end_16 from the literal word below into PC.
+ */
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		".arm						\n\t"
+		"orr	lr, lr, #1  @ will return to Thumb code	\n\t"
+		"ldr	pc, 1f					\n\t"
+		"1:						\n\t"
+		".word	__kprobes_test_case_end_16		\n\t"
+	);
+}
+
+#endif
+
+
+/* TEST_FLAG_* values and condition code bit position for the current tests */
+int kprobe_test_flags;
+int kprobe_test_cc_position;
+
+/* Totals reported by run_all_tests() */
+static int test_try_count;
+static int test_pass_count;
+static int test_fail_count;
+
+/*
+ * Register state: as set up before the test instruction, as captured from
+ * the reference run without a probe, and as captured from the latest run.
+ */
+static struct pt_regs initial_regs;
+static struct pt_regs expected_regs;
+static struct pt_regs result_regs;
+
+/* Copy of the test memory saved from the reference run */
+static u32 expected_memory[TEST_MEMORY_SIZE/sizeof(u32)];
+
+/* Description of the test case currently being run */
+static const char *current_title;
+static struct test_arg *current_args;
+static u32 *current_stack;
+static uintptr_t current_branch_target;	/* 0 if the test has none */
+
+static uintptr_t current_code_start;
+static kprobe_opcode_t current_instruction;
+
+
+/* Negative test_case_run_count values set by kprobes_test_case_start() */
+#define TEST_CASE_PASSED -1
+#define TEST_CASE_FAILED -2
+
+/* Run number within the current test case; the scenario is this value >> 1 */
+static int test_case_run_count;
+static bool test_case_is_thumb;
+/* Incremented before every run so probe 'hit' values identify the run */
+static int test_instance;
+
+/*
+ * We ignore the state of the imprecise abort disable flag (CPSR.A) because this
+ * can change randomly as the kernel doesn't take care to preserve or initialise
+ * this across context switches. Also, with Security Extensions, the flag may
+ * not be under control of the kernel; for this reason we ignore the state of
+ * the FIQ disable flag CPSR.F as well.
+ */
+#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT)
+
+/*
+ * Evaluate condition code 'cc' (0x0 to 0xf) against the flags in 'cpsr'.
+ *
+ * Returns non-zero if the condition passes and zero if it fails. The
+ * non-zero value is the PSR bit which was tested (or 'true' for codes 0xe
+ * and 0xf), so callers should only compare the result with zero. Any other
+ * value of 'cc' reaches BUG().
+ */
+static unsigned long test_check_cc(int cc, unsigned long cpsr)
+{
+	unsigned long temp;
+
+	/*
+	 * The compound conditions below shift 'cpsr' to line one flag up
+	 * with another, as described by the comment on each shift.
+	 */
+	switch (cc) {
+	case 0x0: /* eq */
+		return cpsr & PSR_Z_BIT;
+
+	case 0x1: /* ne */
+		return (~cpsr) & PSR_Z_BIT;
+
+	case 0x2: /* cs */
+		return cpsr & PSR_C_BIT;
+
+	case 0x3: /* cc */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0x4: /* mi */
+		return cpsr & PSR_N_BIT;
+
+	case 0x5: /* pl */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0x6: /* vs */
+		return cpsr & PSR_V_BIT;
+
+	case 0x7: /* vc */
+		return (~cpsr) & PSR_V_BIT;
+
+	case 0x8: /* hi */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return cpsr & PSR_C_BIT;
+
+	case 0x9: /* ls */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0xa: /* ge */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0xb: /* lt */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return cpsr & PSR_N_BIT;
+
+	case 0xc: /* gt */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return (~temp) & PSR_N_BIT;
+
+	case 0xd: /* le */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return temp & PSR_N_BIT;
+
+	case 0xe: /* al */
+	case 0xf: /* unconditional */
+		return true;
+	}
+	BUG();
+	return false;
+}
+
+/* Per-run state set by setup_test_context() and test_context_cpsr() */
+static int is_last_scenario;	/* No further scenarios follow this one */
+static int probe_should_run; /* 0 = no, 1 = yes, -1 = unknown */
+static int memory_needs_checking; /* Test has a pointer into test memory */
+
+/*
+ * Compute the CPSR flag and ITSTATE bits to use for test 'scenario'.
+ *
+ * As side effects this sets probe_should_run to say whether the probed
+ * instruction is expected to execute with the returned flags, and sets
+ * is_last_scenario when the final scenario for the current kind of test has
+ * been reached.
+ */
+static unsigned long test_context_cpsr(int scenario)
+{
+	unsigned long cpsr;
+
+	probe_should_run = 1;
+
+	/* Default case is that we cycle through 16 combinations of flags */
+	cpsr  = (scenario & 0xf) << 28; /* N,Z,C,V flags */
+	cpsr |= (scenario & 0xf) << 16; /* GE flags */
+	cpsr |= (scenario & 0x1) << 27; /* Toggle Q flag */
+
+	if (!test_case_is_thumb) {
+		/* Testing ARM code */
+		/* The condition is the top 4 bits of the instruction */
+		probe_should_run = test_check_cc(current_instruction >> 28, cpsr) != 0;
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_NO_ITBLOCK) {
+		/* Testing Thumb code without setting ITSTATE */
+		if (kprobe_test_cc_position) {
+			int cc = (current_instruction >> kprobe_test_cc_position) & 0xf;
+			probe_should_run = test_check_cc(cc, cpsr) != 0;
+		}
+
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_FULL_ITBLOCK) {
+		/* Testing Thumb code with all combinations of ITSTATE */
+		/* Low 4 bits of scenario select the flags, the rest the ITSTATE */
+		unsigned x = (scenario >> 4);
+		unsigned cond_base = x % 7; /* ITSTATE<7:5> */
+		unsigned mask = x / 7 + 2;  /* ITSTATE<4:0>, bits reversed */
+
+		if (mask > 0x1f) {
+			/* Finish by testing state from instruction 'itt al' */
+			cond_base = 7;
+			mask = 0x4;
+			if ((scenario & 0xf) == 0xf)
+				is_last_scenario = true;
+		}
+
+		cpsr |= cond_base << 13;	/* ITSTATE<7:5> */
+		cpsr |= (mask & 0x1) << 12;	/* ITSTATE<4> */
+		cpsr |= (mask & 0x2) << 10;	/* ITSTATE<3> */
+		cpsr |= (mask & 0x4) << 8;	/* ITSTATE<2> */
+		cpsr |= (mask & 0x8) << 23;	/* ITSTATE<1> */
+		cpsr |= (mask & 0x10) << 21;	/* ITSTATE<0> */
+
+		/* CPSR bits 15:12 now hold the condition for this instruction */
+		probe_should_run = test_check_cc((cpsr >> 12) & 0xf, cpsr) != 0;
+
+	} else {
+		/* Testing Thumb code with several combinations of ITSTATE */
+		/* Scenarios below 16 keep the default flags computed above */
+		switch (scenario) {
+		case 16: /* Clear NZCV flags and 'it eq' state (false as Z=0) */
+			cpsr = 0x00000800;
+			probe_should_run = 0;
+			break;
+		case 17: /* Set NZCV flags and 'it vc' state (false as V=1) */
+			cpsr = 0xf0007800;
+			probe_should_run = 0;
+			break;
+		case 18: /* Clear NZCV flags and 'it ls' state (true as C=0) */
+			cpsr = 0x00009800;
+			break;
+		case 19: /* Set NZCV flags and 'it cs' state (true as C=1) */
+			cpsr = 0xf0002800;
+			is_last_scenario = true;
+			break;
+		}
+	}
+
+	return cpsr;
+}
+
+/*
+ * Initialise the test memory and the registers in 'regs' for the next run
+ * of the current test case.
+ *
+ * The scenario number (test_case_run_count >> 1) selects the fill patterns
+ * and the CPSR flags; the test case's own arguments are then applied on top.
+ * Also resets is_last_scenario and memory_needs_checking for this run.
+ */
+static void setup_test_context(struct pt_regs *regs)
+{
+	int scenario = test_case_run_count>>1;
+	unsigned long val;
+	struct test_arg *args;
+	int i;
+
+	is_last_scenario = false;
+	memory_needs_checking = false;
+
+	/* Initialise test memory on stack */
+	val = (scenario & 1) ? VALM : ~VALM;
+	for (i = 0; i < TEST_MEMORY_SIZE / sizeof(current_stack[0]); ++i)
+		current_stack[i] = val + (i << 8);
+	/* Put target of branch on stack for tests which load PC from memory */
+	if (current_branch_target)
+		current_stack[15] = current_branch_target;
+	/* Put a value for SP on stack for tests which load SP from memory */
+	current_stack[13] = (u32)current_stack + 120;
+
+	/* Initialise register values to their default state */
+	val = (scenario & 2) ? VALR : ~VALR;
+	for (i = 0; i < 13; ++i)
+		regs->uregs[i] = val ^ (i << 8);
+	regs->ARM_lr = val ^ (14 << 8);
+	/* Replace only the flag and IT bits, keeping the rest of CPSR */
+	regs->ARM_cpsr &= ~(APSR_MASK | PSR_IT_MASK);
+	regs->ARM_cpsr |= test_context_cpsr(scenario);
+
+	/* Perform testcase specific register setup  */
+	args = current_args;
+	for (; args[0].type != ARG_TYPE_END; ++args)
+		switch (args[0].type) {
+		case ARG_TYPE_REG: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			regs->uregs[arg->reg] = arg->val;
+			break;
+		}
+		case ARG_TYPE_PTR: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			/* Register points 'val' bytes into the test memory */
+			regs->uregs[arg->reg] =
+				(unsigned long)current_stack + arg->val;
+			memory_needs_checking = true;
+			break;
+		}
+		case ARG_TYPE_MEM: {
+			struct test_arg_mem *arg = (struct test_arg_mem *)args;
+			current_stack[arg->index] = arg->val;
+			break;
+		}
+		default:
+			break;
+		}
+}
+
+/* A kprobe together with the bookkeeping used by the test framework */
+struct test_probe {
+	struct kprobe	kprobe;
+	/* True while the kprobe is registered */
+	bool		registered;
+	/* test_instance value of the last run which hit it, -1 if none */
+	int		hit;
+};
+
+/*
+ * Unregister 'probe' if it is currently registered; otherwise do nothing.
+ */
+static void unregister_test_probe(struct test_probe *probe)
+{
+	if (!probe->registered)
+		return;
+
+	unregister_kprobe(&probe->kprobe);
+	/* Clear disable flag to allow reuse */
+	probe->kprobe.flags = 0;
+	probe->registered = false;
+}
+
+/*
+ * Register 'probe', which must not already be registered (otherwise BUG).
+ *
+ * Returns the result of register_kprobe(). On success the probe is marked
+ * as registered and its hit marker is reset to -1.
+ */
+static int register_test_probe(struct test_probe *probe)
+{
+	int err;
+
+	if (probe->registered)
+		BUG();
+
+	err = register_kprobe(&probe->kprobe);
+	if (err < 0)
+		return err;
+
+	probe->registered = true;
+	probe->hit = -1;
+	return err;
+}
+
+/*
+ * Pre-handler for the probe placed before the test instruction: records
+ * that the probe was hit during the current test instance.
+ */
+static int __kprobes
+test_before_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct test_probe *probe = container_of(p, struct test_probe, kprobe);
+
+	probe->hit = test_instance;
+	return 0;
+}
+
+/*
+ * Post-handler for the probe placed before the test instruction: sets up
+ * the registers and test memory for this run and saves the resulting
+ * register state, with the ignored CPSR bits cleared, in initial_regs.
+ */
+static void __kprobes
+test_before_post_handler(struct kprobe *p, struct pt_regs *regs,
+							unsigned long flags)
+{
+	setup_test_context(regs);
+	initial_regs = *regs;
+	initial_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+}
+
+/*
+ * Pre-handler for the probe on the test instruction itself: records that
+ * the probe was hit during the current test instance.
+ */
+static int __kprobes
+test_case_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct test_probe *probe = container_of(p, struct test_probe, kprobe);
+
+	probe->hit = test_instance;
+	return 0;
+}
+
+/*
+ * Pre-handler for the probes placed after the test instruction: on the
+ * first hit in a test instance it captures the register state, with the
+ * ignored CPSR bits cleared, in result_regs and restores SP to the test
+ * stack. Further hits in the same instance are ignored.
+ */
+static int __kprobes
+test_after_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct test_probe *probe = container_of(p, struct test_probe, kprobe);
+
+	if (probe->hit != test_instance) {
+		result_regs = *regs;
+		result_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+
+		/* Undo any changes done to SP by the test case */
+		regs->ARM_sp = (unsigned long)current_stack;
+
+		probe->hit = test_instance;
+	}
+
+	return 0;
+}
+
+/* Probe on the instruction before the test instruction; sets up the context */
+static struct test_probe test_before_probe = {
+	.kprobe.pre_handler	= test_before_pre_handler,
+	.kprobe.post_handler	= test_before_post_handler,
+};
+
+/* Probe on the instruction being tested */
+static struct test_probe test_case_probe = {
+	.kprobe.pre_handler	= test_case_pre_handler,
+};
+
+/* Probe on the instruction following the test instruction */
+static struct test_probe test_after_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+/* Probe on the branch target, used only when the test case has one */
+static struct test_probe test_after2_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+/* Unregister whichever of the four test probes are currently registered */
+static void test_case_cleanup(void)
+{
+	unregister_test_probe(&test_before_probe);
+	unregister_test_probe(&test_case_probe);
+	unregister_test_probe(&test_after_probe);
+	unregister_test_probe(&test_after2_probe);
+}
+
+/* Log r0-r12, sp, lr, pc and cpsr from 'regs' at error level */
+static void print_registers(struct pt_regs *regs)
+{
+	pr_err("r0  %08lx | r1  %08lx | r2  %08lx | r3  %08lx\n",
+		regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
+	pr_err("r4  %08lx | r5  %08lx | r6  %08lx | r7  %08lx\n",
+		regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7);
+	pr_err("r8  %08lx | r9  %08lx | r10 %08lx | r11 %08lx\n",
+		regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp);
+	pr_err("r12 %08lx | sp  %08lx | lr  %08lx | pc  %08lx\n",
+		regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc);
+	pr_err("cpsr %08lx\n", regs->ARM_cpsr);
+}
+
+/*
+ * Log 'size' bytes of memory starting at 'mem' as 32-bit words, four per
+ * line. Only whole words within 'size' are read; if the number of words is
+ * not a multiple of four the last line is shorter.
+ */
+static void print_memory(u32 *mem, size_t size)
+{
+	size_t words = size / sizeof(u32);
+	size_t i;
+
+	for (i = 0; i + 4 <= words; i += 4)
+		pr_err("%08x %08x %08x %08x\n", mem[i], mem[i+1],
+						mem[i+2], mem[i+3]);
+
+	/* Final partial row: print only the words which lie within 'size' */
+	switch (words - i) {
+	case 3:
+		pr_err("%08x %08x %08x\n", mem[i], mem[i+1], mem[i+2]);
+		break;
+	case 2:
+		pr_err("%08x %08x\n", mem[i], mem[i+1]);
+		break;
+	case 1:
+		pr_err("%08x\n", mem[i]);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Return the number of bytes of test memory which lie at or above 'sp'.
+ *
+ * This is the full size of expected_memory when 'sp' is at or below
+ * current_stack, reduced by however far 'sp' is above it. If 'sp' is at or
+ * beyond the end of the test memory the result is 0 rather than a wrapped
+ * (huge) size, so callers never copy or compare outside the buffer.
+ */
+static size_t expected_memory_size(u32 *sp)
+{
+	size_t size = sizeof(expected_memory);
+	int offset = (uintptr_t)sp - (uintptr_t)current_stack;
+
+	if (offset <= 0)
+		return size;
+
+	if ((size_t)offset >= size)
+		return 0;
+
+	return size - offset;
+}
+
+/*
+ * Unregister all test probes and log a failure 'message' together with the
+ * title and scenario number of the current test case.
+ */
+static void test_case_failed(const char *message)
+{
+	test_case_cleanup();
+
+	pr_err("FAIL: %s\n", message);
+	pr_err("FAIL: Test %s\n", current_title);
+	pr_err("FAIL: Scenario %d\n", test_case_run_count >> 1);
+}
+
+/*
+ * Return the address of the instruction following the one at 'pc'.
+ *
+ * On a Thumb-2 kernel an address with bit 0 set whose first halfword is not
+ * a wide instruction advances by 2 bytes; everything else advances by 4.
+ */
+static unsigned long next_instruction(unsigned long pc)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	/* Bit 0 is the Thumb marker, so the halfword itself is at pc - 1 */
+	if ((pc & 1) && !is_wide_instruction(*(u16 *)(pc - 1)))
+		return pc + 2;
+	else
+#endif
+	return pc + 4;
+}
+
+/*
+ * Prepare to run one test case. Called from __kprobes_test_case_start.
+ *
+ * 'title' is the test's title string, which is followed (4-byte aligned) by
+ * the test's argument list and then its code. 'stack' is the test memory
+ * reserved on the stack.
+ *
+ * Locates the test code, records the instruction under test, adds it to the
+ * coverage data and registers the probes needed. Returns the address to
+ * branch to: the start of the test code for the first run, or the address
+ * after the test instruction when the test has already passed or failed (in
+ * which case test_case_run_count is TEST_CASE_PASSED or TEST_CASE_FAILED).
+ */
+static uintptr_t __used kprobes_test_case_start(const char *title, void *stack)
+{
+	struct test_arg *args;
+	struct test_arg_end *end_arg;
+	unsigned long test_code;
+
+	args = (struct test_arg *)PTR_ALIGN(title + strlen(title) + 1, 4);
+
+	current_title = title;
+	current_args = args;
+	current_stack = stack;
+
+	++test_try_count;
+
+	/* Find the terminating entry of the argument list */
+	while (args->type != ARG_TYPE_END)
+		++args;
+	end_arg = (struct test_arg_end *)args;
+
+	test_code = (unsigned long)(args + 1); /* Code starts after args */
+
+	test_case_is_thumb = end_arg->flags & ARG_FLAG_THUMB;
+	if (test_case_is_thumb)
+		test_code |= 1;
+
+	current_code_start = test_code;
+
+	/* Equal branch and end offsets mean the test has no branch target */
+	current_branch_target = 0;
+	if (end_arg->branch_offset != end_arg->end_offset)
+		current_branch_target = test_code + end_arg->branch_offset;
+
+	/* Probes go on three consecutive instructions: before, test, after */
+	test_code += end_arg->code_offset;
+	test_before_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	test_code = next_instruction(test_code);
+	test_case_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	if (test_case_is_thumb) {
+		u16 *p = (u16 *)(test_code & ~1);
+		current_instruction = p[0];
+		/* Wide instructions have the first halfword in the top bits */
+		if (is_wide_instruction(current_instruction)) {
+			current_instruction <<= 16;
+			current_instruction |= p[1];
+		}
+	} else {
+		current_instruction = *(u32 *)test_code;
+	}
+
+	if (current_title[0] == '.')
+		verbose("%s\n", current_title);
+	else
+		verbose("%s\t@ %0*x\n", current_title,
+					test_case_is_thumb ? 4 : 8,
+					current_instruction);
+
+	test_code = next_instruction(test_code);
+	test_after_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	/* Check the instruction width is what the test group expects */
+	if (kprobe_test_flags & TEST_FLAG_NARROW_INSTR) {
+		if (!test_case_is_thumb ||
+			is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 16-bit instruction");
+				goto fail;
+		}
+	} else {
+		if (test_case_is_thumb &&
+			!is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 32-bit instruction");
+				goto fail;
+		}
+	}
+
+	coverage_add(current_instruction);
+
+	/* For these two kinds of test only probe registration is checked */
+	if (end_arg->flags & ARG_FLAG_UNSUPPORTED) {
+		if (register_test_probe(&test_case_probe) < 0)
+			goto pass;
+		test_case_failed("registered probe for unsupported instruction");
+		goto fail;
+	}
+
+	if (end_arg->flags & ARG_FLAG_SUPPORTED) {
+		if (register_test_probe(&test_case_probe) >= 0)
+			goto pass;
+		test_case_failed("couldn't register probe for supported instruction");
+		goto fail;
+	}
+
+	if (register_test_probe(&test_before_probe) < 0) {
+		test_case_failed("register test_before_probe failed");
+		goto fail;
+	}
+	if (register_test_probe(&test_after_probe) < 0) {
+		test_case_failed("register test_after_probe failed");
+		goto fail;
+	}
+	if (current_branch_target) {
+		test_after2_probe.kprobe.addr =
+				(kprobe_opcode_t *)current_branch_target;
+		if (register_test_probe(&test_after2_probe) < 0) {
+			test_case_failed("register test_after2_probe failed");
+			goto fail;
+		}
+	}
+
+	/* Start first run of test case */
+	test_case_run_count = 0;
+	++test_instance;
+	return current_code_start;
+pass:
+	test_case_run_count = TEST_CASE_PASSED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+fail:
+	test_case_run_count = TEST_CASE_FAILED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+}
+
+/*
+ * Compare the results of the probed run with those saved from the reference
+ * run: the registers always, and the test memory when
+ * memory_needs_checking is set.
+ *
+ * Returns true if they match. Otherwise the failure is reported, the
+ * initial, expected and result registers are logged (plus the memory
+ * contents if the memory was examined) and false is returned.
+ */
+static bool check_test_results(void)
+{
+	size_t mem_size = 0;
+	u32 *mem = 0;
+
+	if (memcmp(&expected_regs, &result_regs, sizeof(expected_regs))) {
+		test_case_failed("registers differ");
+		goto fail;
+	}
+
+	if (memory_needs_checking) {
+		/* Compare from the resulting SP to the end of test memory */
+		mem = (u32 *)result_regs.ARM_sp;
+		mem_size = expected_memory_size(mem);
+		if (memcmp(expected_memory, mem, mem_size)) {
+			test_case_failed("test memory differs");
+			goto fail;
+		}
+	}
+
+	return true;
+
+fail:
+	pr_err("initial_regs:\n");
+	print_registers(&initial_regs);
+	pr_err("expected_regs:\n");
+	print_registers(&expected_regs);
+	pr_err("result_regs:\n");
+	print_registers(&result_regs);
+
+	/* 'mem' is only set if the memory comparison was reached */
+	if (mem) {
+		pr_err("current_stack=%p\n", current_stack);
+		pr_err("expected_memory:\n");
+		print_memory(expected_memory, mem_size);
+		pr_err("result_memory:\n");
+		print_memory(mem, mem_size);
+	}
+
+	return false;
+}
+
+/*
+ * Process the results of one run of the current test case. Called from the
+ * __kprobes_test_case_end_* stubs.
+ *
+ * Returns the address of the test code if another run is required, or 0
+ * when the test case has finished (after updating the pass/fail counts and
+ * unregistering the test probes).
+ */
+static uintptr_t __used kprobes_test_case_end(void)
+{
+	if (test_case_run_count < 0) {
+		if (test_case_run_count == TEST_CASE_PASSED)
+			/* kprobes_test_case_start did all the needed testing */
+			goto pass;
+		else
+			/* kprobes_test_case_start failed */
+			goto fail;
+	}
+
+	if (test_before_probe.hit != test_instance) {
+		test_case_failed("test_before_handler not run");
+		goto fail;
+	}
+
+	/* Either the fall-through or the branch target probe must have hit */
+	if (test_after_probe.hit != test_instance &&
+				test_after2_probe.hit != test_instance) {
+		test_case_failed("test_after_handler not run");
+		goto fail;
+	}
+
+	/*
+	 * Even numbered test runs ran without a probe on the test case so
+	 * we can gather reference results. The subsequent odd numbered run
+	 * will have the probe inserted.
+	 */
+	if ((test_case_run_count & 1) == 0) {
+		/* Save results from run without probe */
+		u32 *mem = (u32 *)result_regs.ARM_sp;
+		expected_regs = result_regs;
+		memcpy(expected_memory, mem, expected_memory_size(mem));
+
+		/* Insert probe onto test case instruction */
+		if (register_test_probe(&test_case_probe) < 0) {
+			test_case_failed("register test_case_probe failed");
+			goto fail;
+		}
+	} else {
+		/* Check probe ran as expected */
+		if (probe_should_run == 1) {
+			if (test_case_probe.hit != test_instance) {
+				test_case_failed("test_case_handler not run");
+				goto fail;
+			}
+		} else if (probe_should_run == 0) {
+			if (test_case_probe.hit == test_instance) {
+				test_case_failed("test_case_handler ran");
+				goto fail;
+			}
+		}
+
+		/* Remove probe for any subsequent reference run */
+		unregister_test_probe(&test_case_probe);
+
+		if (!check_test_results())
+			goto fail;
+
+		if (is_last_scenario)
+			goto pass;
+	}
+
+	/* Do next test run */
+	++test_case_run_count;
+	++test_instance;
+	return current_code_start;
+fail:
+	++test_fail_count;
+	goto end;
+pass:
+	++test_pass_count;
+end:
+	test_case_cleanup();
+	return 0;
+}
+
+
+/*
+ * Top level test functions
+ */
+
+/*
+ * Check decoding table 'table' for consistency, then run the test cases in
+ * 'tests' while gathering coverage data for that table.
+ *
+ * Returns 0 once the test cases have been run, or a negative error code if
+ * the table check or the coverage setup failed (the test cases are then
+ * not run).
+ */
+static int run_test_cases(void (*tests)(void), const union decode_item *table)
+{
+	int err;
+
+	pr_info("    Check decoding tables\n");
+	err = table_test(table);
+	if (!err) {
+		pr_info("    Run test cases\n");
+		err = coverage_start(table);
+	}
+	if (err)
+		return err;
+
+	tests();
+	coverage_end();
+
+	return 0;
+}
+
+
+/*
+ * Run the complete kprobes test suite: the API tests and instruction
+ * simulation tests for the instruction set(s) of this kernel build, then
+ * (if enabled) the benchmarks and the test coverage check.
+ *
+ * Returns 0 if everything passed, otherwise a negative error code. The
+ * first failing stage ends the run.
+ */
+static int __init run_all_tests(void)
+{
+	int ret = 0;
+
+	pr_info("Beginning kprobe tests...\n");
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+	pr_info("Probe ARM code\n");
+	ret = run_api_tests(arm_func);
+	if (ret)
+		goto out;
+
+	pr_info("ARM instruction simulation\n");
+	ret = run_test_cases(kprobe_arm_test_cases, kprobe_decode_arm_table);
+	if (ret)
+		goto out;
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+	pr_info("Probe 16-bit Thumb code\n");
+	ret = run_api_tests(thumb16_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, even halfword\n");
+	ret = run_api_tests(thumb32even_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, odd halfword\n");
+	ret = run_api_tests(thumb32odd_func);
+	if (ret)
+		goto out;
+
+	pr_info("16-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb16_test_cases,
+				kprobe_decode_thumb16_table);
+	if (ret)
+		goto out;
+
+	pr_info("32-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb32_test_cases,
+				kprobe_decode_thumb32_table);
+	if (ret)
+		goto out;
+#endif
+
+	pr_info("Total instruction simulation tests=%d, pass=%d fail=%d\n",
+		test_try_count, test_pass_count, test_fail_count);
+	if (test_fail_count) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+#if BENCHMARKING
+	pr_info("Benchmarks\n");
+	ret = run_benchmarks();
+	if (ret)
+		goto out;
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	/* We are able to run all test cases so coverage should be complete */
+	if (coverage_fail) {
+		pr_err("FAIL: Test coverage checks failed\n");
+		ret = -EINVAL;
+		goto out;
+	}
+#endif
+
+out:
+	if (ret == 0)
+		pr_info("Finished kprobe tests OK\n");
+	else
+		pr_err("kprobe tests failed\n");
+
+	return ret;
+}
+
+
+/*
+ * Module setup
+ */
+
+#ifdef MODULE
+
+/* Empty exit handler registered with module_exit() below */
+static void __exit kprobe_test_exit(void)
+{
+}
+
+/* As a module, the tests run when the module is initialised */
+module_init(run_all_tests)
+module_exit(kprobe_test_exit)
+MODULE_LICENSE("GPL");
+
+#else /* !MODULE */
+
+/* When built in, the tests run as a late initcall */
+late_initcall(run_all_tests);
+
+#endif

+ 392 - 0
arch/arm/kernel/kprobes-test.h

@@ -0,0 +1,392 @@
+/*
+ * arch/arm/kernel/kprobes-test.h
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define VERBOSE 0 /* Set to '1' for more logging of test cases */
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define NORMAL_ISA "16"
+#else
+#define NORMAL_ISA "32"
+#endif
+
+
+/* Flags used in kprobe_test_flags */
+#define TEST_FLAG_NO_ITBLOCK	(1<<0)
+#define TEST_FLAG_FULL_ITBLOCK	(1<<1)
+#define TEST_FLAG_NARROW_INSTR	(1<<2)
+
+extern int kprobe_test_flags;
+extern int kprobe_test_cc_position;
+
+
+#define TEST_MEMORY_SIZE 256
+
+
+/*
+ * Test case structures.
+ *
+ * The arguments given to test cases can be one of three types.
+ *
+ *   ARG_TYPE_REG
+ *	Load a register with the given value.
+ *
+ *   ARG_TYPE_PTR
+ *	Load a register with a pointer into the stack buffer (SP + given value).
+ *
+ *   ARG_TYPE_MEM
+ *	Store the given value into the stack buffer at [SP+index].
+ *
+ */
+
+#define	ARG_TYPE_END	0
+#define	ARG_TYPE_REG	1
+#define	ARG_TYPE_PTR	2
+#define	ARG_TYPE_MEM	3
+
+#define ARG_FLAG_UNSUPPORTED	0x01
+#define ARG_FLAG_SUPPORTED	0x02
+#define ARG_FLAG_THUMB		0x10	/* Must be 16 so TEST_ISA can be used */
+#define ARG_FLAG_ARM		0x20	/* Must be 32 so TEST_ISA can be used */
+
+/*
+ * Generic view of one argument record of a test case. Only 'type' is
+ * meaningful here; it selects which of test_arg_regptr, test_arg_mem or
+ * test_arg_end describes the rest of the record. The padding makes this
+ * view 8 bytes, the amount of data each TEST_ARG_* macro emits.
+ */
+struct test_arg {
+	u8	type;		/* ARG_TYPE_x */
+	u8	_padding[7];
+};
+
+/*
+ * Record emitted by TEST_ARG_REG and TEST_ARG_PTR (.byte type, .byte reg,
+ * .short 0, .word val).
+ */
+struct test_arg_regptr {
+	u8	type;		/* ARG_TYPE_REG or ARG_TYPE_PTR */
+	u8	reg;		/* number of the register to load */
+	u8	_padding[2];
+	u32	val;		/* REG: value to load; PTR: offset added to SP */
+};
+
+/*
+ * Record emitted by TEST_ARG_MEM (.byte type, .byte index, .short 0,
+ * .word val).
+ */
+struct test_arg_mem {
+	u8	type;		/* ARG_TYPE_MEM */
+	u8	index;		/* position in the stack buffer, i.e. [SP+index] */
+	u8	_padding[2];
+	u32	val;		/* value to store at that position */
+};
+
+/*
+ * Terminating record emitted by TEST_ARG_END. The three offsets are the
+ * values TEST_ARG_END computes relative to its local label '0', which it
+ * places directly after this record.
+ */
+struct test_arg_end {
+	u8	type;		/* ARG_TYPE_END */
+	u8	flags;		/* ARG_FLAG_x */
+	u16	code_offset;	/* 50f-0f: the nop before the tested instruction */
+	u16	branch_offset;	/* 2f-0f: next label '2' (branch target or end) */
+	u16	end_offset;	/* 99f-0f: label '99' placed by TESTCASE_END */
+};
+
+
+/*
+ * Building blocks for test cases.
+ *
+ * Each test case is wrapped between TESTCASE_START and TESTCASE_END.
+ *
+ * To specify arguments for a test case the TEST_ARG_{REG,PTR,MEM} macros are
+ * used followed by a terminating TEST_ARG_END.
+ *
+ * After this, the instruction to be tested is defined with TEST_INSTRUCTION.
+ * Or for branches, TEST_BRANCH_F and TEST_BRANCH_B (branch forwards/backwards).
+ *
+ * Some specific test cases may make use of other custom constructs.
+ */
+
+#if VERBOSE
+#define verbose(fmt, ...) pr_info(fmt, ##__VA_ARGS__)
+#else
+#define verbose(fmt, ...)
+#endif
+
+#define TEST_GROUP(title)					\
+	verbose("\n");						\
+	verbose(title"\n");					\
+	verbose("---------------------------------------------------------\n");
+
+#define TESTCASE_START(title)					\
+	__asm__ __volatile__ (					\
+	"bl	__kprobes_test_case_start		\n\t"	\
+	/* don't use .asciz here as 'title' may be */		\
+	/* multiple strings to be concatenated.  */		\
+	".ascii "#title"				\n\t"	\
+	".byte	0					\n\t"	\
+	".align	2					\n\t"
+
+#define	TEST_ARG_REG(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_REG)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_PTR(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_PTR)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_MEM(index, val)				\
+	".byte	"__stringify(ARG_TYPE_MEM)"		\n\t"	\
+	".byte	"#index"				\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_END(flags)					\
+	".byte	"__stringify(ARG_TYPE_END)"		\n\t"	\
+	".byte	"TEST_ISA flags"			\n\t"	\
+	".short	50f-0f					\n\t"	\
+	".short	2f-0f					\n\t"	\
+	".short	99f-0f					\n\t"	\
+	".code "TEST_ISA"				\n\t"	\
+	"0:						\n\t"
+
+#define TEST_INSTRUCTION(instruction)				\
+	"50:	nop					\n\t"	\
+	"1:	"instruction"				\n\t"	\
+	"	nop					\n\t"
+
+#define TEST_BRANCH_F(instruction, xtra_dist)			\
+	TEST_INSTRUCTION(instruction)				\
+	".if "#xtra_dist"				\n\t"	\
+	"	b	99f				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"
+
+#define TEST_BRANCH_B(instruction, xtra_dist)			\
+	"	b	50f				\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"	\
+	"	b	99f				\n\t"	\
+	".if "#xtra_dist"				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	TEST_INSTRUCTION(instruction)
+
+#define TESTCASE_END						\
+	"2:						\n\t"	\
+	"99:						\n\t"	\
+	"	bl __kprobes_test_case_end_"TEST_ISA"	\n\t"	\
+	".code "NORMAL_ISA"				\n\t"	\
+	: :							\
+	: "r0", "r1", "r2", "r3", "ip", "lr", "memory", "cc"	\
+	);
+
+
+/*
+ * Macros to define test cases.
+ *
+ * Those of the form TEST_{R,P,M}* can be used to define test cases
+ * which take combinations of the three basic types of arguments. E.g.
+ *
+ *   TEST_R	One register argument
+ *   TEST_RR	Two register arguments
+ *   TEST_RPR	A register, a pointer, then a register argument
+ *
+ * For testing instructions which may branch, there are macros TEST_BF_*
+ * and TEST_BB_* for branching forwards and backwards.
+ *
+ * TEST_SUPPORTED and TEST_UNSUPPORTED don't cause the code to be executed,
+ * they just verify that a kprobe is or is not allowed on the given instruction.
+ */
+
+#define TEST(code)				\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	TESTCASE_END
+
+#define TEST_UNSUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_UNSUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_SUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_SUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_R(code1, reg, val, code2)			\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 #reg code2)		\
+	TESTCASE_END
+
+#define TEST_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4, reg4, val4)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_REG(reg4, val4)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)	\
+	TESTCASE_END
+
+#define TEST_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code1 #reg1 code2)	\
+	TESTCASE_END
+
+#define TEST_PR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_PTR(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RP(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_PTR(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_PRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_PTR(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RPR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_PTR(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRP(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_PTR(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_BF_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code1 #reg1 code2, 0)	\
+	TESTCASE_END
+
+#define TEST_BF_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BB_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_B(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BF_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_F(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BB_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_B(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BF(code)	TEST_BF_X(code, 0)
+#define TEST_BB(code)	TEST_BB_X(code, 0)
+
+#define TEST_BF_R(code1, reg, val, code2) TEST_BF_RX(code1, reg, val, code2, 0)
+#define TEST_BB_R(code1, reg, val, code2) TEST_BB_RX(code1, reg, val, code2, 0)
+
+#define TEST_BF_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_BRANCH_F(code1 #reg1 code2 #reg2 code3, 0)		\
+	TESTCASE_END
+
+#define TEST_X(code, codex)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	"	b	99f		\n\t"	\
+	"	"codex"			\n\t"	\
+	TESTCASE_END
+
+#define TEST_RX(code1, reg, val, code2, codex)		\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 __stringify(reg) code2)	\
+	"	b	99f		\n\t"		\
+	"	"codex"			\n\t"		\
+	TESTCASE_END
+
+#define TEST_RRX(code1, reg1, val1, code2, reg2, val2, code3, codex)		\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)				\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 __stringify(reg1) code2 __stringify(reg2) code3)	\
+	"	b	99f		\n\t"					\
+	"	"codex"			\n\t"					\
+	TESTCASE_END
+
+
+/* Various values used in test cases... */
+#define N(val)	(val ^ 0xffffffff)
+#define VAL1	0x12345678
+#define VAL2	N(VAL1)
+#define VAL3	0xa5f801
+#define VAL4	N(VAL3)
+#define VALM	0x456789ab
+#define VALR	0xdeaddead
+#define HH1	0x0123fecb
+#define HH2	0xa9874567
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+void kprobe_thumb16_test_cases(void);
+void kprobe_thumb32_test_cases(void);
+#else
+void kprobe_arm_test_cases(void);
+#endif

+ 7 - 0
arch/arm/kernel/kprobes-thumb.c

@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -943,6 +944,9 @@ const union decode_item kprobe_decode_thumb32_table[] = {
 	 */
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb32_table);
+#endif
 
 static void __kprobes
 t16_simulate_bxblx(struct kprobe *p, struct pt_regs *regs)
@@ -1423,6 +1427,9 @@ const union decode_item kprobe_decode_thumb16_table[] = {
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb16_table);
+#endif
 
 static unsigned long __kprobes thumb_check_cc(unsigned long cpsr)
 {

+ 8 - 0
arch/arm/kernel/kprobes.h

@@ -413,6 +413,14 @@ struct decode_reject {
 	DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0)
 
 
+#ifdef CONFIG_THUMB2_KERNEL
+extern const union decode_item kprobe_decode_thumb16_table[];
+extern const union decode_item kprobe_decode_thumb32_table[];
+#else
+extern const union decode_item kprobe_decode_arm_table[];
+#endif
+
+
 int kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
 			const union decode_item *table, bool thumb16);
 

+ 254 - 221
arch/arm/kernel/perf_event.c

@@ -12,6 +12,7 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/bitmap.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -26,16 +27,8 @@
 #include <asm/pmu.h>
 #include <asm/stacktrace.h>
 
-static struct platform_device *pmu_device;
-
-/*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
-static DEFINE_RAW_SPINLOCK(pmu_lock);
-
 /*
- * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
+ * ARMv6 supports a maximum of 3 events, starting from index 0. If we add
  * another platform that supports more, we need to increase this to be the
  * largest of all platforms.
  *
@@ -43,62 +36,24 @@ static DEFINE_RAW_SPINLOCK(pmu_lock);
  *  cycle counter CCNT + 31 events counters CNT0..30.
  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
  */
-#define ARMPMU_MAX_HWEVENTS		33
+#define ARMPMU_MAX_HWEVENTS		32
 
-/* The events for a given CPU. */
-struct cpu_hw_events {
-	/*
-	 * The events that are active on the CPU for the given index. Index 0
-	 * is reserved.
-	 */
-	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	unsigned long		used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
+static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
-	/*
-	 * A 1 bit for an index indicates that the counter is actively being
-	 * used.
-	 */
-	unsigned long		active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
-};
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-struct arm_pmu {
-	enum arm_perf_pmu_ids id;
-	const char	*name;
-	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
-	void		(*enable)(struct hw_perf_event *evt, int idx);
-	void		(*disable)(struct hw_perf_event *evt, int idx);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
-	u32		(*read_counter)(int idx);
-	void		(*write_counter)(int idx, u32 val);
-	void		(*start)(void);
-	void		(*stop)(void);
-	void		(*reset)(void *);
-	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
-				    [PERF_COUNT_HW_CACHE_OP_MAX]
-				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
-	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
-	u32		raw_event_mask;
-	int		num_events;
-	u64		max_period;
-};
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
 
 /* Set at runtime when we know what CPU type we are. */
-static const struct arm_pmu *armpmu;
+static struct arm_pmu *cpu_pmu;
 
 enum arm_perf_pmu_ids
 armpmu_get_pmu_id(void)
 {
 	int id = -ENODEV;
 
-	if (armpmu != NULL)
-		id = armpmu->id;
+	if (cpu_pmu != NULL)
+		id = cpu_pmu->id;
 
 	return id;
 }
@@ -109,8 +64,8 @@ armpmu_get_max_events(void)
 {
 	int max_events = 0;
 
-	if (armpmu != NULL)
-		max_events = armpmu->num_events;
+	if (cpu_pmu != NULL)
+		max_events = cpu_pmu->num_events;
 
 	return max_events;
 }
@@ -130,7 +85,11 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
 static int
-armpmu_map_cache_event(u64 config)
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
 {
 	unsigned int cache_type, cache_op, cache_result, ret;
 
@@ -146,7 +105,7 @@ armpmu_map_cache_event(u64 config)
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
 
-	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
 
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
@@ -155,23 +114,46 @@ armpmu_map_cache_event(u64 config)
 }
 
 static int
-armpmu_map_event(u64 config)
+armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*armpmu->event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
+	int mapping;
+
+	/*
+	 * config is passed straight through from event->attr.config and has
+	 * not been range-checked, so reject anything that would index past
+	 * the end of the map (same policy as armpmu_map_cache_event()).
+	 */
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
 static int
-armpmu_map_raw_event(u64 config)
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
 {
-	return (int)(config & armpmu->raw_event_mask);
+	return (int)(config & raw_event_mask);
 }
 
-static int
+static int map_cpu_event(struct perf_event *event,
+			 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+			 const unsigned (*cache_map)
+					[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX],
+			 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int
 armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
@@ -202,11 +184,12 @@ armpmu_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static u64
+u64
 armpmu_event_update(struct perf_event *event,
 		    struct hw_perf_event *hwc,
 		    int idx, int overflow)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	u64 delta, prev_raw_count, new_raw_count;
 
 again:
@@ -246,11 +229,9 @@ armpmu_read(struct perf_event *event)
 static void
 armpmu_stop(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to update the counter, so ignore
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
@@ -266,11 +247,9 @@ armpmu_stop(struct perf_event *event, int flags)
 static void
 armpmu_start(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to reprogram the period, so ignore
 	 * PERF_EF_RELOAD, see the comment below.
@@ -293,16 +272,16 @@ armpmu_start(struct perf_event *event, int flags)
 static void
 armpmu_del(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
 	WARN_ON(idx < 0);
 
-	clear_bit(idx, cpuc->active_mask);
 	armpmu_stop(event, PERF_EF_UPDATE);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
 
 	perf_event_update_userpage(event);
 }
@@ -310,7 +289,8 @@ armpmu_del(struct perf_event *event, int flags)
 static int
 armpmu_add(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int err = 0;
@@ -318,7 +298,7 @@ armpmu_add(struct perf_event *event, int flags)
 	perf_pmu_disable(event->pmu);
 
 	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(cpuc, hwc);
+	idx = armpmu->get_event_idx(hw_events, hwc);
 	if (idx < 0) {
 		err = idx;
 		goto out;
@@ -330,8 +310,7 @@ armpmu_add(struct perf_event *event, int flags)
 	 */
 	event->hw.idx = idx;
 	armpmu->disable(hwc, idx);
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
+	hw_events->events[idx] = event;
 
 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	if (flags & PERF_EF_START)
@@ -345,25 +324,25 @@ out:
 	return err;
 }
 
-static struct pmu pmu;
-
 static int
-validate_event(struct cpu_hw_events *cpuc,
+validate_event(struct pmu_hw_events *hw_events,
 	       struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event fake_event = event->hw;
+	struct pmu *leader_pmu = event->group_leader->pmu;
 
-	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
 		return 1;
 
-	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
+	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
 }
 
 static int
 validate_group(struct perf_event *event)
 {
 	struct perf_event *sibling, *leader = event->group_leader;
-	struct cpu_hw_events fake_pmu;
+	struct pmu_hw_events fake_pmu;
 
 	memset(&fake_pmu, 0, sizeof(fake_pmu));
 
@@ -383,110 +362,119 @@ validate_group(struct perf_event *event)
 
 static irqreturn_t armpmu_platform_irq(int irq, void *dev)
 {
-	struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev);
+	struct arm_pmu *armpmu = (struct arm_pmu *) dev;
+	struct platform_device *plat_device = armpmu->plat_device;
+	struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);
 
 	return plat->handle_irq(irq, dev, armpmu->handle_irq);
 }
 
+/*
+ * Undo armpmu_reserve_hardware(): free each PMU interrupt whose index is set
+ * in armpmu->active_irqs (clearing the bit as it goes), then release the PMU
+ * reservation for armpmu->type.
+ */
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
+
+	/* Same bound as the request loop: at most one interrupt per possible CPU. */
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	for (i = 0; i < irqs; ++i) {
+		/* Skip interrupts that were never successfully requested. */
+		if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+			continue;
+		irq = platform_get_irq(pmu_device, i);
+		if (irq >= 0)
+			free_irq(irq, armpmu);
+	}
+
+	release_pmu(armpmu->type);
+}
+
 static int
-armpmu_reserve_hardware(void)
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
 {
 	struct arm_pmu_platdata *plat;
 	irq_handler_t handle_irq;
-	int i, err = -ENODEV, irq;
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
 
-	pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
-	if (IS_ERR(pmu_device)) {
+	err = reserve_pmu(armpmu->type);
+	if (err) {
 		pr_warning("unable to reserve pmu\n");
-		return PTR_ERR(pmu_device);
+		return err;
 	}
 
-	init_pmu(ARM_PMU_DEVICE_CPU);
-
 	plat = dev_get_platdata(&pmu_device->dev);
 	if (plat && plat->handle_irq)
 		handle_irq = armpmu_platform_irq;
 	else
 		handle_irq = armpmu->handle_irq;
 
-	if (pmu_device->num_resources < 1) {
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
 		pr_err("no irqs for PMUs defined\n");
 		return -ENODEV;
 	}
 
-	for (i = 0; i < pmu_device->num_resources; ++i) {
+	for (i = 0; i < irqs; ++i) {
+		err = 0;
 		irq = platform_get_irq(pmu_device, i);
 		if (irq < 0)
 			continue;
 
+		/*
+		 * If we have a single PMU interrupt that we can't shift,
+		 * assume that we're running on a uniprocessor machine and
+		 * continue. Otherwise, continue without this interrupt.
+		 */
+		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				    irq, i);
+			continue;
+		}
+
 		err = request_irq(irq, handle_irq,
 				  IRQF_DISABLED | IRQF_NOBALANCING,
-				  "armpmu", NULL);
+				  "arm-pmu", armpmu);
 		if (err) {
-			pr_warning("unable to request IRQ%d for ARM perf "
-				"counters\n", irq);
-			break;
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			armpmu_release_hardware(armpmu);
+			return err;
 		}
-	}
 
-	if (err) {
-		for (i = i - 1; i >= 0; --i) {
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, NULL);
-		}
-		release_pmu(ARM_PMU_DEVICE_CPU);
-		pmu_device = NULL;
+		cpumask_set_cpu(i, &armpmu->active_irqs);
 	}
 
-	return err;
+	return 0;
 }
 
 static void
-armpmu_release_hardware(void)
+hw_perf_event_destroy(struct perf_event *event)
 {
-	int i, irq;
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events	 = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
 
-	for (i = pmu_device->num_resources - 1; i >= 0; --i) {
-		irq = platform_get_irq(pmu_device, i);
-		if (irq >= 0)
-			free_irq(irq, NULL);
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
 	}
-	armpmu->stop();
-
-	release_pmu(ARM_PMU_DEVICE_CPU);
-	pmu_device = NULL;
 }
 
-static atomic_t active_events = ATOMIC_INIT(0);
-static DEFINE_MUTEX(pmu_reserve_mutex);
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
-		armpmu_release_hardware();
-		mutex_unlock(&pmu_reserve_mutex);
-	}
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
 }
 
 static int
 __hw_perf_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int mapping, err;
 
-	/* Decode the generic type into an ARM event identifier. */
-	if (PERF_TYPE_HARDWARE == event->attr.type) {
-		mapping = armpmu_map_event(event->attr.config);
-	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
-		mapping = armpmu_map_cache_event(event->attr.config);
-	} else if (PERF_TYPE_RAW == event->attr.type) {
-		mapping = armpmu_map_raw_event(event->attr.config);
-	} else {
-		pr_debug("event type %x not supported\n", event->attr.type);
-		return -EOPNOTSUPP;
-	}
+	mapping = armpmu->map_event(event);
 
 	if (mapping < 0) {
 		pr_debug("event %x:%llx not supported\n", event->attr.type,
@@ -494,35 +482,32 @@ __hw_perf_event_init(struct perf_event *event)
 		return mapping;
 	}
 
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has its own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
 	/*
 	 * Check whether we need to exclude the counter from certain modes.
-	 * The ARM performance counters are on all of the time so if someone
-	 * has asked us for some excludes then we have to fail.
 	 */
-	if (event->attr.exclude_kernel || event->attr.exclude_user ||
-	    event->attr.exclude_hv || event->attr.exclude_idle) {
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
 		pr_debug("ARM performance counters do not support "
 			 "mode exclusion\n");
 		return -EPERM;
 	}
 
 	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
+	 * Store the event encoding into the config_base field.
 	 */
-	hwc->idx = -1;
-
-	/*
-	 * Store the event encoding into the config_base field. config and
-	 * event_base are unused as the only 2 things we need to know are
-	 * the event mapping and the counter to use. The counter to use is
-	 * also the indx and the config_base is the event type.
-	 */
-	hwc->config_base	    = (unsigned long)mapping;
-	hwc->config		    = 0;
-	hwc->event_base		    = 0;
+	hwc->config_base	    |= (unsigned long)mapping;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period  = armpmu->max_period;
@@ -542,32 +527,23 @@ __hw_perf_event_init(struct perf_event *event)
 
 static int armpmu_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
 
-	switch (event->attr.type) {
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-	case PERF_TYPE_HW_CACHE:
-		break;
-
-	default:
+	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
-	}
-
-	if (!armpmu)
-		return -ENODEV;
 
 	event->destroy = hw_perf_event_destroy;
 
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmu_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			err = armpmu_reserve_hardware();
-		}
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
 
 		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmu_reserve_mutex);
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
 	}
 
 	if (err)
@@ -582,22 +558,9 @@ static int armpmu_event_init(struct perf_event *event)
 
 static void armpmu_enable(struct pmu *pmu)
 {
-	/* Enable all of the perf events on hardware. */
-	int idx, enabled = 0;
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
-	if (!armpmu)
-		return;
-
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-
-		if (!event)
-			continue;
-
-		armpmu->enable(&event->hw, idx);
-		enabled = 1;
-	}
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
 	if (enabled)
 		armpmu->start();
@@ -605,20 +568,32 @@ static void armpmu_enable(struct pmu *pmu)
 
 static void armpmu_disable(struct pmu *pmu)
 {
-	if (armpmu)
-		armpmu->stop();
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	armpmu->stop();
 }
 
-static struct pmu pmu = {
-	.pmu_enable	= armpmu_enable,
-	.pmu_disable	= armpmu_disable,
-	.event_init	= armpmu_event_init,
-	.add		= armpmu_add,
-	.del		= armpmu_del,
-	.start		= armpmu_start,
-	.stop		= armpmu_stop,
-	.read		= armpmu_read,
-};
+static void __init armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+	};
+}
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
+{
+	armpmu_init(armpmu);
+	return perf_pmu_register(&armpmu->pmu, name, type);
+}
 
 /* Include the PMU-specific implementations. */
 #include "perf_event_xscale.c"
@@ -630,14 +605,72 @@ static struct pmu pmu = {
  * This requires SMP to be available, so exists as a separate initcall.
  */
 static int __init
-armpmu_reset(void)
+cpu_pmu_reset(void)
+{
+	if (cpu_pmu && cpu_pmu->reset)
+		return on_each_cpu(cpu_pmu->reset, NULL, 1);
+	return 0;
+}
+arch_initcall(cpu_pmu_reset);
+
+/*
+ * PMU platform driver and devicetree bindings.
+ */
+static struct of_device_id armpmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a9-pmu"},
+	{.compatible = "arm,cortex-a8-pmu"},
+	{.compatible = "arm,arm1136-pmu"},
+	{.compatible = "arm,arm1176-pmu"},
+	{},
+};
+
+static struct platform_device_id armpmu_plat_device_ids[] = {
+	{.name = "arm-pmu"},
+	{},
+};
+
+/*
+ * Platform driver probe: remember the PMU platform device on the CPU PMU so
+ * that its interrupts and platform data can be looked up later.
+ *
+ * Returns 0 on success, or -ENODEV when no CPU PMU was identified by
+ * init_hw_perf_events() (cpu_pmu is still NULL), since the driver is
+ * registered regardless of whether a supported PMU was found.
+ */
+static int __devinit armpmu_device_probe(struct platform_device *pdev)
 {
-	if (armpmu && armpmu->reset)
-		return on_each_cpu(armpmu->reset, NULL, 1);
+	if (!cpu_pmu)
+		return -ENODEV;
+
+	cpu_pmu->plat_device = pdev;
 	return 0;
 }
-arch_initcall(armpmu_reset);
 
+static struct platform_driver armpmu_driver = {
+	.driver		= {
+		.name	= "arm-pmu",
+		.of_match_table = armpmu_of_device_ids,
+	},
+	.probe		= armpmu_device_probe,
+	.id_table	= armpmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+	return platform_driver_register(&armpmu_driver);
+}
+device_initcall(register_pmu_driver);
+
+static struct pmu_hw_events *armpmu_get_cpu_events(void)
+{
+	return &__get_cpu_var(cpu_hw_events);
+}
+
+static void __init cpu_pmu_init(struct arm_pmu *armpmu)
+{
+	int cpu;
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+		events->events = per_cpu(hw_events, cpu);
+		events->used_mask = per_cpu(used_mask, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+	}
+	armpmu->get_hw_events = armpmu_get_cpu_events;
+	armpmu->type = ARM_PMU_DEVICE_CPU;
+}
+
+/*
+ * CPU PMU identification and registration.
+ */
 static int __init
 init_hw_perf_events(void)
 {
@@ -651,22 +684,22 @@ init_hw_perf_events(void)
 		case 0xB360:	/* ARM1136 */
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
-			armpmu = armv6pmu_init();
+			cpu_pmu = armv6pmu_init();
 			break;
 		case 0xB020:	/* ARM11mpcore */
-			armpmu = armv6mpcore_pmu_init();
+			cpu_pmu = armv6mpcore_pmu_init();
 			break;
 		case 0xC080:	/* Cortex-A8 */
-			armpmu = armv7_a8_pmu_init();
+			cpu_pmu = armv7_a8_pmu_init();
 			break;
 		case 0xC090:	/* Cortex-A9 */
-			armpmu = armv7_a9_pmu_init();
+			cpu_pmu = armv7_a9_pmu_init();
 			break;
 		case 0xC050:	/* Cortex-A5 */
-			armpmu = armv7_a5_pmu_init();
+			cpu_pmu = armv7_a5_pmu_init();
 			break;
 		case 0xC0F0:	/* Cortex-A15 */
-			armpmu = armv7_a15_pmu_init();
+			cpu_pmu = armv7_a15_pmu_init();
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -674,23 +707,23 @@ init_hw_perf_events(void)
 		part_number = (cpuid >> 13) & 0x7;
 		switch (part_number) {
 		case 1:
-			armpmu = xscale1pmu_init();
+			cpu_pmu = xscale1pmu_init();
 			break;
 		case 2:
-			armpmu = xscale2pmu_init();
+			cpu_pmu = xscale2pmu_init();
 			break;
 		}
 	}
 
-	if (armpmu) {
+	if (cpu_pmu) {
 		pr_info("enabled with %s PMU driver, %d counters available\n",
-			armpmu->name, armpmu->num_events);
+			cpu_pmu->name, cpu_pmu->num_events);
+		cpu_pmu_init(cpu_pmu);
+		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
 	} else {
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-
 	return 0;
 }
 early_initcall(init_hw_perf_events);

+ 59 - 28
arch/arm/kernel/perf_event_v6.c

@@ -54,7 +54,7 @@ enum armv6_perf_types {
 };
 
 enum armv6_counters {
-	ARMV6_CYCLE_COUNTER = 1,
+	ARMV6_CYCLE_COUNTER = 0,
 	ARMV6_COUNTER0,
 	ARMV6_COUNTER1,
 };
@@ -433,6 +433,7 @@ armv6pmu_enable_event(struct hw_perf_event *hwc,
 		      int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= 0;
@@ -454,12 +455,29 @@ armv6pmu_enable_event(struct hw_perf_event *hwc,
 	 * Mask out the current event and set the counter to count the event
 	 * that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+/*
+ * Test whether the interrupt-enable (*_IEN) bit belonging to counter @idx
+ * is set in @pmcr, a value previously read from the PMCR.
+ *
+ * Returns the masked PMCR bit (non-zero when set, 0 when clear). An @idx
+ * that is none of the three ARMv6 counters triggers a one-time warning and
+ * is reported as inactive.
+ */
+static int counter_is_active(unsigned long pmcr, int idx)
+{
+	unsigned long mask = 0;
+	if (idx == ARMV6_CYCLE_COUNTER)
+		mask = ARMV6_PMCR_CCOUNT_IEN;
+	else if (idx == ARMV6_COUNTER0)
+		mask = ARMV6_PMCR_COUNT0_IEN;
+	else if (idx == ARMV6_COUNTER1)
+		mask = ARMV6_PMCR_COUNT1_IEN;
+
+	if (mask)
+		return pmcr & mask;
+
+	/* mask is still 0: idx did not name a known counter. */
+	WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+	return 0;
 }
 
 static irqreturn_t
@@ -468,7 +486,7 @@ armv6pmu_handle_irq(int irq_num,
 {
 	unsigned long pmcr = armv6_pmcr_read();
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -487,11 +505,11 @@ armv6pmu_handle_irq(int irq_num,
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
+		if (!counter_is_active(pmcr, idx))
 			continue;
 
 		/*
@@ -508,7 +526,7 @@ armv6pmu_handle_irq(int irq_num,
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -527,28 +545,30 @@ static void
 armv6pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val |= ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 armv6pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
 		       struct hw_perf_event *event)
 {
 	/* Always place a cycle counter into the cycle counter. */
@@ -578,6 +598,7 @@ armv6pmu_disable_event(struct hw_perf_event *hwc,
 		       int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -598,12 +619,12 @@ armv6pmu_disable_event(struct hw_perf_event *hwc,
 	 * of ETM bus signal assertion cycles. The external reporting should
 	 * be disabled and so this should never increment.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
@@ -611,6 +632,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
 			      int idx)
 {
 	unsigned long val, mask, flags, evt = 0;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -627,15 +649,21 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
 	 * simply disable the interrupt reporting.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+/*
+ * map_event() callback for the "v6" PMU: delegates to map_cpu_event() with
+ * the ARMv6 event and cache maps. 0xFF is presumably the raw event mask --
+ * confirm against map_cpu_event().
+ */
+static int armv6_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6_perf_map,
+				&armv6_perf_cache_map, 0xFF);
 }
 
-static const struct arm_pmu armv6pmu = {
+static struct arm_pmu armv6pmu = {
 	.id			= ARM_PERF_PMU_ID_V6,
 	.name			= "v6",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -646,14 +674,12 @@ static const struct arm_pmu armv6pmu = {
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6_perf_cache_map,
-	.event_map		= &armv6_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return &armv6pmu;
 }
@@ -665,7 +691,14 @@ static const struct arm_pmu *__init armv6pmu_init(void)
  * disable the interrupt reporting and update the event. When unthrottling we
  * reset the period and enable the interrupt reporting.
  */
-static const struct arm_pmu armv6mpcore_pmu = {
+
+/*
+ * map_event() callback for the "v6mpcore" PMU: delegates to map_cpu_event()
+ * with the ARM11MPCore event and cache maps. 0xFF is presumably the raw
+ * event mask -- confirm against map_cpu_event().
+ */
+static int armv6mpcore_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6mpcore_perf_map,
+				&armv6mpcore_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu armv6mpcore_pmu = {
 	.id			= ARM_PERF_PMU_ID_V6MP,
 	.name			= "v6mpcore",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -676,24 +709,22 @@ static const struct arm_pmu armv6mpcore_pmu = {
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6mpcore_perf_cache_map,
-	.event_map		= &armv6mpcore_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6mpcore_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return &armv6mpcore_pmu;
 }
 #else
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return NULL;
 }

+ 196 - 199
arch/arm/kernel/perf_event_v7.c

@@ -17,6 +17,9 @@
  */
 
 #ifdef CONFIG_CPU_V7
+
+static struct arm_pmu armv7pmu;
+
 /*
  * Common ARMv7 event types
  *
@@ -676,23 +679,24 @@ static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 };
 
 /*
- * Perf Events counters
+ * Perf Events' indices
  */
-enum armv7_counters {
-	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
-	ARMV7_COUNTER0			= 2,	/* First event counter */
-};
+#define	ARMV7_IDX_CYCLE_COUNTER	0
+#define	ARMV7_IDX_COUNTER0	1
+#define	ARMV7_IDX_COUNTER_LAST	(ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+#define	ARMV7_MAX_COUNTERS	32
+#define	ARMV7_COUNTER_MASK	(ARMV7_MAX_COUNTERS - 1)
 
 /*
- * The cycle counter is ARMV7_CYCLE_COUNTER.
- * The first event counter is ARMV7_COUNTER0.
- * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
+ * ARMv7 low level PMNC access
  */
-#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
 
 /*
- * ARMv7 low level PMNC access
+ * Perf Event to low level counters mapping
  */
+#define	ARMV7_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
 
 /*
  * Per-CPU PMNC: config reg
@@ -708,103 +712,76 @@ enum armv7_counters {
 #define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
 
 /*
- * Available counters
- */
-#define ARMV7_CNT0		0	/* First event counter */
-#define ARMV7_CCNT		31	/* Cycle counter */
-
-/* Perf Event to low level counters mapping */
-#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
-
-/*
- * CNTENS: counters enable reg
- */
-#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * CNTENC: counters disable reg
- */
-#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENS: counters overflow interrupt enable reg
- */
-#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENC: counters overflow interrupt disable reg
- */
-#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * EVTSEL: Event selection reg
+ * FLAG: counters overflow flag status reg
  */
-#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
+#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
 
 /*
- * SELECT: Counter selection reg
+ * PMXEVTYPER: Event selection reg
  */
-#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
+#define	ARMV7_EVTYPE_MASK	0xc00000ff	/* Mask for writable bits */
+#define	ARMV7_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */
 
 /*
- * FLAG: counters overflow flag status reg
+ * Event filters for PMUv2
  */
-#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
-#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
-#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
+#define	ARMV7_EXCLUDE_PL1	(1 << 31)
+#define	ARMV7_EXCLUDE_USER	(1 << 30)
+#define	ARMV7_INCLUDE_HYP	(1 << 27)
 
-static inline unsigned long armv7_pmnc_read(void)
+static inline u32 armv7_pmnc_read(void)
 {
 	u32 val;
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
 	return val;
 }
 
-static inline void armv7_pmnc_write(unsigned long val)
+static inline void armv7_pmnc_write(u32 val)
 {
 	val &= ARMV7_PMNC_MASK;
 	isb();
 	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
 }
 
-static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
 {
 	return pmnc & ARMV7_OVERFLOWED_MASK;
 }
 
-static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
-					enum armv7_counters counter)
+/*
+ * Return non-zero if @idx is a usable perf counter index, i.e. it lies in
+ * [ARMV7_IDX_CYCLE_COUNTER, ARMV7_IDX_COUNTER_LAST]. The upper bound is
+ * derived from cpu_pmu->num_events, so cpu_pmu must be set before use.
+ */
+static inline int armv7_pmnc_counter_valid(int idx)
+{
+	return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST;
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
 {
 	int ret = 0;
+	u32 counter;
 
-	if (counter == ARMV7_CYCLE_COUNTER)
-		ret = pmnc & ARMV7_FLAG_C;
-	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
-		ret = pmnc & ARMV7_FLAG_P(counter);
-	else
+	if (!armv7_pmnc_counter_valid(idx)) {
 		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), counter);
+			smp_processor_id(), idx);
+	} else {
+		counter = ARMV7_IDX_TO_COUNTER(idx);
+		ret = pmnc & BIT(counter);
+	}
 
 	return ret;
 }
 
-static inline int armv7_pmnc_select_counter(unsigned int idx)
+static inline int armv7_pmnc_select_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
-		pr_err("CPU%u selecting wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u selecting wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
-	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
 	isb();
 
 	return idx;
@@ -812,124 +789,95 @@ static inline int armv7_pmnc_select_counter(unsigned int idx)
 
 static inline u32 armv7pmu_read_counter(int idx)
 {
-	unsigned long value = 0;
+	u32 value = 0;
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mrc p15, 0, %0, c9, c13, 2"
-				     : "=r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
 
 	return value;
 }
 
 static inline void armv7pmu_write_counter(int idx, u32 value)
 {
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mcr p15, 0, %0, c9, c13, 2"
-				     : : "r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
 }
 
-static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
 {
 	if (armv7_pmnc_select_counter(idx) == idx) {
-		val &= ARMV7_EVTSEL_MASK;
+		val &= ARMV7_EVTYPE_MASK;
 		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
 	}
 }
 
-static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
+static inline int armv7_pmnc_enable_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENS_C;
-	else
-		val = ARMV7_CNTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
+static inline int armv7_pmnc_disable_counter(int idx)
 {
-	u32 val;
-
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENC_C;
-	else
-		val = ARMV7_CNTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
+static inline int armv7_pmnc_enable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENS_C;
-	else
-		val = ARMV7_INTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
+static inline int armv7_pmnc_disable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENC_C;
-	else
-		val = ARMV7_INTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
@@ -973,14 +921,14 @@ static void armv7_pmnc_dump_regs(void)
 	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
 	printk(KERN_INFO "CCNT  =0x%08x\n", val);
 
-	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
+	for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) {
 		armv7_pmnc_select_counter(cnt);
 		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 	}
 }
 #endif
@@ -988,12 +936,13 @@ static void armv7_pmnc_dump_regs(void)
 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1002,9 +951,10 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 
 	/*
 	 * Set event (if destined for PMNx counters)
-	 * We don't need to set the event if it's a cycle count
+	 * We only need to set the event for the cycle counter if we
+	 * have the ability to perform event filtering.
 	 */
-	if (idx != ARMV7_CYCLE_COUNTER)
+	if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
 		armv7_pmnc_write_evtsel(idx, hwc->config_base);
 
 	/*
@@ -1017,17 +967,18 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	 */
 	armv7_pmnc_enable_counter(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Disable counter and interrupt
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1039,14 +990,14 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 	 */
 	armv7_pmnc_disable_intens(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 {
-	unsigned long pmnc;
+	u32 pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -1069,13 +1020,10 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		/*
 		 * We have a single interrupt for all counters. Check that
 		 * each counter has overflowed before we process it.
@@ -1090,7 +1038,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -1108,61 +1056,114 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 static void armv7pmu_start(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_stop(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
 				  struct hw_perf_event *event)
 {
 	int idx;
+	unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
-	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
-		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
+	if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+		if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 
-		return ARMV7_CYCLE_COUNTER;
-	} else {
-		/*
-		 * For anything other than a cycle counter, try and use
-		 * the events counters
-		 */
-		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
-			if (!test_and_set_bit(idx, cpuc->used_mask))
-				return idx;
-		}
+		return ARMV7_IDX_CYCLE_COUNTER;
+	}
 
-		/* The counters are all in use. */
-		return -EAGAIN;
+	/*
+	 * For anything other than a cycle counter, try and use
+	 * the events counters
+	 */
+	for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
 	}
+
+	/* The counters are all in use. */
+	return -EAGAIN;
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ *
+ * Translates the exclude_* bits of @attr into the ARMV7_EXCLUDE_USER,
+ * ARMV7_EXCLUDE_PL1 and ARMV7_INCLUDE_HYP filter bits and stores the result
+ * in @event->config_base, replacing whatever value was there before.
+ *
+ * Returns 0 on success, or -EPERM if @attr requests exclude_idle, which is
+ * rejected here.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+				     struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (attr->exclude_idle)
+		return -EPERM;
+	if (attr->exclude_user)
+		config_base |= ARMV7_EXCLUDE_USER;
+	if (attr->exclude_kernel)
+		config_base |= ARMV7_EXCLUDE_PL1;
+	/* Hyp mode is opt-in at the filter level: include it unless excluded. */
+	if (!attr->exclude_hv)
+		config_base |= ARMV7_INCLUDE_HYP;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base = config_base;
+
+	return 0;
 }
 
 static void armv7pmu_reset(void *info)
 {
-	u32 idx, nb_cnt = armpmu->num_events;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
-	for (idx = 1; idx < nb_cnt; ++idx)
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
 		armv7pmu_disable_event(NULL, idx);
 
 	/* Initialize & Reset PMNC: C and P bits */
 	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
 }
 
+/*
+ * map_event() callback installed by armv7_a8_pmu_init(): delegates to
+ * map_cpu_event() with the Cortex-A8 event and cache maps. 0xFF is
+ * presumably the raw event mask -- confirm against map_cpu_event().
+ */
+static int armv7_a8_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a8_perf_map,
+				&armv7_a8_perf_cache_map, 0xFF);
+}
+
+/*
+ * map_event() callback installed by armv7_a9_pmu_init(): delegates to
+ * map_cpu_event() with the Cortex-A9 event and cache maps. 0xFF is
+ * presumably the raw event mask -- confirm against map_cpu_event().
+ */
+static int armv7_a9_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a9_perf_map,
+				&armv7_a9_perf_cache_map, 0xFF);
+}
+
+/*
+ * map_event() callback installed by armv7_a5_pmu_init(): delegates to
+ * map_cpu_event() with the Cortex-A5 event and cache maps. 0xFF is
+ * presumably the raw event mask -- confirm against map_cpu_event().
+ */
+static int armv7_a5_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a5_perf_map,
+				&armv7_a5_perf_cache_map, 0xFF);
+}
+
+/*
+ * map_event() callback installed by armv7_a15_pmu_init(): delegates to
+ * map_cpu_event() with the Cortex-A15 event and cache maps. 0xFF is
+ * presumably the raw event mask -- confirm against map_cpu_event().
+ */
+static int armv7_a15_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a15_perf_map,
+				&armv7_a15_perf_cache_map, 0xFF);
+}
+
 static struct arm_pmu armv7pmu = {
 	.handle_irq		= armv7pmu_handle_irq,
 	.enable			= armv7pmu_enable_event,
@@ -1173,7 +1174,6 @@ static struct arm_pmu armv7pmu = {
 	.start			= armv7pmu_start,
 	.stop			= armv7pmu_stop,
 	.reset			= armv7pmu_reset,
-	.raw_event_mask		= 0xFF,
 	.max_period		= (1LLU << 32) - 1,
 };
 
@@ -1188,62 +1188,59 @@ static u32 __init armv7_read_num_pmnc_events(void)
 	return nb_cnt + 1;
 }
 
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
 	armv7pmu.name		= "ARMv7 Cortex-A8";
-	armv7pmu.cache_map	= &armv7_a8_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a8_perf_map;
+	armv7pmu.map_event	= armv7_a8_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
 	armv7pmu.name		= "ARMv7 Cortex-A9";
-	armv7pmu.cache_map	= &armv7_a9_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a9_perf_map;
+	armv7pmu.map_event	= armv7_a9_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
 	armv7pmu.name		= "ARMv7 Cortex-A5";
-	armv7pmu.cache_map	= &armv7_a5_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a5_perf_map;
+	armv7pmu.map_event	= armv7_a5_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
 	armv7pmu.name		= "ARMv7 Cortex-A15";
-	armv7pmu.cache_map	= &armv7_a15_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a15_perf_map;
+	armv7pmu.map_event	= armv7_a15_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
+	armv7pmu.set_event_filter = armv7pmu_set_event_filter;
 	return &armv7pmu;
 }
 #else
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	return NULL;
 }

+ 47 - 43
arch/arm/kernel/perf_event_xscale.c

@@ -40,7 +40,7 @@ enum xscale_perf_types {
 };
 
 enum xscale_counters {
-	XSCALE_CYCLE_COUNTER	= 1,
+	XSCALE_CYCLE_COUNTER	= 0,
 	XSCALE_COUNTER0,
 	XSCALE_COUNTER1,
 	XSCALE_COUNTER2,
@@ -222,7 +222,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -249,13 +249,10 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -266,7 +263,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -284,6 +281,7 @@ static void
 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -305,18 +303,19 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -336,16 +335,16 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	if (XSCALE_PERFCTR_CCNT == event->config_base) {
@@ -368,24 +367,26 @@ static void
 xscale1pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val |= XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -424,7 +425,13 @@ xscale1pmu_write_counter(int counter, u32 val)
 	}
 }
 
-static const struct arm_pmu xscale1pmu = {
+static int xscale_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &xscale_perf_map,
+				&xscale_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu xscale1pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE1,
 	.name		= "xscale1",
 	.handle_irq	= xscale1pmu_handle_irq,
@@ -435,14 +442,12 @@ static const struct arm_pmu xscale1pmu = {
 	.get_event_idx	= xscale1pmu_get_event_idx,
 	.start		= xscale1pmu_start,
 	.stop		= xscale1pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 3,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return &xscale1pmu;
 }
@@ -560,7 +565,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc, of_flags;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -581,13 +586,10 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -598,7 +600,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -616,6 +618,7 @@ static void
 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -649,16 +652,17 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -692,14 +696,14 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	int idx = xscale1pmu_get_event_idx(cpuc, event);
@@ -718,24 +722,26 @@ static void
 xscale2pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
 	val |= XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -786,7 +792,7 @@ xscale2pmu_write_counter(int counter, u32 val)
 	}
 }
 
-static const struct arm_pmu xscale2pmu = {
+static struct arm_pmu xscale2pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE2,
 	.name		= "xscale2",
 	.handle_irq	= xscale2pmu_handle_irq,
@@ -797,24 +803,22 @@ static const struct arm_pmu xscale2pmu = {
 	.get_event_idx	= xscale2pmu_get_event_idx,
 	.start		= xscale2pmu_start,
 	.stop		= xscale2pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 5,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return &xscale2pmu;
 }
 #else
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return NULL;
 }

+ 8 - 174
arch/arm/kernel/pmu.c

@@ -10,192 +10,26 @@
  *
  */
 
-#define pr_fmt(fmt) "PMU: " fmt
-
-#include <linux/cpumask.h>
 #include <linux/err.h>
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
 
 #include <asm/pmu.h>
 
-static volatile long pmu_lock;
-
-static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES];
-
-static int __devinit pmu_register(struct platform_device *pdev,
-					enum arm_pmu_type type)
-{
-	if (type < 0 || type >= ARM_NUM_PMU_DEVICES) {
-		pr_warning("received registration request for unknown "
-				"PMU device type %d\n", type);
-		return -EINVAL;
-	}
-
-	if (pmu_devices[type]) {
-		pr_warning("rejecting duplicate registration of PMU device "
-			"type %d.", type);
-		return -ENOSPC;
-	}
-
-	pr_info("registered new PMU device of type %d\n", type);
-	pmu_devices[type] = pdev;
-	return 0;
-}
-
-#define OF_MATCH_PMU(_name, _type) {	\
-	.compatible = _name,		\
-	.data = (void *)_type,		\
-}
-
-#define OF_MATCH_CPU(name)	OF_MATCH_PMU(name, ARM_PMU_DEVICE_CPU)
-
-static struct of_device_id armpmu_of_device_ids[] = {
-	OF_MATCH_CPU("arm,cortex-a9-pmu"),
-	OF_MATCH_CPU("arm,cortex-a8-pmu"),
-	OF_MATCH_CPU("arm,arm1136-pmu"),
-	OF_MATCH_CPU("arm,arm1176-pmu"),
-	{},
-};
-
-#define PLAT_MATCH_PMU(_name, _type) {	\
-	.name		= _name,	\
-	.driver_data	= _type,	\
-}
-
-#define PLAT_MATCH_CPU(_name)	PLAT_MATCH_PMU(_name, ARM_PMU_DEVICE_CPU)
-
-static struct platform_device_id armpmu_plat_device_ids[] = {
-	PLAT_MATCH_CPU("arm-pmu"),
-	{},
-};
-
-enum arm_pmu_type armpmu_device_type(struct platform_device *pdev)
-{
-	const struct of_device_id	*of_id;
-	const struct platform_device_id *pdev_id;
-
-	/* provided by of_device_id table */
-	if (pdev->dev.of_node) {
-		of_id = of_match_device(armpmu_of_device_ids, &pdev->dev);
-		BUG_ON(!of_id);
-		return (enum arm_pmu_type)of_id->data;
-	}
-
-	/* Provided by platform_device_id table */
-	pdev_id = platform_get_device_id(pdev);
-	BUG_ON(!pdev_id);
-	return pdev_id->driver_data;
-}
-
-static int __devinit armpmu_device_probe(struct platform_device *pdev)
-{
-	return pmu_register(pdev, armpmu_device_type(pdev));
-}
-
-static struct platform_driver armpmu_driver = {
-	.driver		= {
-		.name	= "arm-pmu",
-		.of_match_table = armpmu_of_device_ids,
-	},
-	.probe		= armpmu_device_probe,
-	.id_table	= armpmu_plat_device_ids,
-};
-
-static int __init register_pmu_driver(void)
-{
-	return platform_driver_register(&armpmu_driver);
-}
-device_initcall(register_pmu_driver);
+/*
+ * PMU locking to ensure mutual exclusion between different subsystems.
+ */
+static unsigned long pmu_lock[BITS_TO_LONGS(ARM_NUM_PMU_DEVICES)];
 
-struct platform_device *
+int
 reserve_pmu(enum arm_pmu_type type)
 {
-	struct platform_device *pdev;
-
-	if (test_and_set_bit_lock(type, &pmu_lock)) {
-		pdev = ERR_PTR(-EBUSY);
-	} else if (pmu_devices[type] == NULL) {
-		clear_bit_unlock(type, &pmu_lock);
-		pdev = ERR_PTR(-ENODEV);
-	} else {
-		pdev = pmu_devices[type];
-	}
-
-	return pdev;
+	return test_and_set_bit_lock(type, pmu_lock) ? -EBUSY : 0;
 }
 EXPORT_SYMBOL_GPL(reserve_pmu);
 
-int
+void
 release_pmu(enum arm_pmu_type type)
 {
-	if (WARN_ON(!pmu_devices[type]))
-		return -EINVAL;
-	clear_bit_unlock(type, &pmu_lock);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(release_pmu);
-
-static int
-set_irq_affinity(int irq,
-		 unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	int err = irq_set_affinity(irq, cpumask_of(cpu));
-	if (err)
-		pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-			   irq, cpu);
-	return err;
-#else
-	return -EINVAL;
-#endif
-}
-
-static int
-init_cpu_pmu(void)
-{
-	int i, irqs, err = 0;
-	struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU];
-
-	if (!pdev)
-		return -ENODEV;
-
-	irqs = pdev->num_resources;
-
-	/*
-	 * If we have a single PMU interrupt that we can't shift, assume that
-	 * we're running on a uniprocessor machine and continue.
-	 */
-	if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0)))
-		return 0;
-
-	for (i = 0; i < irqs; ++i) {
-		err = set_irq_affinity(platform_get_irq(pdev, i), i);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-int
-init_pmu(enum arm_pmu_type type)
-{
-	int err = 0;
-
-	switch (type) {
-	case ARM_PMU_DEVICE_CPU:
-		err = init_cpu_pmu();
-		break;
-	default:
-		pr_warning("attempt to initialise PMU of unknown "
-			   "type %d\n", type);
-		err = -EINVAL;
-	}
-
-	return err;
+	clear_bit_unlock(type, pmu_lock);
 }
-EXPORT_SYMBOL_GPL(init_pmu);

+ 2 - 19
arch/arm/kernel/setup.c

@@ -849,25 +849,8 @@ static struct machine_desc * __init setup_machine_tags(unsigned int nr)
 
 	if (__atags_pointer)
 		tags = phys_to_virt(__atags_pointer);
-	else if (mdesc->boot_params) {
-#ifdef CONFIG_MMU
-		/*
-		 * We still are executing with a minimal MMU mapping created
-		 * with the presumption that the machine default for this
-		 * is located in the first MB of RAM.  Anything else will
-		 * fault and silently hang the kernel at this point.
-		 */
-		if (mdesc->boot_params < PHYS_OFFSET ||
-		    mdesc->boot_params >= PHYS_OFFSET + SZ_1M) {
-			printk(KERN_WARNING
-			       "Default boot params at physical 0x%08lx out of reach\n",
-			       mdesc->boot_params);
-		} else
-#endif
-		{
-			tags = phys_to_virt(mdesc->boot_params);
-		}
-	}
+	else if (mdesc->atag_offset)
+		tags = (void *)(PAGE_OFFSET + mdesc->atag_offset);
 
 #if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
 	/*

+ 27 - 58
arch/arm/kernel/sleep.S

@@ -8,92 +8,61 @@
 	.text
 
 /*
- * Save CPU state for a suspend
- *  r1 = v:p offset
- *  r2 = suspend function arg0
- *  r3 = suspend function
+ * Save CPU state for a suspend.  This saves the CPU general purpose
+ * registers, and allocates space on the kernel stack to save the CPU
+ * specific registers and some other data for resume.
+ *  r0 = suspend function arg0
+ *  r1 = suspend function
  */
 ENTRY(__cpu_suspend)
 	stmfd	sp!, {r4 - r11, lr}
 #ifdef MULTI_CPU
 	ldr	r10, =processor
-	ldr	r5, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
-	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
+	ldr	r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
 #else
-	ldr	r5, =cpu_suspend_size
-	ldr	ip, =cpu_do_resume
+	ldr	r4, =cpu_suspend_size
 #endif
-	mov	r6, sp			@ current virtual SP
-	sub	sp, sp, r5		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer to CPU save block
-	add	ip, ip, r1		@ convert resume fn to phys
-	stmfd	sp!, {r1, r6, ip}	@ save v:p, virt SP, phys resume fn
-	ldr	r5, =sleep_save_sp
-	add	r6, sp, r1		@ convert SP to phys
-	stmfd	sp!, {r2, r3}		@ save suspend func arg and pointer
+	mov	r5, sp			@ current virtual SP
+	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
+	sub	sp, sp, r4		@ allocate CPU state on stack
+	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
+	add	r0, sp, #8		@ save pointer to save block
+	mov	r1, r4			@ size of save block
+	mov	r2, r5			@ virtual SP
+	ldr	r3, =sleep_save_sp
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
 	ALT_UP(mov lr, #0)
 	and	lr, lr, #15
-	str	r6, [r5, lr, lsl #2]	@ save phys SP
-#else
-	str	r6, [r5]		@ save phys SP
-#endif
-#ifdef MULTI_CPU
-	mov	lr, pc
-	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
-#else
-	bl	cpu_do_suspend
-#endif
-
-	@ flush data cache
-#ifdef MULTI_CACHE
-	ldr	r10, =cpu_cache
-	mov	lr, pc
-	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
-#else
-	bl	__cpuc_flush_kern_all
+	add	r3, r3, lr, lsl #2
 #endif
+	bl	__cpu_suspend_save
 	adr	lr, BSYM(cpu_suspend_abort)
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
 ENDPROC(__cpu_suspend)
 	.ltorg
 
 cpu_suspend_abort:
-	ldmia	sp!, {r1 - r3}		@ pop v:p, virt SP, phys resume fn
+	ldmia	sp!, {r1 - r3}		@ pop phys pgd, virt SP, phys resume fn
+	teq	r0, #0
+	moveq	r0, #1			@ force non-zero value
 	mov	sp, r2
 	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_suspend_abort)
 
 /*
  * r0 = control register value
- * r1 = v:p offset (preserved by cpu_do_resume)
- * r2 = phys page table base
- * r3 = L1 section flags
  */
+	.align	5
 ENTRY(cpu_resume_mmu)
-	adr	r4, cpu_resume_turn_mmu_on
-	mov	r4, r4, lsr #20
-	orr	r3, r3, r4, lsl #20
-	ldr	r5, [r2, r4, lsl #2]	@ save old mapping
-	str	r3, [r2, r4, lsl #2]	@ setup 1:1 mapping for mmu code
-	sub	r2, r2, r1
 	ldr	r3, =cpu_resume_after_mmu
-	bic	r1, r0, #CR_C		@ ensure D-cache is disabled
-	b	cpu_resume_turn_mmu_on
-ENDPROC(cpu_resume_mmu)
-	.ltorg
-	.align	5
-cpu_resume_turn_mmu_on:
-	mcr	p15, 0, r1, c1, c0, 0	@ turn on MMU, I-cache, etc
-	mrc	p15, 0, r1, c0, c0, 0	@ read id reg
-	mov	r1, r1
-	mov	r1, r1
+	mcr	p15, 0, r0, c1, c0, 0	@ turn on MMU, I-cache, etc
+	mrc	p15, 0, r0, c0, c0, 0	@ read id reg
+	mov	r0, r0
+	mov	r0, r0
 	mov	pc, r3			@ jump to virtual address
-ENDPROC(cpu_resume_turn_mmu_on)
+ENDPROC(cpu_resume_mmu)
 cpu_resume_after_mmu:
-	str	r5, [r2, r4, lsl #2]	@ restore old mapping
-	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
 	bl	cpu_init		@ restore the und/abt/irq banked regs
 	mov	r0, #0			@ return zero on success
 	ldmfd	sp!, {r4 - r11, pc}
@@ -119,7 +88,7 @@ ENTRY(cpu_resume)
 	ldr	r0, sleep_save_sp	@ stack phys addr
 #endif
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
-	@ load v:p, stack, resume fn
+	@ load phys pgd, stack, resume fn
   ARM(	ldmia	r0!, {r1, sp, pc}	)
 THUMB(	ldmia	r0!, {r1, r2, r3}	)
 THUMB(	mov	sp, r2			)

+ 1 - 37
arch/arm/kernel/smp.c

@@ -460,10 +460,6 @@ u64 smp_irq_stat_cpu(unsigned int cpu)
 	for (i = 0; i < NR_IPI; i++)
 		sum += __get_irq_stat(cpu, ipi_irqs[i]);
 
-#ifdef CONFIG_LOCAL_TIMERS
-	sum += __get_irq_stat(cpu, local_timer_irqs);
-#endif
-
 	return sum;
 }
 
@@ -480,38 +476,6 @@ static void ipi_timer(void)
 	irq_exit();
 }
 
-#ifdef CONFIG_LOCAL_TIMERS
-asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
-{
-	handle_local_timer(regs);
-}
-
-void handle_local_timer(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-	int cpu = smp_processor_id();
-
-	if (local_timer_ack()) {
-		__inc_irq_stat(cpu, local_timer_irqs);
-		ipi_timer();
-	}
-
-	set_irq_regs(old_regs);
-}
-
-void show_local_irqs(struct seq_file *p, int prec)
-{
-	unsigned int cpu;
-
-	seq_printf(p, "%*s: ", prec, "LOC");
-
-	for_each_present_cpu(cpu)
-		seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs));
-
-	seq_printf(p, " Local timer interrupts\n");
-}
-#endif
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static void smp_timer_broadcast(const struct cpumask *mask)
 {
@@ -562,7 +526,7 @@ static void percpu_timer_stop(void)
 	unsigned int cpu = smp_processor_id();
 	struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
 
-	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+	local_timer_stop(evt);
 }
 #endif
 

+ 45 - 2
arch/arm/kernel/smp_twd.c

@@ -19,6 +19,7 @@
 #include <linux/io.h>
 
 #include <asm/smp_twd.h>
+#include <asm/localtimer.h>
 #include <asm/hardware/gic.h>
 
 /* set up by the platform code */
@@ -26,6 +27,8 @@ void __iomem *twd_base;
 
 static unsigned long twd_timer_rate;
 
+static struct clock_event_device __percpu **twd_evt;
+
 static void twd_set_mode(enum clock_event_mode mode,
 			struct clock_event_device *clk)
 {
@@ -80,6 +83,12 @@ int twd_timer_ack(void)
 	return 0;
 }
 
+void twd_timer_stop(struct clock_event_device *clk)
+{
+	twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk);
+	disable_percpu_irq(clk->irq);
+}
+
 static void __cpuinit twd_calibrate_rate(void)
 {
 	unsigned long count;
@@ -119,11 +128,43 @@ static void __cpuinit twd_calibrate_rate(void)
 	}
 }
 
+static irqreturn_t twd_handler(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = *(struct clock_event_device **)dev_id;
+
+	if (twd_timer_ack()) {
+		evt->event_handler(evt);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
 /*
  * Setup the local clock events for a CPU.
  */
 void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 {
+	struct clock_event_device **this_cpu_clk;
+
+	if (!twd_evt) {
+		int err;
+
+		twd_evt = alloc_percpu(struct clock_event_device *);
+		if (!twd_evt) {
+			pr_err("twd: can't allocate memory\n");
+			return;
+		}
+
+		err = request_percpu_irq(clk->irq, twd_handler,
+					 "twd", twd_evt);
+		if (err) {
+			pr_err("twd: can't register interrupt %d (%d)\n",
+			       clk->irq, err);
+			return;
+		}
+	}
+
 	twd_calibrate_rate();
 
 	clk->name = "local_timer";
@@ -137,8 +178,10 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 	clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
 	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
 
+	this_cpu_clk = __this_cpu_ptr(twd_evt);
+	*this_cpu_clk = clk;
+
 	clockevents_register_device(clk);
 
-	/* Make sure our local interrupt controller has this enabled */
-	gic_enable_ppi(clk->irq);
+	enable_percpu_irq(clk->irq, 0);
 }

+ 72 - 0
arch/arm/kernel/suspend.c

@@ -0,0 +1,72 @@
+#include <linux/init.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+static pgd_t *suspend_pgd;
+
+extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
+extern void cpu_resume_mmu(void);
+
+/*
+ * This is called by __cpu_suspend() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ */
+void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
+{
+	*save_ptr = virt_to_phys(ptr);
+
+	/* This must correspond to the LDM in cpu_resume() assembly */
+	*ptr++ = virt_to_phys(suspend_pgd);
+	*ptr++ = sp;
+	*ptr++ = virt_to_phys(cpu_do_resume);
+
+	cpu_do_suspend(ptr);
+
+	flush_cache_all();
+	outer_clean_range(*save_ptr, *save_ptr + ptrsz);
+	outer_clean_range(virt_to_phys(save_ptr),
+			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
+}
+
+/*
+ * Wrap __cpu_suspend(): fail with -EINVAL if suspend_pgd is not set up, and
+ * switch back to the active mm's page tables after a successful resume.
+ */
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	int ret;
+
+	if (!suspend_pgd)
+		return -EINVAL;
+
+	/*
+	 * Provide a temporary page table with an identity mapping for
+	 * the MMU-enable code, required for resuming.  On successful
+	 * resume (indicated by a zero return code), we need to switch
+	 * back to the correct page tables.
+	 */
+	ret = __cpu_suspend(arg, fn);
+	if (ret == 0) {
+		cpu_switch_mm(mm->pgd, mm);
+		local_flush_tlb_all();
+	}
+
+	return ret;
+}
+
+static int __init cpu_suspend_init(void)
+{
+	suspend_pgd = pgd_alloc(&init_mm);
+	if (suspend_pgd) {
+		unsigned long addr = virt_to_phys(cpu_resume_mmu);
+		identity_mapping_add(suspend_pgd, addr, addr + SECTION_SIZE);
+	}
+	return suspend_pgd ? 0 : -ENOMEM;
+}
+core_initcall(cpu_suspend_init);

+ 2 - 0
arch/arm/mach-at91/at91sam9g45.c

@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/pm.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/irq.h>
 #include <asm/mach/arch.h>
@@ -319,6 +320,7 @@ static void at91sam9g45_poweroff(void)
 static void __init at91sam9g45_map_io(void)
 {
 	at91_init_sram(0, AT91SAM9G45_SRAM_BASE, AT91SAM9G45_SRAM_SIZE);
+	init_consistent_dma_size(SZ_4M);
 }
 
 static void __init at91sam9g45_initialize(void)

+ 0 - 2
arch/arm/mach-at91/include/mach/at91sam9g45.h

@@ -128,8 +128,6 @@
 #define AT91SAM9G45_EHCI_BASE	0x00800000	/* USB Host controller (EHCI) */
 #define AT91SAM9G45_VDEC_BASE	0x00900000	/* Video Decoder Controller */
 
-#define CONSISTENT_DMA_SIZE	SZ_4M
-
 /*
  * DMA peripheral identifiers
  * for hardware handshaking interface

+ 1 - 1
arch/arm/mach-at91/include/mach/debug-macro.S

@@ -14,7 +14,7 @@
 #include <mach/hardware.h>
 #include <mach/at91_dbgu.h>
 
-	.macro	addruart, rp, rv
+	.macro	addruart, rp, rv, tmp
 	ldr	\rp, =(AT91_BASE_SYS + AT91_DBGU)	@ System peripherals (phys address)
 	ldr	\rv, =(AT91_VA_BASE_SYS	+ AT91_DBGU)	@ System peripherals (virt address)
 	.endm

+ 1 - 2
arch/arm/mach-bcmring/include/mach/hardware.h

@@ -22,7 +22,6 @@
 #define __ASM_ARCH_HARDWARE_H
 
 #include <asm/sizes.h>
-#include <mach/memory.h>
 #include <cfg_global.h>
 #include <mach/csp/mm_io.h>
 
@@ -31,7 +30,7 @@
  *  *_SIZE  is the size of the region
  *  *_BASE  is the virtual address
  */
-#define RAM_START               PLAT_PHYS_OFFSET
+#define RAM_START               PHYS_OFFSET
 
 #define RAM_SIZE                (CFG_GLOBAL_RAM_SIZE-CFG_GLOBAL_RAM_SIZE_RESERVED)
 #define RAM_BASE                PAGE_OFFSET

+ 0 - 33
arch/arm/mach-bcmring/include/mach/memory.h

@@ -1,33 +0,0 @@
-/*****************************************************************************
-* Copyright 2005 - 2008 Broadcom Corporation.  All rights reserved.
-*
-* Unless you and Broadcom execute a separate written software license
-* agreement governing use of this software, this software is licensed to you
-* under the terms of the GNU General Public License version 2, available at
-* http://www.broadcom.com/licenses/GPLv2.php (the "GPL").
-*
-* Notwithstanding the above, under no circumstances may you combine this
-* software in any way with any other Broadcom software provided under a
-* license other than the GPL, without Broadcom's express prior written
-* consent.
-*****************************************************************************/
-
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#include <cfg_global.h>
-
-/*
- * Physical vs virtual RAM address space conversion.  These are
- * private definitions which should NOT be used outside memory.h
- * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-
-#define PLAT_PHYS_OFFSET CFG_GLOBAL_RAM_BASE
-
-/*
- * Maximum DMA memory allowed is 14M
- */
-#define CONSISTENT_DMA_SIZE (SZ_16M - SZ_2M)
-
-#endif

+ 3 - 0
arch/arm/mach-bcmring/mm.c

@@ -13,6 +13,7 @@
 *****************************************************************************/
 
 #include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
 #include <asm/mach/map.h>
 
 #include <mach/hardware.h>
@@ -53,4 +54,6 @@ void __init bcmring_map_io(void)
 {
 
 	iotable_init(bcmring_io_desc, ARRAY_SIZE(bcmring_io_desc));
+	/* Maximum DMA memory allowed is 14M */
+	init_consistent_dma_size(14 << 20);
 }

+ 1 - 1
arch/arm/mach-clps711x/autcpu12.c

@@ -64,7 +64,7 @@ void __init autcpu12_map_io(void)
 
 MACHINE_START(AUTCPU12, "autronix autcpu12")
 	/* Maintainer: Thomas Gleixner */
-	.boot_params	= 0xc0020000,
+	.atag_offset	= 0x20000,
 	.map_io		= autcpu12_map_io,
 	.init_irq	= clps711x_init_irq,
 	.timer		= &clps711x_timer,

+ 1 - 1
arch/arm/mach-clps711x/cdb89712.c

@@ -55,7 +55,7 @@ static void __init cdb89712_map_io(void)
 
 MACHINE_START(CDB89712, "Cirrus-CDB89712")
 	/* Maintainer: Ray Lehtiniemi */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x100,
 	.map_io		= cdb89712_map_io,
 	.init_irq	= clps711x_init_irq,
 	.timer		= &clps711x_timer,

+ 1 - 1
arch/arm/mach-clps711x/ceiva.c

@@ -56,7 +56,7 @@ static void __init ceiva_map_io(void)
 
 MACHINE_START(CEIVA, "CEIVA/Polaroid Photo MAX Digital Picture Frame")
 	/* Maintainer: Rob Scott */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x100,
 	.map_io		= ceiva_map_io,
 	.init_irq	= clps711x_init_irq,
 	.timer		= &clps711x_timer,

+ 1 - 1
arch/arm/mach-clps711x/clep7312.c

@@ -36,7 +36,7 @@ fixup_clep7312(struct tag *tags, char **cmdline, struct meminfo *mi)
 
 MACHINE_START(CLEP7212, "Cirrus Logic 7212/7312")
 	/* Maintainer: Nobody */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x0100,
 	.fixup		= fixup_clep7312,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,

+ 1 - 1
arch/arm/mach-clps711x/edb7211-arch.c

@@ -56,7 +56,7 @@ fixup_edb7211(struct tag *tags, char **cmdline, struct meminfo *mi)
 
 MACHINE_START(EDB7211, "CL-EDB7211 (EP7211 eval board)")
 	/* Maintainer: Jon McClintock */
-	.boot_params	= 0xc0020100,	/* 0xc0000000 - 0xc001ffff can be video RAM */
+	.atag_offset	= 0x20100,	/* 0xc0000000 - 0xc001ffff can be video RAM */
 	.fixup		= fixup_edb7211,
 	.map_io		= edb7211_map_io,
 	.reserve	= edb7211_reserve,

+ 0 - 1
arch/arm/mach-clps711x/fortunet.c

@@ -74,7 +74,6 @@ fortunet_fixup(struct tag *tags, char **cmdline, struct meminfo *mi)
 
 MACHINE_START(FORTUNET, "ARM-FortuNet")
 	/* Maintainer: FortuNet Inc. */
-	.boot_params	= 0x00000000,
 	.fixup		= fortunet_fixup,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,

+ 1 - 1
arch/arm/mach-clps711x/include/mach/debug-macro.S

@@ -14,7 +14,7 @@
 #include <mach/hardware.h>
 #include <asm/hardware/clps7111.h>
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 #ifndef CONFIG_DEBUG_CLPS711X_UART2
 		mov	\rp, #0x0000	@ UART1
 #else

+ 1 - 1
arch/arm/mach-clps711x/p720t.c

@@ -88,7 +88,7 @@ static void __init p720t_map_io(void)
 
 MACHINE_START(P720T, "ARM-Prospector720T")
 	/* Maintainer: ARM Ltd/Deep Blue Solutions Ltd */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x100,
 	.fixup		= fixup_p720t,
 	.map_io		= p720t_map_io,
 	.init_irq	= clps711x_init_irq,

+ 1 - 1
arch/arm/mach-cns3xxx/cns3420vb.c

@@ -197,7 +197,7 @@ static void __init cns3420_map_io(void)
 }
 
 MACHINE_START(CNS3420VB, "Cavium Networks CNS3420 Validation Board")
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.map_io		= cns3420_map_io,
 	.init_irq	= cns3xxx_init_irq,
 	.timer		= &cns3xxx_timer,

+ 1 - 1
arch/arm/mach-cns3xxx/include/mach/debug-macro.S

@@ -10,7 +10,7 @@
  * published by the Free Software Foundation.
  */
 
-		.macro	addruart,rp,rv
+		.macro	addruart,rp,rv,tmp
 		mov	\rp, #0x00009000
 		orr	\rv, \rp, #0xf0000000	@ virtual base
 		orr	\rp, \rp, #0x10000000

+ 0 - 26
arch/arm/mach-cns3xxx/include/mach/memory.h

@@ -1,26 +0,0 @@
-/*
- * Copyright 2003 ARM Limited
- * Copyright 2008 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- */
-
-#ifndef __MACH_MEMORY_H
-#define __MACH_MEMORY_H
-
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET		UL(0x00000000)
-
-#define __phys_to_bus(x)	((x) + PHYS_OFFSET)
-#define __bus_to_phys(x)	((x) - PHYS_OFFSET)
-
-#define __virt_to_bus(v)	__phys_to_bus(__virt_to_phys(v))
-#define __bus_to_virt(b)	__phys_to_virt(__bus_to_phys(b))
-#define __pfn_to_bus(p)		__phys_to_bus(__pfn_to_phys(p))
-#define __bus_to_pfn(b)		__phys_to_pfn(__bus_to_phys(b))
-
-#endif

+ 1 - 1
arch/arm/mach-davinci/board-da830-evm.c

@@ -676,7 +676,7 @@ static void __init da830_evm_map_io(void)
 }
 
 MACHINE_START(DAVINCI_DA830_EVM, "DaVinci DA830/OMAP-L137/AM17x EVM")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= da830_evm_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-da850-evm.c

@@ -1291,7 +1291,7 @@ static void __init da850_evm_map_io(void)
 }
 
 MACHINE_START(DAVINCI_DA850_EVM, "DaVinci DA850/OMAP-L138/AM18x EVM")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= da850_evm_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-dm355-evm.c

@@ -351,7 +351,7 @@ static __init void dm355_evm_init(void)
 }
 
 MACHINE_START(DAVINCI_DM355_EVM, "DaVinci DM355 EVM")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io	      = dm355_evm_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-dm355-leopard.c

@@ -270,7 +270,7 @@ static __init void dm355_leopard_init(void)
 }
 
 MACHINE_START(DM355_LEOPARD, "DaVinci DM355 leopard")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io	      = dm355_leopard_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-dm365-evm.c

@@ -612,7 +612,7 @@ static __init void dm365_evm_init(void)
 }
 
 MACHINE_START(DAVINCI_DM365_EVM, "DaVinci DM365 EVM")
-	.boot_params	= (0x80000100),
+	.atag_offset	= 0x100,
 	.map_io		= dm365_evm_map_io,
 	.init_irq	= davinci_irq_init,
 	.timer		= &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-dm644x-evm.c

@@ -712,7 +712,7 @@ static __init void davinci_evm_init(void)
 
 MACHINE_START(DAVINCI_EVM, "DaVinci DM644x EVM")
 	/* Maintainer: MontaVista Software <source@mvista.com> */
-	.boot_params  = (DAVINCI_DDR_BASE + 0x100),
+	.atag_offset  = 0x100,
 	.map_io	      = davinci_evm_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,

+ 2 - 2
arch/arm/mach-davinci/board-dm646x-evm.c

@@ -792,7 +792,7 @@ static __init void evm_init(void)
 }
 
 MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io       = davinci_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer        = &davinci_timer,
@@ -801,7 +801,7 @@ MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM")
 MACHINE_END
 
 MACHINE_START(DAVINCI_DM6467TEVM, "DaVinci DM6467T EVM")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io       = davinci_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer        = &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-mityomapl138.c

@@ -566,7 +566,7 @@ static void __init mityomapl138_map_io(void)
 }
 
 MACHINE_START(MITYOMAPL138, "MityDSP-L138/MityARM-1808")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= mityomapl138_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-neuros-osd2.c

@@ -272,7 +272,7 @@ static __init void davinci_ntosd2_init(void)
 
 MACHINE_START(NEUROS_OSD2, "Neuros OSD2")
 	/* Maintainer: Neuros Technologies <neuros@groups.google.com> */
-	.boot_params	= (DAVINCI_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		 = davinci_ntosd2_map_io,
 	.init_irq	= davinci_irq_init,
 	.timer		= &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-omapl138-hawk.c

@@ -338,7 +338,7 @@ static void __init omapl138_hawk_map_io(void)
 }
 
 MACHINE_START(OMAPL138_HAWKBOARD, "AM18x/OMAP-L138 Hawkboard")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= omapl138_hawk_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-sffsdr.c

@@ -151,7 +151,7 @@ static __init void davinci_sffsdr_init(void)
 
 MACHINE_START(SFFSDR, "Lyrtech SFFSDR")
 	/* Maintainer: Hugo Villeneuve hugo.villeneuve@lyrtech.com */
-	.boot_params  = (DAVINCI_DDR_BASE + 0x100),
+	.atag_offset  = 0x100,
 	.map_io	      = davinci_sffsdr_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,

+ 1 - 1
arch/arm/mach-davinci/board-tnetv107x-evm.c

@@ -277,7 +277,7 @@ console_initcall(tnetv107x_evm_console_init);
 #endif
 
 MACHINE_START(TNETV107X, "TNETV107X EVM")
-	.boot_params	= (TNETV107X_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= tnetv107x_init,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,

+ 3 - 0
arch/arm/mach-davinci/common.c

@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/etherdevice.h>
 #include <linux/davinci_emac.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/tlb.h>
 #include <asm/mach/map.h>
@@ -86,6 +87,8 @@ void __init davinci_common_init(struct davinci_soc_info *soc_info)
 		iotable_init(davinci_soc_info.io_desc,
 				davinci_soc_info.io_desc_num);
 
+	init_consistent_dma_size(14 << 20);
+
 	/*
 	 * Normally devicemaps_init() would flush caches and tlb after
 	 * mdesc->map_io(), but we must also do it here because of the CPU

+ 1 - 1
arch/arm/mach-davinci/cpuidle.c

@@ -19,7 +19,7 @@
 #include <asm/proc-fns.h>
 
 #include <mach/cpuidle.h>
-#include <mach/memory.h>
+#include <mach/ddr2.h>
 
 #define DAVINCI_CPUIDLE_MAX_STATES	2
 

+ 4 - 0
arch/arm/mach-davinci/include/mach/ddr2.h

@@ -0,0 +1,4 @@
+#define DDR2_SDRCR_OFFSET	0xc
+#define DDR2_SRPD_BIT		(1 << 23)
+#define DDR2_MCLKSTOPEN_BIT	(1 << 30)
+#define DDR2_LPMODEN_BIT	(1 << 31)

+ 23 - 29
arch/arm/mach-davinci/include/mach/debug-macro.S

@@ -18,56 +18,50 @@
 
 #include <linux/serial_reg.h>
 
-#include <asm/memory.h>
-
 #include <mach/serial.h>
 
 #define UART_SHIFT	2
 
-#define davinci_uart_v2p(x)	((x) - PAGE_OFFSET + PLAT_PHYS_OFFSET)
-#define davinci_uart_p2v(x)	((x) - PLAT_PHYS_OFFSET + PAGE_OFFSET)
-
 		.pushsection .data
 davinci_uart_phys:	.word	0
 davinci_uart_virt:	.word	0
 		.popsection
 
-		.macro addruart, rp, rv
+		.macro addruart, rp, rv, tmp
 
 		/* Use davinci_uart_phys/virt if already configured */
-10:		mrc	p15, 0, \rp, c1, c0
-		tst	\rp, #1			@ MMU enabled?
-		ldreq	\rp, =davinci_uart_v2p(davinci_uart_phys)
-		ldrne	\rp, =davinci_uart_phys
-		add	\rv, \rp, #4		@ davinci_uart_virt
-		ldr	\rp, [\rp, #0]
-		ldr	\rv, [\rv, #0]
+10:		adr	\rp, 99f		@ get effective addr of 99f
+		ldr	\rv, [\rp]		@ get absolute addr of 99f
+		sub	\rv, \rv, \rp		@ offset between the two
+		ldr	\rp, [\rp, #4]		@ abs addr of davinci_uart_phys
+		sub	\tmp, \rp, \rv		@ make it effective
+		ldr	\rp, [\tmp, #0]		@ davinci_uart_phys
+		ldr	\rv, [\tmp, #4]		@ davinci_uart_virt
 		cmp	\rp, #0			@ is port configured?
 		cmpne	\rv, #0
-		bne	99f			@ already configured
+		bne	100f			@ already configured
 
 		/* Check the debug UART address set in uncompress.h */
-		mrc	p15, 0, \rp, c1, c0
-		tst	\rp, #1			@ MMU enabled?
+		and	\rp, pc, #0xff000000
+		ldr	\rv, =DAVINCI_UART_INFO_OFS
+		add	\rp, \rp, \rv
 
 		/* Copy uart phys address from decompressor uart info */
-		ldreq	\rv, =davinci_uart_v2p(davinci_uart_phys)
-		ldrne	\rv, =davinci_uart_phys
-		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
-		ldr	\rp, [\rp, #0]
-		str	\rp, [\rv]
+		ldr	\rv, [\rp, #0]
+		str	\rv, [\tmp, #0]
 
 		/* Copy uart virt address from decompressor uart info */
-		ldreq	\rv, =davinci_uart_v2p(davinci_uart_virt)
-		ldrne	\rv, =davinci_uart_virt
-		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
-		ldr	\rp, [\rp, #4]
-		str	\rp, [\rv]
+		ldr	\rv, [\rp, #4]
+		str	\rv, [\tmp, #4]
 
 		b	10b
-99:
+
+		.align
+99:		.word	.
+		.word	davinci_uart_phys
+		.ltorg
+
+100:
 		.endm
 
 		.macro	senduart,rd,rx

+ 0 - 44
arch/arm/mach-davinci/include/mach/memory.h

@@ -1,44 +0,0 @@
-/*
- * DaVinci memory space definitions
- *
- * Author: Kevin Hilman, MontaVista Software, Inc. <source@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-/**************************************************************************
- * Included Files
- **************************************************************************/
-#include <asm/page.h>
-#include <asm/sizes.h>
-
-/**************************************************************************
- * Definitions
- **************************************************************************/
-#define DAVINCI_DDR_BASE	0x80000000
-#define DA8XX_DDR_BASE		0xc0000000
-
-#if defined(CONFIG_ARCH_DAVINCI_DA8XX) && defined(CONFIG_ARCH_DAVINCI_DMx)
-#error Cannot enable DaVinci and DA8XX platforms concurrently
-#elif defined(CONFIG_ARCH_DAVINCI_DA8XX)
-#define PLAT_PHYS_OFFSET DA8XX_DDR_BASE
-#else
-#define PLAT_PHYS_OFFSET DAVINCI_DDR_BASE
-#endif
-
-#define DDR2_SDRCR_OFFSET	0xc
-#define DDR2_SRPD_BIT		BIT(23)
-#define DDR2_MCLKSTOPEN_BIT	BIT(30)
-#define DDR2_LPMODEN_BIT	BIT(31)
-
-/*
- * Increase size of DMA-consistent memory region
- */
-#define CONSISTENT_DMA_SIZE (14<<20)
-
-#endif /* __ASM_ARCH_MEMORY_H */

+ 2 - 1
arch/arm/mach-davinci/include/mach/serial.h

@@ -21,8 +21,9 @@
  * macros in debug-macro.S.
  *
  * This area sits just below the page tables (see arch/arm/kernel/head.S).
+ * We define it as a relative offset from start of usable RAM.
  */
-#define DAVINCI_UART_INFO	(PLAT_PHYS_OFFSET + 0x3ff8)
+#define DAVINCI_UART_INFO_OFS	0x3ff8
 
 #define DAVINCI_UART0_BASE	(IO_PHYS + 0x20000)
 #define DAVINCI_UART1_BASE	(IO_PHYS + 0x20400)

+ 6 - 1
arch/arm/mach-davinci/include/mach/uncompress.h

@@ -43,7 +43,12 @@ static inline void flush(void)
 
 static inline void set_uart_info(u32 phys, void * __iomem virt)
 {
-	u32 *uart_info = (u32 *)(DAVINCI_UART_INFO);
+	/*
+	 * Get address of some .bss variable and round it down
+	 * a la CONFIG_AUTO_ZRELADDR.
+	 */
+	u32 ram_start = (u32)&uart & 0xf8000000;
+	u32 *uart_info = (u32 *)(ram_start + DAVINCI_UART_INFO_OFS);
 
 	uart = (u32 *)phys;
 	uart_info[0] = phys;

+ 1 - 1
arch/arm/mach-davinci/sleep.S

@@ -22,7 +22,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <mach/psc.h>
-#include <mach/memory.h>
+#include <mach/ddr2.h>
 
 #include "clock.h"
 

+ 1 - 1
arch/arm/mach-dove/cm-a510.c

@@ -87,7 +87,7 @@ static void __init cm_a510_init(void)
 }
 
 MACHINE_START(CM_A510, "Compulab CM-A510 Board")
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.init_machine	= cm_a510_init,
 	.map_io		= dove_map_io,
 	.init_early	= dove_init_early,

+ 1 - 1
arch/arm/mach-dove/dove-db-setup.c

@@ -94,7 +94,7 @@ static void __init dove_db_init(void)
 }
 
 MACHINE_START(DOVE_DB, "Marvell DB-MV88AP510-BP Development Board")
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.init_machine	= dove_db_init,
 	.map_io		= dove_map_io,
 	.init_early	= dove_init_early,

+ 1 - 1
arch/arm/mach-dove/include/mach/debug-macro.S

@@ -8,7 +8,7 @@
 
 #include <mach/bridge-regs.h>
 
-	.macro	addruart, rp, rv
+	.macro	addruart, rp, rv, tmp
 	ldr	\rp, =DOVE_SB_REGS_PHYS_BASE
 	ldr	\rv, =DOVE_SB_REGS_VIRT_BASE
 	orr	\rp, \rp, #0x00012000

+ 0 - 10
arch/arm/mach-dove/include/mach/memory.h

@@ -1,10 +0,0 @@
-/*
- * arch/arm/mach-dove/include/mach/memory.h
- */
-
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#define PLAT_PHYS_OFFSET		UL(0x00000000)
-
-#endif

+ 1 - 1
arch/arm/mach-ebsa110/core.c

@@ -280,7 +280,7 @@ arch_initcall(ebsa110_init);
 
 MACHINE_START(EBSA110, "EBSA110")
 	/* Maintainer: Russell King */
-	.boot_params	= 0x00000400,
+	.atag_offset	= 0x400,
 	.reserve_lp0	= 1,
 	.reserve_lp2	= 1,
 	.soft_reboot	= 1,

+ 1 - 1
arch/arm/mach-ebsa110/include/mach/debug-macro.S

@@ -11,7 +11,7 @@
  *
 **/
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		mov	\rp, #0xf0000000
 		orr	\rp, \rp, #0x00000be0
 		mov	\rp, \rv

+ 1 - 1
arch/arm/mach-ep93xx/adssphere.c

@@ -33,7 +33,7 @@ static void __init adssphere_init_machine(void)
 
 MACHINE_START(ADSSPHERE, "ADS Sphere board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,

+ 8 - 8
arch/arm/mach-ep93xx/edb93xx.c

@@ -241,7 +241,7 @@ static void __init edb93xx_init_machine(void)
 #ifdef CONFIG_MACH_EDB9301
 MACHINE_START(EDB9301, "Cirrus Logic EDB9301 Evaluation Board")
 	/* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -252,7 +252,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_EDB9302
 MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board")
 	/* Maintainer: George Kashperko <george@chas.com.ua> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -263,7 +263,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_EDB9302A
 MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -274,7 +274,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_EDB9307
 MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board")
 	/* Maintainer: Herbert Valerio Riedel <hvr@gnu.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -285,7 +285,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_EDB9307A
 MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board")
 	/* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -296,7 +296,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_EDB9312
 MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board")
 	/* Maintainer: Toufeeq Hussain <toufeeq_hussain@infosys.com> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -307,7 +307,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_EDB9315
 MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -318,7 +318,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_EDB9315A
 MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,

+ 1 - 1
arch/arm/mach-ep93xx/gesbc9312.c

@@ -33,7 +33,7 @@ static void __init gesbc9312_init_machine(void)
 
 MACHINE_START(GESBC9312, "Glomation GESBC-9312-sx")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,

+ 1 - 1
arch/arm/mach-ep93xx/include/mach/debug-macro.S

@@ -11,7 +11,7 @@
  */
 #include <mach/ep93xx-regs.h>
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		ldr	\rp, =EP93XX_APB_PHYS_BASE	@ Physical base
 		ldr	\rv, =EP93XX_APB_VIRT_BASE	@ virtual base
 		orr	\rp, \rp, #0x000c0000

+ 4 - 4
arch/arm/mach-ep93xx/micro9.c

@@ -77,7 +77,7 @@ static void __init micro9_init_machine(void)
 #ifdef CONFIG_MACH_MICRO9H
 MACHINE_START(MICRO9, "Contec Micro9-High")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -88,7 +88,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_MICRO9M
 MACHINE_START(MICRO9M, "Contec Micro9-Mid")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_ASYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -99,7 +99,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_MICRO9L
 MACHINE_START(MICRO9L, "Contec Micro9-Lite")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -110,7 +110,7 @@ MACHINE_END
 #ifdef CONFIG_MACH_MICRO9S
 MACHINE_START(MICRO9S, "Contec Micro9-Slim")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_ASYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,

+ 2 - 2
arch/arm/mach-ep93xx/simone.c

@@ -65,8 +65,8 @@ static void __init simone_init_machine(void)
 }
 
 MACHINE_START(SIM_ONE, "Simplemachines Sim.One Board")
-/* Maintainer: Ryan Mallon */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	/* Maintainer: Ryan Mallon */
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,

+ 1 - 1
arch/arm/mach-ep93xx/snappercl15.c

@@ -163,7 +163,7 @@ static void __init snappercl15_init_machine(void)
 
 MACHINE_START(SNAPPER_CL15, "Bluewater Systems Snapper CL15")
 	/* Maintainer: Ryan Mallon */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer 		= &ep93xx_timer,

+ 1 - 1
arch/arm/mach-ep93xx/ts72xx.c

@@ -257,7 +257,7 @@ static void __init ts72xx_init_machine(void)
 
 MACHINE_START(TS72XX, "Technologic Systems TS-72xx SBC")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ts72xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini