Эх сурвалжийг харах

Merge branch 'for-linus' of master.kernel.org:/home/rmk/linux-2.6-arm

* 'for-linus' of master.kernel.org:/home/rmk/linux-2.6-arm: (237 commits)
  ARM: 7004/1: fix traps.h compile warnings
  ARM: 6998/2: kernel: use proper memory barriers for bitops
  ARM: 6997/1: ep93xx: increase NR_BANKS to 16 for support of 128MB RAM
  ARM: Fix build errors caused by adding generic macros
  ARM: CPU hotplug: ensure we migrate all IRQs off a downed CPU
  ARM: CPU hotplug: pass in proper affinity mask on IRQ migration
  ARM: GIC: avoid routing interrupts to offline CPUs
  ARM: CPU hotplug: fix abuse of irqdesc->node
  ARM: 6981/2: mmci: adjust calculation of f_min
  ARM: 7000/1: LPAE: Use long long printk format for displaying the pud
  ARM: 6999/1: head, zImage: Always Enter the kernel in ARM state
  ARM: btc: avoid invalidating the branch target cache on kernel TLB maintanence
  ARM: ARM_DMA_ZONE_SIZE is no more
  ARM: mach-shark: move ARM_DMA_ZONE_SIZE to mdesc->dma_zone_size
  ARM: mach-sa1100: move ARM_DMA_ZONE_SIZE to mdesc->dma_zone_size
  ARM: mach-realview: move from ARM_DMA_ZONE_SIZE to mdesc->dma_zone_size
  ARM: mach-pxa: move from ARM_DMA_ZONE_SIZE to mdesc->dma_zone_size
  ARM: mach-ixp4xx: move from ARM_DMA_ZONE_SIZE to mdesc->dma_zone_size
  ARM: mach-h720x: move from ARM_DMA_ZONE_SIZE to mdesc->dma_zone_size
  ARM: mach-davinci: move from ARM_DMA_ZONE_SIZE to mdesc->dma_zone_size
  ...
Linus Torvalds 14 жил өмнө
parent
commit
b6844e8f64
100 өөрчлөгдсөн 5857 нэмэгдсэн , 2752 устгасан
  1. 5 0
      Documentation/arm/Booting
  2. 42 0
      Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt
  3. 267 0
      Documentation/arm/kernel_user_helpers.txt
  4. 21 0
      Documentation/devicetree/bindings/arm/pmu.txt
  5. 29 10
      arch/arm/Kconfig
  6. 8 2
      arch/arm/boot/compressed/Makefile
  7. 6 6
      arch/arm/boot/compressed/head-shmobile.S
  8. 2 1
      arch/arm/boot/compressed/head.S
  9. 1 1
      arch/arm/boot/compressed/mmcif-sh7372.c
  10. 95 0
      arch/arm/boot/compressed/sdhi-sh7372.c
  11. 449 0
      arch/arm/boot/compressed/sdhi-shmobile.c
  12. 11 0
      arch/arm/boot/compressed/sdhi-shmobile.h
  13. 8 4
      arch/arm/boot/compressed/vmlinux.lds.in
  14. 83 110
      arch/arm/common/dmabounce.c
  15. 3 4
      arch/arm/common/gic.c
  16. 7 9
      arch/arm/common/it8152.c
  17. 31 29
      arch/arm/common/sa1111.c
  18. 9 0
      arch/arm/include/asm/assembler.h
  19. 2 2
      arch/arm/include/asm/bitops.h
  20. 13 75
      arch/arm/include/asm/dma-mapping.h
  21. 6 5
      arch/arm/include/asm/dma.h
  22. 7 7
      arch/arm/include/asm/entry-macro-multi.S
  23. 20 16
      arch/arm/include/asm/hwcap.h
  24. 7 21
      arch/arm/include/asm/kprobes.h
  25. 4 0
      arch/arm/include/asm/mach/arch.h
  26. 0 12
      arch/arm/include/asm/memory.h
  27. 2 0
      arch/arm/include/asm/perf_event.h
  28. 1 1
      arch/arm/include/asm/pmu.h
  29. 7 7
      arch/arm/include/asm/proc-fns.h
  30. 10 1
      arch/arm/include/asm/ptrace.h
  31. 4 0
      arch/arm/include/asm/scatterlist.h
  32. 6 2
      arch/arm/include/asm/setup.h
  33. 22 0
      arch/arm/include/asm/suspend.h
  34. 2 0
      arch/arm/include/asm/tcm.h
  35. 10 48
      arch/arm/include/asm/tlbflush.h
  36. 3 0
      arch/arm/include/asm/traps.h
  37. 6 1
      arch/arm/kernel/Makefile
  38. 3 0
      arch/arm/kernel/asm-offsets.c
  39. 204 319
      arch/arm/kernel/entry-armv.S
  40. 5 26
      arch/arm/kernel/entry-header.S
  41. 8 0
      arch/arm/kernel/head-nommu.S
  42. 8 0
      arch/arm/kernel/head.S
  43. 5 7
      arch/arm/kernel/hw_breakpoint.c
  44. 30 21
      arch/arm/kernel/irq.c
  45. 999 0
      arch/arm/kernel/kprobes-arm.c
  46. 577 0
      arch/arm/kernel/kprobes-common.c
  47. 0 1670
      arch/arm/kernel/kprobes-decode.c
  48. 1462 0
      arch/arm/kernel/kprobes-thumb.c
  49. 201 21
      arch/arm/kernel/kprobes.c
  50. 420 0
      arch/arm/kernel/kprobes.h
  51. 8 2
      arch/arm/kernel/perf_event.c
  52. 324 20
      arch/arm/kernel/perf_event_v7.c
  53. 70 17
      arch/arm/kernel/pmu.c
  54. 3 25
      arch/arm/kernel/ptrace.c
  55. 60 49
      arch/arm/kernel/setup.c
  56. 37 47
      arch/arm/kernel/sleep.S
  57. 9 2
      arch/arm/kernel/smp.c
  58. 2 0
      arch/arm/kernel/smp_scu.c
  59. 57 11
      arch/arm/kernel/tcm.c
  60. 16 1
      arch/arm/kernel/traps.c
  61. 66 60
      arch/arm/kernel/vmlinux.lds.S
  62. 0 4
      arch/arm/mach-bcmring/include/mach/entry-macro.S
  63. 1 0
      arch/arm/mach-davinci/board-da830-evm.c
  64. 1 0
      arch/arm/mach-davinci/board-da850-evm.c
  65. 1 0
      arch/arm/mach-davinci/board-dm355-evm.c
  66. 1 0
      arch/arm/mach-davinci/board-dm355-leopard.c
  67. 1 0
      arch/arm/mach-davinci/board-dm365-evm.c
  68. 1 0
      arch/arm/mach-davinci/board-dm644x-evm.c
  69. 2 0
      arch/arm/mach-davinci/board-dm646x-evm.c
  70. 1 0
      arch/arm/mach-davinci/board-mityomapl138.c
  71. 1 0
      arch/arm/mach-davinci/board-neuros-osd2.c
  72. 1 0
      arch/arm/mach-davinci/board-omapl138-hawk.c
  73. 1 0
      arch/arm/mach-davinci/board-sffsdr.c
  74. 1 0
      arch/arm/mach-davinci/board-tnetv107x-evm.c
  75. 0 3
      arch/arm/mach-davinci/include/mach/entry-macro.S
  76. 0 7
      arch/arm/mach-davinci/include/mach/memory.h
  77. 0 8
      arch/arm/mach-exynos4/platsmp.c
  78. 1 1
      arch/arm/mach-exynos4/pm.c
  79. 0 22
      arch/arm/mach-exynos4/sleep.S
  80. 1 0
      arch/arm/mach-h720x/h7201-eval.c
  81. 1 0
      arch/arm/mach-h720x/h7202-eval.c
  82. 0 3
      arch/arm/mach-h720x/include/mach/entry-macro.S
  83. 0 7
      arch/arm/mach-h720x/include/mach/memory.h
  84. 6 0
      arch/arm/mach-ixp4xx/avila-setup.c
  85. 6 6
      arch/arm/mach-ixp4xx/common-pci.c
  86. 3 0
      arch/arm/mach-ixp4xx/coyote-setup.c
  87. 3 0
      arch/arm/mach-ixp4xx/dsmg600-setup.c
  88. 3 0
      arch/arm/mach-ixp4xx/fsg-setup.c
  89. 3 0
      arch/arm/mach-ixp4xx/gateway7001-setup.c
  90. 3 0
      arch/arm/mach-ixp4xx/goramo_mlr.c
  91. 3 0
      arch/arm/mach-ixp4xx/gtwx5715-setup.c
  92. 0 4
      arch/arm/mach-ixp4xx/include/mach/memory.h
  93. 12 0
      arch/arm/mach-ixp4xx/ixdp425-setup.c
  94. 3 0
      arch/arm/mach-ixp4xx/nas100d-setup.c
  95. 3 0
      arch/arm/mach-ixp4xx/nslu2-setup.c
  96. 3 0
      arch/arm/mach-ixp4xx/vulcan-setup.c
  97. 3 0
      arch/arm/mach-ixp4xx/wg302v2-setup.c
  98. 0 4
      arch/arm/mach-lpc32xx/include/mach/entry-macro.S
  99. 0 8
      arch/arm/mach-msm/platsmp.c
  100. 4 3
      arch/arm/mach-omap2/control.c

+ 5 - 0
Documentation/arm/Booting

@@ -164,3 +164,8 @@ In either case, the following conditions must be met:
 - The boot loader is expected to call the kernel image by jumping
 - The boot loader is expected to call the kernel image by jumping
   directly to the first instruction of the kernel image.
   directly to the first instruction of the kernel image.
 
 
+  On CPUs supporting the ARM instruction set, the entry must be
+  made in ARM state, even for a Thumb-2 kernel.
+
+  On CPUs supporting only the Thumb instruction set such as
+  Cortex-M class CPUs, the entry must be made in Thumb state.

+ 42 - 0
Documentation/arm/SH-Mobile/zboot-rom-sdhi.txt

@@ -0,0 +1,42 @@
+ROM-able zImage boot from eSD
+-----------------------------
+
+An ROM-able zImage compiled with ZBOOT_ROM_SDHI may be written to eSD and
+SuperH Mobile ARM will to boot directly from the SDHI hardware block.
+
+This is achieved by the mask ROM loading the first portion of the image into
+MERAM and then jumping to it. This portion contains loader code which
+copies the entire image to SDRAM and jumps to it. From there the zImage
+boot code proceeds as normal, uncompressing the image into its final
+location and then jumping to it.
+
+This code has been tested on an mackerel board using the developer 1A eSD
+boot mode which is configured using the following jumper settings.
+
+   8 7 6 5 4 3 2 1
+   x|x|x|x| |x|x|
+S4 -+-+-+-+-+-+-+-
+    | | | |x| | |x on
+
+The eSD card needs to be present in SDHI slot 1 (CN7).
+As such S1 and S33 also need to be configured as per
+the notes in arch/arm/mach-shmobile/board-mackerel.c.
+
+A partial zImage must be written to physical partition #1 (boot)
+of the eSD at sector 0 in vrl4 format. A utility vrl4 is supplied to
+accomplish this.
+
+e.g.
+	vrl4 < zImage | dd of=/dev/sdX bs=512 count=17
+
+A full copy of _the same_ zImage should be written to physical partition #1
+(boot) of the eSD at sector 0. This should _not_ be in vrl4 format.
+
+	vrl4 < zImage | dd of=/dev/sdX bs=512
+
+Note: The commands above assume that the physical partition has been
+switched. No such facility currently exists in the Linux Kernel.
+
+Physical partitions are described in the eSD specification.  At the time of
+writing they are not the same as partitions that are typically configured
+using fdisk and visible through /proc/partitions

+ 267 - 0
Documentation/arm/kernel_user_helpers.txt

@@ -0,0 +1,267 @@
+Kernel-provided User Helpers
+============================
+
+These are segment of kernel provided user code reachable from user space
+at a fixed address in kernel memory.  This is used to provide user space
+with some operations which require kernel help because of unimplemented
+native feature and/or instructions in many ARM CPUs. The idea is for this
+code to be executed directly in user mode for best efficiency but which is
+too intimate with the kernel counter part to be left to user libraries.
+In fact this code might even differ from one CPU to another depending on
+the available instruction set, or whether it is a SMP systems. In other
+words, the kernel reserves the right to change this code as needed without
+warning. Only the entry points and their results as documented here are
+guaranteed to be stable.
+
+This is different from (but doesn't preclude) a full blown VDSO
+implementation, however a VDSO would prevent some assembly tricks with
+constants that allows for efficient branching to those code segments. And
+since those code segments only use a few cycles before returning to user
+code, the overhead of a VDSO indirect far call would add a measurable
+overhead to such minimalistic operations.
+
+User space is expected to bypass those helpers and implement those things
+inline (either in the code emitted directly by the compiler, or part of
+the implementation of a library call) when optimizing for a recent enough
+processor that has the necessary native support, but only if resulting
+binaries are already to be incompatible with earlier ARM processors due to
+useage of similar native instructions for other things.  In other words
+don't make binaries unable to run on earlier processors just for the sake
+of not using these kernel helpers if your compiled code is not going to
+use new instructions for other purpose.
+
+New helpers may be added over time, so an older kernel may be missing some
+helpers present in a newer kernel.  For this reason, programs must check
+the value of __kuser_helper_version (see below) before assuming that it is
+safe to call any particular helper.  This check should ideally be
+performed only once at process startup time, and execution aborted early
+if the required helpers are not provided by the kernel version that
+process is running on.
+
+kuser_helper_version
+--------------------
+
+Location:	0xffff0ffc
+
+Reference declaration:
+
+  extern int32_t __kuser_helper_version;
+
+Definition:
+
+  This field contains the number of helpers being implemented by the
+  running kernel.  User space may read this to determine the availability
+  of a particular helper.
+
+Usage example:
+
+#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
+
+void check_kuser_version(void)
+{
+	if (__kuser_helper_version < 2) {
+		fprintf(stderr, "can't do atomic operations, kernel too old\n");
+		abort();
+	}
+}
+
+Notes:
+
+  User space may assume that the value of this field never changes
+  during the lifetime of any single process.  This means that this
+  field can be read once during the initialisation of a library or
+  startup phase of a program.
+
+kuser_get_tls
+-------------
+
+Location:	0xffff0fe0
+
+Reference prototype:
+
+  void * __kuser_get_tls(void);
+
+Input:
+
+  lr = return address
+
+Output:
+
+  r0 = TLS value
+
+Clobbered registers:
+
+  none
+
+Definition:
+
+  Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+
+Usage example:
+
+typedef void * (__kuser_get_tls_t)(void);
+#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
+
+void foo()
+{
+	void *tls = __kuser_get_tls();
+	printf("TLS = %p\n", tls);
+}
+
+Notes:
+
+  - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
+
+kuser_cmpxchg
+-------------
+
+Location:	0xffff0fc0
+
+Reference prototype:
+
+  int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
+
+Input:
+
+  r0 = oldval
+  r1 = newval
+  r2 = ptr
+  lr = return address
+
+Output:
+
+  r0 = success code (zero or non-zero)
+  C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+  r3, ip, flags
+
+Definition:
+
+  Atomically store newval in *ptr only if *ptr is equal to oldval.
+  Return zero if *ptr was changed or non-zero if no exchange happened.
+  The C flag is also set if *ptr was changed to allow for assembly
+  optimization in the calling code.
+
+Usage example:
+
+typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
+
+int atomic_add(volatile int *ptr, int val)
+{
+	int old, new;
+
+	do {
+		old = *ptr;
+		new = old + val;
+	} while(__kuser_cmpxchg(old, new, ptr));
+
+	return new;
+}
+
+Notes:
+
+  - This routine already includes memory barriers as needed.
+
+  - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
+
+kuser_memory_barrier
+--------------------
+
+Location:	0xffff0fa0
+
+Reference prototype:
+
+  void __kuser_memory_barrier(void);
+
+Input:
+
+  lr = return address
+
+Output:
+
+  none
+
+Clobbered registers:
+
+  none
+
+Definition:
+
+  Apply any needed memory barrier to preserve consistency with data modified
+  manually and __kuser_cmpxchg usage.
+
+Usage example:
+
+typedef void (__kuser_dmb_t)(void);
+#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
+
+Notes:
+
+  - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
+
+kuser_cmpxchg64
+---------------
+
+Location:	0xffff0f60
+
+Reference prototype:
+
+  int __kuser_cmpxchg64(const int64_t *oldval,
+                        const int64_t *newval,
+                        volatile int64_t *ptr);
+
+Input:
+
+  r0 = pointer to oldval
+  r1 = pointer to newval
+  r2 = pointer to target value
+  lr = return address
+
+Output:
+
+  r0 = success code (zero or non-zero)
+  C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+  r3, lr, flags
+
+Definition:
+
+  Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
+  is equal to the 64-bit value pointed by *oldval.  Return zero if *ptr was
+  changed or non-zero if no exchange happened.
+
+  The C flag is also set if *ptr was changed to allow for assembly
+  optimization in the calling code.
+
+Usage example:
+
+typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
+                                  const int64_t *newval,
+                                  volatile int64_t *ptr);
+#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
+
+int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
+{
+	int64_t old, new;
+
+	do {
+		old = *ptr;
+		new = old + val;
+	} while(__kuser_cmpxchg64(&old, &new, ptr));
+
+	return new;
+}
+
+Notes:
+
+  - This routine already includes memory barriers as needed.
+
+  - Due to the length of this sequence, this spans 2 conventional kuser
+    "slots", therefore 0xffff0f80 is not used as a valid entry point.
+
+  - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).

+ 21 - 0
Documentation/devicetree/bindings/arm/pmu.txt

@@ -0,0 +1,21 @@
+* ARM Performance Monitor Units
+
+ARM cores often have a PMU for counting cpu and cache events like cache misses
+and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
+representation in the device tree should be done as under:-
+
+Required properties:
+
+- compatible : should be one of
+	"arm,cortex-a9-pmu"
+	"arm,cortex-a8-pmu"
+	"arm,arm1176-pmu"
+	"arm,arm1136-pmu"
+- interrupts : 1 combined interrupt or 1 per core.
+
+Example:
+
+pmu {
+        compatible = "arm,cortex-a9-pmu";
+        interrupts = <100 101>;
+};

+ 29 - 10
arch/arm/Kconfig

@@ -10,7 +10,7 @@ config ARM
 	select GENERIC_ATOMIC64 if (CPU_V6 || !CPU_32v6K || !AEABI)
 	select GENERIC_ATOMIC64 if (CPU_V6 || !CPU_32v6K || !AEABI)
 	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
 	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_KGDB
-	select HAVE_KPROBES if (!XIP_KERNEL && !THUMB2_KERNEL)
+	select HAVE_KPROBES if !XIP_KERNEL
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
 	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
@@ -37,6 +37,9 @@ config ARM
 	  Europe.  There is an ARM Linux project with a web page at
 	  Europe.  There is an ARM Linux project with a web page at
 	  <http://www.arm.linux.org.uk/>.
 	  <http://www.arm.linux.org.uk/>.
 
 
+config ARM_HAS_SG_CHAIN
+	bool
+
 config HAVE_PWM
 config HAVE_PWM
 	bool
 	bool
 
 
@@ -1347,7 +1350,6 @@ config SMP_ON_UP
 
 
 config HAVE_ARM_SCU
 config HAVE_ARM_SCU
 	bool
 	bool
-	depends on SMP
 	help
 	help
 	  This option enables support for the ARM system coherency unit
 	  This option enables support for the ARM system coherency unit
 
 
@@ -1716,17 +1718,34 @@ config ZBOOT_ROM
 	  Say Y here if you intend to execute your compressed kernel image
 	  Say Y here if you intend to execute your compressed kernel image
 	  (zImage) directly from ROM or flash.  If unsure, say N.
 	  (zImage) directly from ROM or flash.  If unsure, say N.
 
 
+choice
+	prompt "Include SD/MMC loader in zImage (EXPERIMENTAL)"
+	depends on ZBOOT_ROM && ARCH_SH7372 && EXPERIMENTAL
+	default ZBOOT_ROM_NONE
+	help
+	  Include experimental SD/MMC loading code in the ROM-able zImage.
+	  With this enabled it is possible to write the the ROM-able zImage
+	  kernel image to an MMC or SD card and boot the kernel straight
+	  from the reset vector. At reset the processor Mask ROM will load
+	  the first part of the the ROM-able zImage which in turn loads the
+	  rest the kernel image to RAM.
+
+config ZBOOT_ROM_NONE
+	bool "No SD/MMC loader in zImage (EXPERIMENTAL)"
+	help
+	  Do not load image from SD or MMC
+
 config ZBOOT_ROM_MMCIF
 config ZBOOT_ROM_MMCIF
 	bool "Include MMCIF loader in zImage (EXPERIMENTAL)"
 	bool "Include MMCIF loader in zImage (EXPERIMENTAL)"
-	depends on ZBOOT_ROM && ARCH_SH7372 && EXPERIMENTAL
 	help
 	help
-	  Say Y here to include experimental MMCIF loading code in the
-	  ROM-able zImage. With this enabled it is possible to write the
-	  the ROM-able zImage kernel image to an MMC card and boot the
-	  kernel straight from the reset vector. At reset the processor
-	  Mask ROM will load the first part of the the ROM-able zImage
-	  which in turn loads the rest the kernel image to RAM using the
-	  MMCIF hardware block.
+	  Load image from MMCIF hardware block.
+
+config ZBOOT_ROM_SH_MOBILE_SDHI
+	bool "Include SuperH Mobile SDHI loader in zImage (EXPERIMENTAL)"
+	help
+	  Load image from SDHI hardware block
+
+endchoice
 
 
 config CMDLINE
 config CMDLINE
 	string "Default kernel command string"
 	string "Default kernel command string"

+ 8 - 2
arch/arm/boot/compressed/Makefile

@@ -6,13 +6,19 @@
 
 
 OBJS		=
 OBJS		=
 
 
-# Ensure that mmcif loader code appears early in the image
+# Ensure that MMCIF loader code appears early in the image
 # to minimise that number of bocks that have to be read in
 # to minimise that number of bocks that have to be read in
 # order to load it.
 # order to load it.
 ifeq ($(CONFIG_ZBOOT_ROM_MMCIF),y)
 ifeq ($(CONFIG_ZBOOT_ROM_MMCIF),y)
-ifeq ($(CONFIG_ARCH_SH7372),y)
 OBJS		+= mmcif-sh7372.o
 OBJS		+= mmcif-sh7372.o
 endif
 endif
+
+# Ensure that SDHI loader code appears early in the image
+# to minimise that number of bocks that have to be read in
+# order to load it.
+ifeq ($(CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI),y)
+OBJS		+= sdhi-shmobile.o
+OBJS		+= sdhi-sh7372.o
 endif
 endif
 
 
 AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)

+ 6 - 6
arch/arm/boot/compressed/head-shmobile.S

@@ -25,14 +25,14 @@
 	/* load board-specific initialization code */
 	/* load board-specific initialization code */
 #include <mach/zboot.h>
 #include <mach/zboot.h>
 
 
-#ifdef CONFIG_ZBOOT_ROM_MMCIF
-	/* Load image from MMC */
-	adr	sp, __tmp_stack + 128
+#if defined(CONFIG_ZBOOT_ROM_MMCIF) || defined(CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI)
+	/* Load image from MMC/SD */
+	adr	sp, __tmp_stack + 256
 	ldr	r0, __image_start
 	ldr	r0, __image_start
 	ldr	r1, __image_end
 	ldr	r1, __image_end
 	subs	r1, r1, r0
 	subs	r1, r1, r0
 	ldr	r0, __load_base
 	ldr	r0, __load_base
-	bl	mmcif_loader
+	bl	mmc_loader
 
 
 	/* Jump to loaded code */
 	/* Jump to loaded code */
 	ldr	r0, __loaded
 	ldr	r0, __loaded
@@ -51,9 +51,9 @@ __loaded:
 	.long	__continue
 	.long	__continue
 	.align
 	.align
 __tmp_stack:
 __tmp_stack:
-	.space	128
+	.space	256
 __continue:
 __continue:
-#endif /* CONFIG_ZBOOT_ROM_MMCIF */
+#endif /* CONFIG_ZBOOT_ROM_MMC || CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI */
 
 
 	b	1f
 	b	1f
 __atags:@ tag #1
 __atags:@ tag #1

+ 2 - 1
arch/arm/boot/compressed/head.S

@@ -353,7 +353,8 @@ not_relocated:	mov	r0, #0
 		mov	r0, #0			@ must be zero
 		mov	r0, #0			@ must be zero
 		mov	r1, r7			@ restore architecture number
 		mov	r1, r7			@ restore architecture number
 		mov	r2, r8			@ restore atags pointer
 		mov	r2, r8			@ restore atags pointer
-		mov	pc, r4			@ call kernel
+ ARM(		mov	pc, r4	)		@ call kernel
+ THUMB(		bx	r4	)		@ entry point is always ARM
 
 
 		.align	2
 		.align	2
 		.type	LC0, #object
 		.type	LC0, #object

+ 1 - 1
arch/arm/boot/compressed/mmcif-sh7372.c

@@ -40,7 +40,7 @@
  * to an MMC card
  * to an MMC card
  * # dd if=vrl4.out of=/dev/sdx bs=512 seek=1
  * # dd if=vrl4.out of=/dev/sdx bs=512 seek=1
  */
  */
-asmlinkage void mmcif_loader(unsigned char *buf, unsigned long len)
+asmlinkage void mmc_loader(unsigned char *buf, unsigned long len)
 {
 {
 	mmc_init_progress();
 	mmc_init_progress();
 	mmc_update_progress(MMC_PROGRESS_ENTER);
 	mmc_update_progress(MMC_PROGRESS_ENTER);

+ 95 - 0
arch/arm/boot/compressed/sdhi-sh7372.c

@@ -0,0 +1,95 @@
+/*
+ * SuperH Mobile SDHI
+ *
+ * Copyright (C) 2010 Magnus Damm
+ * Copyright (C) 2010 Kuninori Morimoto
+ * Copyright (C) 2010 Simon Horman
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Parts inspired by u-boot
+ */
+
+#include <linux/io.h>
+#include <mach/mmc.h>
+#include <linux/mmc/boot.h>
+#include <linux/mmc/tmio.h>
+
+#include "sdhi-shmobile.h"
+
+#define PORT179CR       0xe60520b3
+#define PORT180CR       0xe60520b4
+#define PORT181CR       0xe60520b5
+#define PORT182CR       0xe60520b6
+#define PORT183CR       0xe60520b7
+#define PORT184CR       0xe60520b8
+
+#define SMSTPCR3        0xe615013c
+
+#define CR_INPUT_ENABLE 0x10
+#define CR_FUNCTION1    0x01
+
+#define SDHI1_BASE	(void __iomem *)0xe6860000
+#define SDHI_BASE	SDHI1_BASE
+
+/*  SuperH Mobile SDHI loader
+ *
+ * loads the zImage from an SD card starting from block 0
+ * on physical partition 1
+ *
+ * The image must be start with a vrl4 header and
+ * the zImage must start at offset 512 of the image. That is,
+ * at block 1 (=byte 512) of physical partition 1
+ *
+ * Use the following line to write the vrl4 formated zImage
+ * to an SD card
+ * # dd if=vrl4.out of=/dev/sdx bs=512
+ */
+asmlinkage void mmc_loader(unsigned short *buf, unsigned long len)
+{
+	int high_capacity;
+
+	mmc_init_progress();
+
+	mmc_update_progress(MMC_PROGRESS_ENTER);
+        /* Initialise SDHI1 */
+        /* PORT184CR: GPIO_FN_SDHICMD1 Control */
+        __raw_writeb(CR_FUNCTION1, PORT184CR);
+        /* PORT179CR: GPIO_FN_SDHICLK1 Control */
+        __raw_writeb(CR_INPUT_ENABLE|CR_FUNCTION1, PORT179CR);
+        /* PORT181CR: GPIO_FN_SDHID1_3 Control */
+        __raw_writeb(CR_FUNCTION1, PORT183CR);
+        /* PORT182CR: GPIO_FN_SDHID1_2 Control */
+        __raw_writeb(CR_FUNCTION1, PORT182CR);
+        /* PORT183CR: GPIO_FN_SDHID1_1 Control */
+        __raw_writeb(CR_FUNCTION1, PORT181CR);
+        /* PORT180CR: GPIO_FN_SDHID1_0 Control */
+        __raw_writeb(CR_FUNCTION1, PORT180CR);
+
+        /* Enable clock to SDHI1 hardware block */
+        __raw_writel(__raw_readl(SMSTPCR3) & ~(1 << 13), SMSTPCR3);
+
+	/* setup SDHI hardware */
+	mmc_update_progress(MMC_PROGRESS_INIT);
+	high_capacity = sdhi_boot_init(SDHI_BASE);
+	if (high_capacity < 0)
+		goto err;
+
+	mmc_update_progress(MMC_PROGRESS_LOAD);
+	/* load kernel */
+	if (sdhi_boot_do_read(SDHI_BASE, high_capacity,
+			      0, /* Kernel is at block 1 */
+			      (len + TMIO_BBS - 1) / TMIO_BBS, buf))
+		goto err;
+
+        /* Disable clock to SDHI1 hardware block */
+        __raw_writel(__raw_readl(SMSTPCR3) & (1 << 13), SMSTPCR3);
+
+	mmc_update_progress(MMC_PROGRESS_DONE);
+
+	return;
+err:
+	for(;;);
+}

+ 449 - 0
arch/arm/boot/compressed/sdhi-shmobile.c

@@ -0,0 +1,449 @@
+/*
+ * SuperH Mobile SDHI
+ *
+ * Copyright (C) 2010 Magnus Damm
+ * Copyright (C) 2010 Kuninori Morimoto
+ * Copyright (C) 2010 Simon Horman
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Parts inspired by u-boot
+ */
+
+#include <linux/io.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
+#include <linux/mmc/tmio.h>
+#include <mach/sdhi.h>
+
+#define OCR_FASTBOOT		(1<<29)
+#define OCR_HCS			(1<<30)
+#define OCR_BUSY		(1<<31)
+
+#define RESP_CMD12		0x00000030
+
+static inline u16 sd_ctrl_read16(void __iomem *base, int addr)
+{
+        return __raw_readw(base + addr);
+}
+
+static inline u32 sd_ctrl_read32(void __iomem *base, int addr)
+{
+	return __raw_readw(base + addr) |
+	       __raw_readw(base + addr + 2) << 16;
+}
+
+static inline void sd_ctrl_write16(void __iomem *base, int addr, u16 val)
+{
+	__raw_writew(val, base + addr);
+}
+
+static inline void sd_ctrl_write32(void __iomem *base, int addr, u32 val)
+{
+	__raw_writew(val, base + addr);
+	__raw_writew(val >> 16, base + addr + 2);
+}
+
+#define ALL_ERROR (TMIO_STAT_CMD_IDX_ERR | TMIO_STAT_CRCFAIL |		\
+		   TMIO_STAT_STOPBIT_ERR | TMIO_STAT_DATATIMEOUT |	\
+		   TMIO_STAT_RXOVERFLOW | TMIO_STAT_TXUNDERRUN |	\
+		   TMIO_STAT_CMDTIMEOUT | TMIO_STAT_ILL_ACCESS |	\
+		   TMIO_STAT_ILL_FUNC)
+
+static int sdhi_intr(void __iomem *base)
+{
+	unsigned long state = sd_ctrl_read32(base, CTL_STATUS);
+
+	if (state & ALL_ERROR) {
+		sd_ctrl_write32(base, CTL_STATUS, ~ALL_ERROR);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				ALL_ERROR |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return -EINVAL;
+	}
+	if (state & TMIO_STAT_CMDRESPEND) {
+		sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_CMDRESPEND);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				TMIO_STAT_CMDRESPEND |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return 0;
+	}
+	if (state & TMIO_STAT_RXRDY) {
+		sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_RXRDY);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				TMIO_STAT_RXRDY | TMIO_STAT_TXUNDERRUN |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return 0;
+	}
+	if (state & TMIO_STAT_DATAEND) {
+		sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_DATAEND);
+		sd_ctrl_write32(base, CTL_IRQ_MASK,
+				TMIO_STAT_DATAEND |
+				sd_ctrl_read32(base, CTL_IRQ_MASK));
+		return 0;
+	}
+
+	return -EAGAIN;
+}
+
+static int sdhi_boot_wait_resp_end(void __iomem *base)
+{
+	int err = -EAGAIN, timeout = 10000000;
+
+	while (timeout--) {
+		err = sdhi_intr(base);
+		if (err != -EAGAIN)
+			break;
+		udelay(1);
+	}
+
+	return err;
+}
+
+/* SDHI_CLK_CTRL */
+#define CLK_MMC_ENABLE                 (1 << 8)
+#define CLK_MMC_INIT                   (1 << 6)        /* clk / 256 */
+
+static void sdhi_boot_mmc_clk_stop(void __iomem *base)
+{
+	sd_ctrl_write16(base, CTL_CLK_AND_WAIT_CTL, 0x0000);
+	msleep(10);
+	sd_ctrl_write16(base, CTL_SD_CARD_CLK_CTL, ~CLK_MMC_ENABLE &
+		sd_ctrl_read16(base, CTL_SD_CARD_CLK_CTL));
+	msleep(10);
+}
+
+static void sdhi_boot_mmc_clk_start(void __iomem *base)
+{
+	sd_ctrl_write16(base, CTL_SD_CARD_CLK_CTL, CLK_MMC_ENABLE |
+		sd_ctrl_read16(base, CTL_SD_CARD_CLK_CTL));
+	msleep(10);
+	sd_ctrl_write16(base, CTL_CLK_AND_WAIT_CTL, CLK_MMC_ENABLE);
+	msleep(10);
+}
+
+static void sdhi_boot_reset(void __iomem *base)
+{
+	sd_ctrl_write16(base, CTL_RESET_SD, 0x0000);
+	msleep(10);
+	sd_ctrl_write16(base, CTL_RESET_SD, 0x0001);
+	msleep(10);
+}
+
+/* Set MMC clock / power.
+ * Note: This controller uses a simple divider scheme therefore it cannot
+ * run a MMC card at full speed (20MHz). The max clock is 24MHz on SD, but as
+ * MMC wont run that fast, it has to be clocked at 12MHz which is the next
+ * slowest setting.
+ */
+static int sdhi_boot_mmc_set_ios(void __iomem *base, struct mmc_ios *ios)
+{
+	if (sd_ctrl_read32(base, CTL_STATUS) & TMIO_STAT_CMD_BUSY)
+		return -EBUSY;
+
+	if (ios->clock)
+		sd_ctrl_write16(base, CTL_SD_CARD_CLK_CTL,
+				ios->clock | CLK_MMC_ENABLE);
+
+	/* Power sequence - OFF -> ON -> UP */
+	switch (ios->power_mode) {
+	case MMC_POWER_OFF: /* power down SD bus */
+		sdhi_boot_mmc_clk_stop(base);
+		break;
+	case MMC_POWER_ON: /* power up SD bus */
+		break;
+	case MMC_POWER_UP: /* start bus clock */
+		sdhi_boot_mmc_clk_start(base);
+		break;
+	}
+
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_1:
+		sd_ctrl_write16(base, CTL_SD_MEM_CARD_OPT, 0x80e0);
+	break;
+	case MMC_BUS_WIDTH_4:
+		sd_ctrl_write16(base, CTL_SD_MEM_CARD_OPT, 0x00e0);
+	break;
+	}
+
+	/* Let things settle. delay taken from winCE driver */
+	udelay(140);
+
+	return 0;
+}
+
+/* These are the bitmasks the tmio chip requires to implement the MMC response
+ * types. Note that R1 and R6 are the same in this scheme. */
+#define RESP_NONE      0x0300
+#define RESP_R1        0x0400
+#define RESP_R1B       0x0500
+#define RESP_R2        0x0600
+#define RESP_R3        0x0700
+#define DATA_PRESENT   0x0800
+#define TRANSFER_READ  0x1000
+
+static int sdhi_boot_request(void __iomem *base, struct mmc_command *cmd)
+{
+	int err, c = cmd->opcode;
+
+	switch (mmc_resp_type(cmd)) {
+	case MMC_RSP_NONE: c |= RESP_NONE; break;
+	case MMC_RSP_R1:   c |= RESP_R1;   break;
+	case MMC_RSP_R1B:  c |= RESP_R1B;  break;
+	case MMC_RSP_R2:   c |= RESP_R2;   break;
+	case MMC_RSP_R3:   c |= RESP_R3;   break;
+	default:
+		return -EINVAL;
+	}
+
+	/* No interrupts so this may not be cleared */
+	sd_ctrl_write32(base, CTL_STATUS, ~TMIO_STAT_CMDRESPEND);
+
+	sd_ctrl_write32(base, CTL_IRQ_MASK, TMIO_STAT_CMDRESPEND |
+			sd_ctrl_read32(base, CTL_IRQ_MASK));
+	sd_ctrl_write32(base, CTL_ARG_REG, cmd->arg);
+	sd_ctrl_write16(base, CTL_SD_CMD, c);
+
+
+	sd_ctrl_write32(base, CTL_IRQ_MASK,
+			~(TMIO_STAT_CMDRESPEND | ALL_ERROR) &
+			sd_ctrl_read32(base, CTL_IRQ_MASK));
+
+	err = sdhi_boot_wait_resp_end(base);
+	if (err)
+		return err;
+
+	cmd->resp[0] = sd_ctrl_read32(base, CTL_RESPONSE);
+
+	return 0;
+}
+
+static int sdhi_boot_do_read_single(void __iomem *base, int high_capacity,
+				    unsigned long block, unsigned short *buf)
+{
+	int err, i;
+
+	/* CMD17 - Read */
+	{
+		struct mmc_command cmd;
+
+		cmd.opcode = MMC_READ_SINGLE_BLOCK | \
+			     TRANSFER_READ | DATA_PRESENT;
+		if (high_capacity)
+			cmd.arg = block;
+		else
+			cmd.arg = block * TMIO_BBS;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	sd_ctrl_write32(base, CTL_IRQ_MASK,
+			~(TMIO_STAT_DATAEND | TMIO_STAT_RXRDY |
+			  TMIO_STAT_TXUNDERRUN) &
+			sd_ctrl_read32(base, CTL_IRQ_MASK));
+	err = sdhi_boot_wait_resp_end(base);
+	if (err)
+		return err;
+
+	sd_ctrl_write16(base, CTL_SD_XFER_LEN, TMIO_BBS);
+	for (i = 0; i < TMIO_BBS / sizeof(*buf); i++)
+		*buf++ = sd_ctrl_read16(base, RESP_CMD12);
+
+	err = sdhi_boot_wait_resp_end(base);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int sdhi_boot_do_read(void __iomem *base, int high_capacity,
+		      unsigned long offset, unsigned short count,
+		      unsigned short *buf)
+{
+	unsigned long i;
+	int err = 0;
+
+	for (i = 0; i < count; i++) {
+		err = sdhi_boot_do_read_single(base, high_capacity, offset + i,
+					       buf + (i * TMIO_BBS /
+						      sizeof(*buf)));
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+#define VOLTAGES (MMC_VDD_32_33 | MMC_VDD_33_34)
+
+int sdhi_boot_init(void __iomem *base)
+{
+	bool sd_v2 = false, sd_v1_0 = false;
+	unsigned short cid;
+	int err, high_capacity = 0;
+
+	sdhi_boot_mmc_clk_stop(base);
+	sdhi_boot_reset(base);
+
+	/* mmc0: clock 400000Hz busmode 1 powermode 2 cs 0 Vdd 21 width 0 timing 0 */
+	{
+		struct mmc_ios ios;
+		ios.power_mode = MMC_POWER_ON;
+		ios.bus_width = MMC_BUS_WIDTH_1;
+		ios.clock = CLK_MMC_INIT;
+		err = sdhi_boot_mmc_set_ios(base, &ios);
+		if (err)
+			return err;
+	}
+
+	/* CMD0 */
+	{
+		struct mmc_command cmd;
+		msleep(1);
+		cmd.opcode = MMC_GO_IDLE_STATE;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_NONE;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+		msleep(2);
+	}
+
+	/* CMD8 - Test for SD version 2 */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = SD_SEND_IF_COND;
+		cmd.arg = (VOLTAGES != 0) << 8 | 0xaa;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd); /* Ignore error */
+		if ((cmd.resp[0] & 0xff) == 0xaa)
+			sd_v2 = true;
+	}
+
+	/* CMD55 - Get OCR (SD) */
+	{
+		int timeout = 1000;
+		struct mmc_command cmd;
+
+		cmd.arg = 0;
+
+		do {
+			cmd.opcode = MMC_APP_CMD;
+			cmd.flags = MMC_RSP_R1;
+			cmd.arg = 0;
+			err = sdhi_boot_request(base, &cmd);
+			if (err)
+				break;
+
+			cmd.opcode = SD_APP_OP_COND;
+			cmd.flags = MMC_RSP_R3;
+			cmd.arg = (VOLTAGES & 0xff8000);
+			if (sd_v2)
+				cmd.arg |= OCR_HCS;
+			cmd.arg |= OCR_FASTBOOT;
+			err = sdhi_boot_request(base, &cmd);
+			if (err)
+				break;
+
+			msleep(1);
+		} while((!(cmd.resp[0] & OCR_BUSY)) && --timeout);
+
+		if (!err && timeout) {
+			if (!sd_v2)
+				sd_v1_0 = true;
+			high_capacity = (cmd.resp[0] & OCR_HCS) == OCR_HCS;
+		}
+	}
+
+	/* CMD1 - Get OCR (MMC) */
+	if (!sd_v2 && !sd_v1_0) {
+		int timeout = 1000;
+		struct mmc_command cmd;
+
+		do {
+			cmd.opcode = MMC_SEND_OP_COND;
+			cmd.arg = VOLTAGES | OCR_HCS;
+			cmd.flags = MMC_RSP_R3;
+			err = sdhi_boot_request(base, &cmd);
+			if (err)
+				return err;
+
+			msleep(1);
+		} while((!(cmd.resp[0] & OCR_BUSY)) && --timeout);
+
+		if (!timeout)
+			return -EAGAIN;
+
+		high_capacity = (cmd.resp[0] & OCR_HCS) == OCR_HCS;
+	}
+
+	/* CMD2 - Get CID */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_ALL_SEND_CID;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_R2;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	/* CMD3
+	 * MMC: Set the relative address
+	 * SD:  Get the relative address
+	 * Also puts the card into the standby state
+	 */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SET_RELATIVE_ADDR;
+		cmd.arg = 0;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+		cid = cmd.resp[0] >> 16;
+	}
+
+	/* CMD9 - Get CSD */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SEND_CSD;
+		cmd.arg = cid << 16;
+		cmd.flags = MMC_RSP_R2;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	/* CMD7 - Select the card */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SELECT_CARD;
+		//cmd.arg = rca << 16;
+		cmd.arg = cid << 16;
+		//cmd.flags = MMC_RSP_R1B;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	/* CMD16 - Set the block size */
+	{
+		struct mmc_command cmd;
+		cmd.opcode = MMC_SET_BLOCKLEN;
+		cmd.arg = TMIO_BBS;
+		cmd.flags = MMC_RSP_R1;
+		err = sdhi_boot_request(base, &cmd);
+		if (err)
+			return err;
+	}
+
+	return high_capacity;
+}

+ 11 - 0
arch/arm/boot/compressed/sdhi-shmobile.h

@@ -0,0 +1,11 @@
+#ifndef SDHI_MOBILE_H
+#define SDHI_MOBILE_H
+
+#include <linux/compiler.h>
+
+int sdhi_boot_do_read(void __iomem *base, int high_capacity,
+		      unsigned long offset, unsigned short count,
+		      unsigned short *buf);
+int sdhi_boot_init(void __iomem *base);
+
+#endif

+ 8 - 4
arch/arm/boot/compressed/vmlinux.lds.in

@@ -33,20 +33,24 @@ SECTIONS
     *(.text.*)
     *(.text.*)
     *(.fixup)
     *(.fixup)
     *(.gnu.warning)
     *(.gnu.warning)
+    *(.glue_7t)
+    *(.glue_7)
+  }
+  .rodata : {
     *(.rodata)
     *(.rodata)
     *(.rodata.*)
     *(.rodata.*)
-    *(.glue_7)
-    *(.glue_7t)
+  }
+  .piggydata : {
     *(.piggydata)
     *(.piggydata)
-    . = ALIGN(4);
   }
   }
 
 
+  . = ALIGN(4);
   _etext = .;
   _etext = .;
 
 
+  .got.plt		: { *(.got.plt) }
   _got_start = .;
   _got_start = .;
   .got			: { *(.got) }
   .got			: { *(.got) }
   _got_end = .;
   _got_end = .;
-  .got.plt		: { *(.got.plt) }
   _edata = .;
   _edata = .;
 
 
   . = BSS_START;
   . = BSS_START;

+ 83 - 110
arch/arm/common/dmabounce.c

@@ -79,6 +79,8 @@ struct dmabounce_device_info {
 	struct dmabounce_pool	large;
 	struct dmabounce_pool	large;
 
 
 	rwlock_t lock;
 	rwlock_t lock;
+
+	int (*needs_bounce)(struct device *, dma_addr_t, size_t);
 };
 };
 
 
 #ifdef STATS
 #ifdef STATS
@@ -210,114 +212,91 @@ static struct safe_buffer *find_safe_buffer_dev(struct device *dev,
 	if (!dev || !dev->archdata.dmabounce)
 	if (!dev || !dev->archdata.dmabounce)
 		return NULL;
 		return NULL;
 	if (dma_mapping_error(dev, dma_addr)) {
 	if (dma_mapping_error(dev, dma_addr)) {
-		if (dev)
-			dev_err(dev, "Trying to %s invalid mapping\n", where);
-		else
-			pr_err("unknown device: Trying to %s invalid mapping\n", where);
+		dev_err(dev, "Trying to %s invalid mapping\n", where);
 		return NULL;
 		return NULL;
 	}
 	}
 	return find_safe_buffer(dev->archdata.dmabounce, dma_addr);
 	return find_safe_buffer(dev->archdata.dmabounce, dma_addr);
 }
 }
 
 
-static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
-		enum dma_data_direction dir)
+static int needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
 {
 {
-	struct dmabounce_device_info *device_info = dev->archdata.dmabounce;
-	dma_addr_t dma_addr;
-	int needs_bounce = 0;
-
-	if (device_info)
-		DO_STATS ( device_info->map_op_count++ );
-
-	dma_addr = virt_to_dma(dev, ptr);
+	if (!dev || !dev->archdata.dmabounce)
+		return 0;
 
 
 	if (dev->dma_mask) {
 	if (dev->dma_mask) {
-		unsigned long mask = *dev->dma_mask;
-		unsigned long limit;
+		unsigned long limit, mask = *dev->dma_mask;
 
 
 		limit = (mask + 1) & ~mask;
 		limit = (mask + 1) & ~mask;
 		if (limit && size > limit) {
 		if (limit && size > limit) {
 			dev_err(dev, "DMA mapping too big (requested %#x "
 			dev_err(dev, "DMA mapping too big (requested %#x "
 				"mask %#Lx)\n", size, *dev->dma_mask);
 				"mask %#Lx)\n", size, *dev->dma_mask);
-			return ~0;
+			return -E2BIG;
 		}
 		}
 
 
-		/*
-		 * Figure out if we need to bounce from the DMA mask.
-		 */
-		needs_bounce = (dma_addr | (dma_addr + size - 1)) & ~mask;
+		/* Figure out if we need to bounce from the DMA mask. */
+		if ((dma_addr | (dma_addr + size - 1)) & ~mask)
+			return 1;
 	}
 	}
 
 
-	if (device_info && (needs_bounce || dma_needs_bounce(dev, dma_addr, size))) {
-		struct safe_buffer *buf;
+	return !!dev->archdata.dmabounce->needs_bounce(dev, dma_addr, size);
+}
 
 
-		buf = alloc_safe_buffer(device_info, ptr, size, dir);
-		if (buf == 0) {
-			dev_err(dev, "%s: unable to map unsafe buffer %p!\n",
-			       __func__, ptr);
-			return ~0;
-		}
+static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
+		enum dma_data_direction dir)
+{
+	struct dmabounce_device_info *device_info = dev->archdata.dmabounce;
+	struct safe_buffer *buf;
 
 
-		dev_dbg(dev,
-			"%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
-			__func__, buf->ptr, virt_to_dma(dev, buf->ptr),
-			buf->safe, buf->safe_dma_addr);
+	if (device_info)
+		DO_STATS ( device_info->map_op_count++ );
 
 
-		if ((dir == DMA_TO_DEVICE) ||
-		    (dir == DMA_BIDIRECTIONAL)) {
-			dev_dbg(dev, "%s: copy unsafe %p to safe %p, size %d\n",
-				__func__, ptr, buf->safe, size);
-			memcpy(buf->safe, ptr, size);
-		}
-		ptr = buf->safe;
+	buf = alloc_safe_buffer(device_info, ptr, size, dir);
+	if (buf == NULL) {
+		dev_err(dev, "%s: unable to map unsafe buffer %p!\n",
+		       __func__, ptr);
+		return ~0;
+	}
 
 
-		dma_addr = buf->safe_dma_addr;
-	} else {
-		/*
-		 * We don't need to sync the DMA buffer since
-		 * it was allocated via the coherent allocators.
-		 */
-		__dma_single_cpu_to_dev(ptr, size, dir);
+	dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
+		__func__, buf->ptr, virt_to_dma(dev, buf->ptr),
+		buf->safe, buf->safe_dma_addr);
+
+	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
+		dev_dbg(dev, "%s: copy unsafe %p to safe %p, size %d\n",
+			__func__, ptr, buf->safe, size);
+		memcpy(buf->safe, ptr, size);
 	}
 	}
 
 
-	return dma_addr;
+	return buf->safe_dma_addr;
 }
 }
 
 
-static inline void unmap_single(struct device *dev, dma_addr_t dma_addr,
+static inline void unmap_single(struct device *dev, struct safe_buffer *buf,
 		size_t size, enum dma_data_direction dir)
 		size_t size, enum dma_data_direction dir)
 {
 {
-	struct safe_buffer *buf = find_safe_buffer_dev(dev, dma_addr, "unmap");
-
-	if (buf) {
-		BUG_ON(buf->size != size);
-		BUG_ON(buf->direction != dir);
+	BUG_ON(buf->size != size);
+	BUG_ON(buf->direction != dir);
 
 
-		dev_dbg(dev,
-			"%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
-			__func__, buf->ptr, virt_to_dma(dev, buf->ptr),
-			buf->safe, buf->safe_dma_addr);
+	dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n",
+		__func__, buf->ptr, virt_to_dma(dev, buf->ptr),
+		buf->safe, buf->safe_dma_addr);
 
 
-		DO_STATS(dev->archdata.dmabounce->bounce_count++);
+	DO_STATS(dev->archdata.dmabounce->bounce_count++);
 
 
-		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
-			void *ptr = buf->ptr;
+	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
+		void *ptr = buf->ptr;
 
 
-			dev_dbg(dev,
-				"%s: copy back safe %p to unsafe %p size %d\n",
-				__func__, buf->safe, ptr, size);
-			memcpy(ptr, buf->safe, size);
+		dev_dbg(dev, "%s: copy back safe %p to unsafe %p size %d\n",
+			__func__, buf->safe, ptr, size);
+		memcpy(ptr, buf->safe, size);
 
 
-			/*
-			 * Since we may have written to a page cache page,
-			 * we need to ensure that the data will be coherent
-			 * with user mappings.
-			 */
-			__cpuc_flush_dcache_area(ptr, size);
-		}
-		free_safe_buffer(dev->archdata.dmabounce, buf);
-	} else {
-		__dma_single_dev_to_cpu(dma_to_virt(dev, dma_addr), size, dir);
+		/*
+		 * Since we may have written to a page cache page,
+		 * we need to ensure that the data will be coherent
+		 * with user mappings.
+		 */
+		__cpuc_flush_dcache_area(ptr, size);
 	}
 	}
+	free_safe_buffer(dev->archdata.dmabounce, buf);
 }
 }
 
 
 /* ************************************************** */
 /* ************************************************** */
@@ -328,45 +307,28 @@ static inline void unmap_single(struct device *dev, dma_addr_t dma_addr,
  * substitute the safe buffer for the unsafe one.
  * substitute the safe buffer for the unsafe one.
  * (basically move the buffer from an unsafe area to a safe one)
  * (basically move the buffer from an unsafe area to a safe one)
  */
  */
-dma_addr_t __dma_map_single(struct device *dev, void *ptr, size_t size,
-		enum dma_data_direction dir)
-{
-	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
-		__func__, ptr, size, dir);
-
-	BUG_ON(!valid_dma_direction(dir));
-
-	return map_single(dev, ptr, size, dir);
-}
-EXPORT_SYMBOL(__dma_map_single);
-
-/*
- * see if a mapped address was really a "safe" buffer and if so, copy
- * the data from the safe buffer back to the unsafe buffer and free up
- * the safe buffer.  (basically return things back to the way they
- * should be)
- */
-void __dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		enum dma_data_direction dir)
-{
-	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
-		__func__, (void *) dma_addr, size, dir);
-
-	unmap_single(dev, dma_addr, size, dir);
-}
-EXPORT_SYMBOL(__dma_unmap_single);
-
 dma_addr_t __dma_map_page(struct device *dev, struct page *page,
 dma_addr_t __dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir)
 		unsigned long offset, size_t size, enum dma_data_direction dir)
 {
 {
+	dma_addr_t dma_addr;
+	int ret;
+
 	dev_dbg(dev, "%s(page=%p,off=%#lx,size=%zx,dir=%x)\n",
 	dev_dbg(dev, "%s(page=%p,off=%#lx,size=%zx,dir=%x)\n",
 		__func__, page, offset, size, dir);
 		__func__, page, offset, size, dir);
 
 
-	BUG_ON(!valid_dma_direction(dir));
+	dma_addr = pfn_to_dma(dev, page_to_pfn(page)) + offset;
+
+	ret = needs_bounce(dev, dma_addr, size);
+	if (ret < 0)
+		return ~0;
+
+	if (ret == 0) {
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+		return dma_addr;
+	}
 
 
 	if (PageHighMem(page)) {
 	if (PageHighMem(page)) {
-		dev_err(dev, "DMA buffer bouncing of HIGHMEM pages "
-			     "is not supported\n");
+		dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n");
 		return ~0;
 		return ~0;
 	}
 	}
 
 
@@ -383,10 +345,19 @@ EXPORT_SYMBOL(__dma_map_page);
 void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 		enum dma_data_direction dir)
 		enum dma_data_direction dir)
 {
 {
-	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
-		__func__, (void *) dma_addr, size, dir);
+	struct safe_buffer *buf;
+
+	dev_dbg(dev, "%s(dma=%#x,size=%d,dir=%x)\n",
+		__func__, dma_addr, size, dir);
+
+	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+	if (!buf) {
+		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, dma_addr)),
+			dma_addr & ~PAGE_MASK, size, dir);
+		return;
+	}
 
 
-	unmap_single(dev, dma_addr, size, dir);
+	unmap_single(dev, buf, size, dir);
 }
 }
 EXPORT_SYMBOL(__dma_unmap_page);
 EXPORT_SYMBOL(__dma_unmap_page);
 
 
@@ -461,7 +432,8 @@ static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev,
 }
 }
 
 
 int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size,
 int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size,
-		unsigned long large_buffer_size)
+		unsigned long large_buffer_size,
+		int (*needs_bounce_fn)(struct device *, dma_addr_t, size_t))
 {
 {
 	struct dmabounce_device_info *device_info;
 	struct dmabounce_device_info *device_info;
 	int ret;
 	int ret;
@@ -497,6 +469,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size,
 	device_info->dev = dev;
 	device_info->dev = dev;
 	INIT_LIST_HEAD(&device_info->safe_buffers);
 	INIT_LIST_HEAD(&device_info->safe_buffers);
 	rwlock_init(&device_info->lock);
 	rwlock_init(&device_info->lock);
+	device_info->needs_bounce = needs_bounce_fn;
 
 
 #ifdef STATS
 #ifdef STATS
 	device_info->total_allocs = 0;
 	device_info->total_allocs = 0;

+ 3 - 4
arch/arm/common/gic.c

@@ -179,22 +179,21 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 {
 {
 	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
 	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
 	unsigned int shift = (d->irq % 4) * 8;
 	unsigned int shift = (d->irq % 4) * 8;
-	unsigned int cpu = cpumask_first(mask_val);
+	unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
 	u32 val, mask, bit;
 	u32 val, mask, bit;
 
 
-	if (cpu >= 8)
+	if (cpu >= 8 || cpu >= nr_cpu_ids)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	mask = 0xff << shift;
 	mask = 0xff << shift;
 	bit = 1 << (cpu + shift);
 	bit = 1 << (cpu + shift);
 
 
 	spin_lock(&irq_controller_lock);
 	spin_lock(&irq_controller_lock);
-	d->node = cpu;
 	val = readl_relaxed(reg) & ~mask;
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
 	writel_relaxed(val | bit, reg);
 	spin_unlock(&irq_controller_lock);
 	spin_unlock(&irq_controller_lock);
 
 
-	return 0;
+	return IRQ_SET_MASK_OK;
 }
 }
 #endif
 #endif
 
 

+ 7 - 9
arch/arm/common/it8152.c

@@ -243,6 +243,12 @@ static struct resource it8152_mem = {
  * ITE8152 chip can address up to 64MByte, so all the devices
  * ITE8152 chip can address up to 64MByte, so all the devices
  * connected to ITE8152 (PCI and USB) should have limited DMA window
  * connected to ITE8152 (PCI and USB) should have limited DMA window
  */
  */
+static int it8152_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
+{
+	dev_dbg(dev, "%s: dma_addr %08x, size %08x\n",
+		__func__, dma_addr, size);
+	return (dma_addr + size - PHYS_OFFSET) >= SZ_64M;
+}
 
 
 /*
 /*
  * Setup DMA mask to 64MB on devices connected to ITE8152. Ignore all
  * Setup DMA mask to 64MB on devices connected to ITE8152. Ignore all
@@ -254,7 +260,7 @@ static int it8152_pci_platform_notify(struct device *dev)
 		if (dev->dma_mask)
 		if (dev->dma_mask)
 			*dev->dma_mask = (SZ_64M - 1) | PHYS_OFFSET;
 			*dev->dma_mask = (SZ_64M - 1) | PHYS_OFFSET;
 		dev->coherent_dma_mask = (SZ_64M - 1) | PHYS_OFFSET;
 		dev->coherent_dma_mask = (SZ_64M - 1) | PHYS_OFFSET;
-		dmabounce_register_dev(dev, 2048, 4096);
+		dmabounce_register_dev(dev, 2048, 4096, it8152_needs_bounce);
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -267,14 +273,6 @@ static int it8152_pci_platform_notify_remove(struct device *dev)
 	return 0;
 	return 0;
 }
 }
 
 
-int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
-{
-	dev_dbg(dev, "%s: dma_addr %08x, size %08x\n",
-		__func__, dma_addr, size);
-	return (dev->bus == &pci_bus_type) &&
-		((dma_addr + size - PHYS_OFFSET) >= SZ_64M);
-}
-
 int dma_set_coherent_mask(struct device *dev, u64 mask)
 int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
 {
 	if (mask >= PHYS_OFFSET + SZ_64M - 1)
 	if (mask >= PHYS_OFFSET + SZ_64M - 1)

+ 31 - 29
arch/arm/common/sa1111.c

@@ -579,7 +579,36 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac,
 
 
 	sachip->dev->coherent_dma_mask &= sa1111_dma_mask[drac >> 2];
 	sachip->dev->coherent_dma_mask &= sa1111_dma_mask[drac >> 2];
 }
 }
+#endif
 
 
+#ifdef CONFIG_DMABOUNCE
+/*
+ * According to the "Intel StrongARM SA-1111 Microprocessor Companion
+ * Chip Specification Update" (June 2000), erratum #7, there is a
+ * significant bug in the SA1111 SDRAM shared memory controller.  If
+ * an access to a region of memory above 1MB relative to the bank base,
+ * it is important that address bit 10 _NOT_ be asserted. Depending
+ * on the configuration of the RAM, bit 10 may correspond to one
+ * of several different (processor-relative) address bits.
+ *
+ * This routine only identifies whether or not a given DMA address
+ * is susceptible to the bug.
+ *
+ * This should only get called for sa1111_device types due to the
+ * way we configure our device dma_masks.
+ */
+static int sa1111_needs_bounce(struct device *dev, dma_addr_t addr, size_t size)
+{
+	/*
+	 * Section 4.6 of the "Intel StrongARM SA-1111 Development Module
+	 * User's Guide" mentions that jumpers R51 and R52 control the
+	 * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or
+	 * SDRAM bank 1 on Neponset). The default configuration selects
+	 * Assabet, so any address in bank 1 is necessarily invalid.
+	 */
+	return (machine_is_assabet() || machine_is_pfs168()) &&
+		(addr >= 0xc8000000 || (addr + size) >= 0xc8000000);
+}
 #endif
 #endif
 
 
 static void sa1111_dev_release(struct device *_dev)
 static void sa1111_dev_release(struct device *_dev)
@@ -644,7 +673,8 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
 		dev->dev.dma_mask = &dev->dma_mask;
 		dev->dev.dma_mask = &dev->dma_mask;
 
 
 		if (dev->dma_mask != 0xffffffffUL) {
 		if (dev->dma_mask != 0xffffffffUL) {
-			ret = dmabounce_register_dev(&dev->dev, 1024, 4096);
+			ret = dmabounce_register_dev(&dev->dev, 1024, 4096,
+					sa1111_needs_bounce);
 			if (ret) {
 			if (ret) {
 				dev_err(&dev->dev, "SA1111: Failed to register"
 				dev_err(&dev->dev, "SA1111: Failed to register"
 					" with dmabounce\n");
 					" with dmabounce\n");
@@ -818,34 +848,6 @@ static void __sa1111_remove(struct sa1111 *sachip)
 	kfree(sachip);
 	kfree(sachip);
 }
 }
 
 
-/*
- * According to the "Intel StrongARM SA-1111 Microprocessor Companion
- * Chip Specification Update" (June 2000), erratum #7, there is a
- * significant bug in the SA1111 SDRAM shared memory controller.  If
- * an access to a region of memory above 1MB relative to the bank base,
- * it is important that address bit 10 _NOT_ be asserted. Depending
- * on the configuration of the RAM, bit 10 may correspond to one
- * of several different (processor-relative) address bits.
- *
- * This routine only identifies whether or not a given DMA address
- * is susceptible to the bug.
- *
- * This should only get called for sa1111_device types due to the
- * way we configure our device dma_masks.
- */
-int dma_needs_bounce(struct device *dev, dma_addr_t addr, size_t size)
-{
-	/*
-	 * Section 4.6 of the "Intel StrongARM SA-1111 Development Module
-	 * User's Guide" mentions that jumpers R51 and R52 control the
-	 * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or
-	 * SDRAM bank 1 on Neponset). The default configuration selects
-	 * Assabet, so any address in bank 1 is necessarily invalid.
-	 */
-	return ((machine_is_assabet() || machine_is_pfs168()) &&
-		(addr >= 0xc8000000 || (addr + size) >= 0xc8000000));
-}
-
 struct sa1111_save_data {
 struct sa1111_save_data {
 	unsigned int	skcr;
 	unsigned int	skcr;
 	unsigned int	skpcr;
 	unsigned int	skpcr;

+ 9 - 0
arch/arm/include/asm/assembler.h

@@ -293,4 +293,13 @@
 	.macro	ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
 	.macro	ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
 	usracc	ldr, \reg, \ptr, \inc, \cond, \rept, \abort
 	usracc	ldr, \reg, \ptr, \inc, \cond, \rept, \abort
 	.endm
 	.endm
+
+/* Utility macro for declaring string literals */
+	.macro	string name:req, string
+	.type \name , #object
+\name:
+	.asciz "\string"
+	.size \name , . - \name
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H__ */
 #endif /* __ASM_ASSEMBLER_H__ */

+ 2 - 2
arch/arm/include/asm/bitops.h

@@ -26,8 +26,8 @@
 #include <linux/compiler.h>
 #include <linux/compiler.h>
 #include <asm/system.h>
 #include <asm/system.h>
 
 
-#define smp_mb__before_clear_bit()	mb()
-#define smp_mb__after_clear_bit()	mb()
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	smp_mb()
 
 
 /*
 /*
  * These functions are the basis of our bit ops.
  * These functions are the basis of our bit ops.

+ 13 - 75
arch/arm/include/asm/dma-mapping.h

@@ -115,39 +115,8 @@ static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 		___dma_page_dev_to_cpu(page, off, size, dir);
 		___dma_page_dev_to_cpu(page, off, size, dir);
 }
 }
 
 
-/*
- * Return whether the given device DMA address mask can be supported
- * properly.  For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask
- * to this function.
- *
- * FIXME: This should really be a platform specific issue - we should
- * return false if GFP_DMA allocations may not satisfy the supplied 'mask'.
- */
-static inline int dma_supported(struct device *dev, u64 mask)
-{
-	if (mask < ISA_DMA_THRESHOLD)
-		return 0;
-	return 1;
-}
-
-static inline int dma_set_mask(struct device *dev, u64 dma_mask)
-{
-#ifdef CONFIG_DMABOUNCE
-	if (dev->archdata.dmabounce) {
-		if (dma_mask >= ISA_DMA_THRESHOLD)
-			return 0;
-		else
-			return -EIO;
-	}
-#endif
-	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
-		return -EIO;
-
-	*dev->dma_mask = dma_mask;
-
-	return 0;
-}
+extern int dma_supported(struct device *, u64);
+extern int dma_set_mask(struct device *, u64);
 
 
 /*
 /*
  * DMA errors are defined by all-bits-set in the DMA address.
  * DMA errors are defined by all-bits-set in the DMA address.
@@ -256,14 +225,14 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
  * @dev: valid struct device pointer
  * @dev: valid struct device pointer
  * @small_buf_size: size of buffers to use with small buffer pool
  * @small_buf_size: size of buffers to use with small buffer pool
  * @large_buf_size: size of buffers to use with large buffer pool (can be 0)
  * @large_buf_size: size of buffers to use with large buffer pool (can be 0)
+ * @needs_bounce_fn: called to determine whether buffer needs bouncing
  *
  *
  * This function should be called by low-level platform code to register
  * This function should be called by low-level platform code to register
  * a device as requireing DMA buffer bouncing. The function will allocate
  * a device as requireing DMA buffer bouncing. The function will allocate
  * appropriate DMA pools for the device.
  * appropriate DMA pools for the device.
- *
  */
  */
 extern int dmabounce_register_dev(struct device *, unsigned long,
 extern int dmabounce_register_dev(struct device *, unsigned long,
-		unsigned long);
+		unsigned long, int (*)(struct device *, dma_addr_t, size_t));
 
 
 /**
 /**
  * dmabounce_unregister_dev
  * dmabounce_unregister_dev
@@ -277,31 +246,9 @@ extern int dmabounce_register_dev(struct device *, unsigned long,
  */
  */
 extern void dmabounce_unregister_dev(struct device *);
 extern void dmabounce_unregister_dev(struct device *);
 
 
-/**
- * dma_needs_bounce
- *
- * @dev: valid struct device pointer
- * @dma_handle: dma_handle of unbounced buffer
- * @size: size of region being mapped
- *
- * Platforms that utilize the dmabounce mechanism must implement
- * this function.
- *
- * The dmabounce routines call this function whenever a dma-mapping
- * is requested to determine whether a given buffer needs to be bounced
- * or not. The function must return 0 if the buffer is OK for
- * DMA access and 1 if the buffer needs to be bounced.
- *
- */
-extern int dma_needs_bounce(struct device*, dma_addr_t, size_t);
-
 /*
 /*
  * The DMA API, implemented by dmabounce.c.  See below for descriptions.
  * The DMA API, implemented by dmabounce.c.  See below for descriptions.
  */
  */
-extern dma_addr_t __dma_map_single(struct device *, void *, size_t,
-		enum dma_data_direction);
-extern void __dma_unmap_single(struct device *, dma_addr_t, size_t,
-		enum dma_data_direction);
 extern dma_addr_t __dma_map_page(struct device *, struct page *,
 extern dma_addr_t __dma_map_page(struct device *, struct page *,
 		unsigned long, size_t, enum dma_data_direction);
 		unsigned long, size_t, enum dma_data_direction);
 extern void __dma_unmap_page(struct device *, dma_addr_t, size_t,
 extern void __dma_unmap_page(struct device *, dma_addr_t, size_t,
@@ -328,13 +275,6 @@ static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr,
 }
 }
 
 
 
 
-static inline dma_addr_t __dma_map_single(struct device *dev, void *cpu_addr,
-		size_t size, enum dma_data_direction dir)
-{
-	__dma_single_cpu_to_dev(cpu_addr, size, dir);
-	return virt_to_dma(dev, cpu_addr);
-}
-
 static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page,
 static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir)
 	     unsigned long offset, size_t size, enum dma_data_direction dir)
 {
 {
@@ -342,12 +282,6 @@ static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page,
 	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 }
 }
 
 
-static inline void __dma_unmap_single(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
-{
-	__dma_single_dev_to_cpu(dma_to_virt(dev, handle), size, dir);
-}
-
 static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle,
 static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 		size_t size, enum dma_data_direction dir)
 {
 {
@@ -373,14 +307,18 @@ static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle,
 static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
 static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
 		size_t size, enum dma_data_direction dir)
 		size_t size, enum dma_data_direction dir)
 {
 {
+	unsigned long offset;
+	struct page *page;
 	dma_addr_t addr;
 	dma_addr_t addr;
 
 
+	BUG_ON(!virt_addr_valid(cpu_addr));
+	BUG_ON(!virt_addr_valid(cpu_addr + size - 1));
 	BUG_ON(!valid_dma_direction(dir));
 	BUG_ON(!valid_dma_direction(dir));
 
 
-	addr = __dma_map_single(dev, cpu_addr, size, dir);
-	debug_dma_map_page(dev, virt_to_page(cpu_addr),
-			(unsigned long)cpu_addr & ~PAGE_MASK, size,
-			dir, addr, true);
+	page = virt_to_page(cpu_addr);
+	offset = (unsigned long)cpu_addr & ~PAGE_MASK;
+	addr = __dma_map_page(dev, page, offset, size, dir);
+	debug_dma_map_page(dev, page, offset, size, dir, addr, true);
 
 
 	return addr;
 	return addr;
 }
 }
@@ -430,7 +368,7 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 		size_t size, enum dma_data_direction dir)
 {
 {
 	debug_dma_unmap_page(dev, handle, size, dir, true);
 	debug_dma_unmap_page(dev, handle, size, dir, true);
-	__dma_unmap_single(dev, handle, size, dir);
+	__dma_unmap_page(dev, handle, size, dir);
 }
 }
 
 
 /**
 /**

+ 6 - 5
arch/arm/include/asm/dma.h

@@ -1,15 +1,16 @@
 #ifndef __ASM_ARM_DMA_H
 #ifndef __ASM_ARM_DMA_H
 #define __ASM_ARM_DMA_H
 #define __ASM_ARM_DMA_H
 
 
-#include <asm/memory.h>
-
 /*
 /*
  * This is the maximum virtual address which can be DMA'd from.
  * This is the maximum virtual address which can be DMA'd from.
  */
  */
-#ifndef ARM_DMA_ZONE_SIZE
-#define MAX_DMA_ADDRESS	0xffffffff
+#ifndef CONFIG_ZONE_DMA
+#define MAX_DMA_ADDRESS	0xffffffffUL
 #else
 #else
-#define MAX_DMA_ADDRESS	(PAGE_OFFSET + ARM_DMA_ZONE_SIZE)
+#define MAX_DMA_ADDRESS	({ \
+	extern unsigned long arm_dma_zone_size; \
+	arm_dma_zone_size ? \
+		(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
 #endif
 #endif
 
 
 #ifdef CONFIG_ISA_DMA_API
 #ifdef CONFIG_ISA_DMA_API

+ 7 - 7
arch/arm/include/asm/entry-macro-multi.S

@@ -4,8 +4,8 @@
  * Interrupt handling.  Preserves r7, r8, r9
  * Interrupt handling.  Preserves r7, r8, r9
  */
  */
 	.macro	arch_irq_handler_default
 	.macro	arch_irq_handler_default
-	get_irqnr_preamble r5, lr
-1:	get_irqnr_and_base r0, r6, r5, lr
+	get_irqnr_preamble r6, lr
+1:	get_irqnr_and_base r0, r2, r6, lr
 	movne	r1, sp
 	movne	r1, sp
 	@
 	@
 	@ routine called with r0 = irq number, r1 = struct pt_regs *
 	@ routine called with r0 = irq number, r1 = struct pt_regs *
@@ -17,17 +17,17 @@
 	/*
 	/*
 	 * XXX
 	 * XXX
 	 *
 	 *
-	 * this macro assumes that irqstat (r6) and base (r5) are
+	 * this macro assumes that irqstat (r2) and base (r6) are
 	 * preserved from get_irqnr_and_base above
 	 * preserved from get_irqnr_and_base above
 	 */
 	 */
-	ALT_SMP(test_for_ipi r0, r6, r5, lr)
+	ALT_SMP(test_for_ipi r0, r2, r6, lr)
 	ALT_UP_B(9997f)
 	ALT_UP_B(9997f)
 	movne	r1, sp
 	movne	r1, sp
 	adrne	lr, BSYM(1b)
 	adrne	lr, BSYM(1b)
 	bne	do_IPI
 	bne	do_IPI
 
 
 #ifdef CONFIG_LOCAL_TIMERS
 #ifdef CONFIG_LOCAL_TIMERS
-	test_for_ltirq r0, r6, r5, lr
+	test_for_ltirq r0, r2, r6, lr
 	movne	r0, sp
 	movne	r0, sp
 	adrne	lr, BSYM(1b)
 	adrne	lr, BSYM(1b)
 	bne	do_local_timer
 	bne	do_local_timer
@@ -40,7 +40,7 @@
 	.align	5
 	.align	5
 	.global \symbol_name
 	.global \symbol_name
 \symbol_name:
 \symbol_name:
-	mov	r4, lr
+	mov	r8, lr
 	arch_irq_handler_default
 	arch_irq_handler_default
-	mov     pc, r4
+	mov     pc, r8
 	.endm
 	.endm

+ 20 - 16
arch/arm/include/asm/hwcap.h

@@ -4,22 +4,26 @@
 /*
 /*
  * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
  * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
  */
  */
-#define HWCAP_SWP	1
-#define HWCAP_HALF	2
-#define HWCAP_THUMB	4
-#define HWCAP_26BIT	8	/* Play it safe */
-#define HWCAP_FAST_MULT	16
-#define HWCAP_FPA	32
-#define HWCAP_VFP	64
-#define HWCAP_EDSP	128
-#define HWCAP_JAVA	256
-#define HWCAP_IWMMXT	512
-#define HWCAP_CRUNCH	1024
-#define HWCAP_THUMBEE	2048
-#define HWCAP_NEON	4096
-#define HWCAP_VFPv3	8192
-#define HWCAP_VFPv3D16	16384
-#define HWCAP_TLS	32768
+#define HWCAP_SWP	(1 << 0)
+#define HWCAP_HALF	(1 << 1)
+#define HWCAP_THUMB	(1 << 2)
+#define HWCAP_26BIT	(1 << 3)	/* Play it safe */
+#define HWCAP_FAST_MULT	(1 << 4)
+#define HWCAP_FPA	(1 << 5)
+#define HWCAP_VFP	(1 << 6)
+#define HWCAP_EDSP	(1 << 7)
+#define HWCAP_JAVA	(1 << 8)
+#define HWCAP_IWMMXT	(1 << 9)
+#define HWCAP_CRUNCH	(1 << 10)
+#define HWCAP_THUMBEE	(1 << 11)
+#define HWCAP_NEON	(1 << 12)
+#define HWCAP_VFPv3	(1 << 13)
+#define HWCAP_VFPv3D16	(1 << 14)
+#define HWCAP_TLS	(1 << 15)
+#define HWCAP_VFPv4	(1 << 16)
+#define HWCAP_IDIVA	(1 << 17)
+#define HWCAP_IDIVT	(1 << 18)
+#define HWCAP_IDIV	(HWCAP_IDIVA | HWCAP_IDIVT)
 
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 /*
 /*

+ 7 - 21
arch/arm/include/asm/kprobes.h

@@ -24,12 +24,6 @@
 #define MAX_INSN_SIZE			2
 #define MAX_INSN_SIZE			2
 #define MAX_STACK_SIZE			64	/* 32 would probably be OK */
 #define MAX_STACK_SIZE			64	/* 32 would probably be OK */
 
 
-/*
- * This undefined instruction must be unique and
- * reserved solely for kprobes' use.
- */
-#define KPROBE_BREAKPOINT_INSTRUCTION	0xe7f001f8
-
 #define regs_return_value(regs)		((regs)->ARM_r0)
 #define regs_return_value(regs)		((regs)->ARM_r0)
 #define flush_insn_slot(p)		do { } while (0)
 #define flush_insn_slot(p)		do { } while (0)
 #define kretprobe_blacklist_size	0
 #define kretprobe_blacklist_size	0
@@ -38,14 +32,17 @@ typedef u32 kprobe_opcode_t;
 
 
 struct kprobe;
 struct kprobe;
 typedef void (kprobe_insn_handler_t)(struct kprobe *, struct pt_regs *);
 typedef void (kprobe_insn_handler_t)(struct kprobe *, struct pt_regs *);
-
 typedef unsigned long (kprobe_check_cc)(unsigned long);
 typedef unsigned long (kprobe_check_cc)(unsigned long);
+typedef void (kprobe_insn_singlestep_t)(struct kprobe *, struct pt_regs *);
+typedef void (kprobe_insn_fn_t)(void);
 
 
 /* Architecture specific copy of original instruction. */
 /* Architecture specific copy of original instruction. */
 struct arch_specific_insn {
 struct arch_specific_insn {
-	kprobe_opcode_t		*insn;
-	kprobe_insn_handler_t	*insn_handler;
-	kprobe_check_cc		*insn_check_cc;
+	kprobe_opcode_t			*insn;
+	kprobe_insn_handler_t		*insn_handler;
+	kprobe_check_cc			*insn_check_cc;
+	kprobe_insn_singlestep_t	*insn_singlestep;
+	kprobe_insn_fn_t		*insn_fn;
 };
 };
 
 
 struct prev_kprobe {
 struct prev_kprobe {
@@ -62,20 +59,9 @@ struct kprobe_ctlblk {
 };
 };
 
 
 void arch_remove_kprobe(struct kprobe *);
 void arch_remove_kprobe(struct kprobe *);
-void kretprobe_trampoline(void);
-
 int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
 int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
 int kprobe_exceptions_notify(struct notifier_block *self,
 int kprobe_exceptions_notify(struct notifier_block *self,
 			     unsigned long val, void *data);
 			     unsigned long val, void *data);
 
 
-enum kprobe_insn {
-	INSN_REJECTED,
-	INSN_GOOD,
-	INSN_GOOD_NO_SLOT
-};
-
-enum kprobe_insn arm_kprobe_decode_insn(kprobe_opcode_t,
-					struct arch_specific_insn *);
-void __init arm_kprobe_decode_init(void);
 
 
 #endif /* _ARM_KPROBES_H */
 #endif /* _ARM_KPROBES_H */

+ 4 - 0
arch/arm/include/asm/mach/arch.h

@@ -23,6 +23,10 @@ struct machine_desc {
 
 
 	unsigned int		nr_irqs;	/* number of IRQs */
 	unsigned int		nr_irqs;	/* number of IRQs */
 
 
+#ifdef CONFIG_ZONE_DMA
+	unsigned long		dma_zone_size;	/* size of DMA-able area */
+#endif
+
 	unsigned int		video_start;	/* start of video RAM	*/
 	unsigned int		video_start;	/* start of video RAM	*/
 	unsigned int		video_end;	/* end of video RAM	*/
 	unsigned int		video_end;	/* end of video RAM	*/
 
 

+ 0 - 12
arch/arm/include/asm/memory.h

@@ -203,18 +203,6 @@ static inline unsigned long __phys_to_virt(unsigned long x)
 #define PHYS_OFFSET	PLAT_PHYS_OFFSET
 #define PHYS_OFFSET	PLAT_PHYS_OFFSET
 #endif
 #endif
 
 
-/*
- * The DMA mask corresponding to the maximum bus address allocatable
- * using GFP_DMA.  The default here places no restriction on DMA
- * allocations.  This must be the smallest DMA mask in the system,
- * so a successful GFP_DMA allocation will always satisfy this.
- */
-#ifndef ARM_DMA_ZONE_SIZE
-#define ISA_DMA_THRESHOLD	(0xffffffffULL)
-#else
-#define ISA_DMA_THRESHOLD	(PHYS_OFFSET + ARM_DMA_ZONE_SIZE - 1)
-#endif
-
 /*
 /*
  * PFNs are used to describe any physical page; this means
  * PFNs are used to describe any physical page; this means
  * PFN 0 == physical address 0.
  * PFN 0 == physical address 0.

+ 2 - 0
arch/arm/include/asm/perf_event.h

@@ -24,6 +24,8 @@ enum arm_perf_pmu_ids {
 	ARM_PERF_PMU_ID_V6MP,
 	ARM_PERF_PMU_ID_V6MP,
 	ARM_PERF_PMU_ID_CA8,
 	ARM_PERF_PMU_ID_CA8,
 	ARM_PERF_PMU_ID_CA9,
 	ARM_PERF_PMU_ID_CA9,
+	ARM_PERF_PMU_ID_CA5,
+	ARM_PERF_PMU_ID_CA15,
 	ARM_NUM_PMU_IDS,
 	ARM_NUM_PMU_IDS,
 };
 };
 
 

+ 1 - 1
arch/arm/include/asm/pmu.h

@@ -52,7 +52,7 @@ reserve_pmu(enum arm_pmu_type device);
  * a cookie.
  * a cookie.
  */
  */
 extern int
 extern int
-release_pmu(struct platform_device *pdev);
+release_pmu(enum arm_pmu_type type);
 
 
 /**
 /**
  * init_pmu() - Initialise the PMU.
  * init_pmu() - Initialise the PMU.

+ 7 - 7
arch/arm/include/asm/proc-fns.h

@@ -82,13 +82,13 @@ extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
 #else
 #else
-#define cpu_proc_init()			processor._proc_init()
-#define cpu_proc_fin()			processor._proc_fin()
-#define cpu_reset(addr)			processor.reset(addr)
-#define cpu_do_idle()			processor._do_idle()
-#define cpu_dcache_clean_area(addr,sz)	processor.dcache_clean_area(addr,sz)
-#define cpu_set_pte_ext(ptep,pte,ext)	processor.set_pte_ext(ptep,pte,ext)
-#define cpu_do_switch_mm(pgd,mm)	processor.switch_mm(pgd,mm)
+#define cpu_proc_init			processor._proc_init
+#define cpu_proc_fin			processor._proc_fin
+#define cpu_reset			processor.reset
+#define cpu_do_idle			processor._do_idle
+#define cpu_dcache_clean_area		processor.dcache_clean_area
+#define cpu_set_pte_ext			processor.set_pte_ext
+#define cpu_do_switch_mm		processor.switch_mm
 #endif
 #endif
 
 
 extern void cpu_resume(void);
 extern void cpu_resume(void);

+ 10 - 1
arch/arm/include/asm/ptrace.h

@@ -69,8 +69,9 @@
 #define PSR_c		0x000000ff	/* Control		*/
 #define PSR_c		0x000000ff	/* Control		*/
 
 
 /*
 /*
- * ARMv7 groups of APSR bits
+ * ARMv7 groups of PSR bits
  */
  */
+#define APSR_MASK	0xf80f0000	/* N, Z, C, V, Q and GE flags */
 #define PSR_ISET_MASK	0x01000010	/* ISA state (J, T) mask */
 #define PSR_ISET_MASK	0x01000010	/* ISA state (J, T) mask */
 #define PSR_IT_MASK	0x0600fc00	/* If-Then execution state mask */
 #define PSR_IT_MASK	0x0600fc00	/* If-Then execution state mask */
 #define PSR_ENDIAN_MASK	0x00000200	/* Endianness state mask */
 #define PSR_ENDIAN_MASK	0x00000200	/* Endianness state mask */
@@ -199,6 +200,14 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 #define predicate(x)		((x) & 0xf0000000)
 #define predicate(x)		((x) & 0xf0000000)
 #define PREDICATE_ALWAYS	0xe0000000
 #define PREDICATE_ALWAYS	0xe0000000
 
 
+/*
+ * True if instr is a 32-bit thumb instruction. This works if instr
+ * is the first or only half-word of a thumb instruction. It also works
+ * when instr holds all 32-bits of a wide thumb instruction if stored
+ * in the form (first_half<<16)|(second_half)
+ */
+#define is_wide_instruction(instr)	((unsigned)(instr) >= 0xe800)
+
 /*
 /*
  * kprobe-based event tracer support
  * kprobe-based event tracer support
  */
  */

+ 4 - 0
arch/arm/include/asm/scatterlist.h

@@ -1,6 +1,10 @@
 #ifndef _ASMARM_SCATTERLIST_H
 #ifndef _ASMARM_SCATTERLIST_H
 #define _ASMARM_SCATTERLIST_H
 #define _ASMARM_SCATTERLIST_H
 
 
+#ifdef CONFIG_ARM_HAS_SG_CHAIN
+#define ARCH_HAS_SG_CHAIN
+#endif
+
 #include <asm/memory.h>
 #include <asm/memory.h>
 #include <asm/types.h>
 #include <asm/types.h>
 #include <asm-generic/scatterlist.h>
 #include <asm-generic/scatterlist.h>

+ 6 - 2
arch/arm/include/asm/setup.h

@@ -187,12 +187,16 @@ struct tagtable {
 
 
 #define __tag __used __attribute__((__section__(".taglist.init")))
 #define __tag __used __attribute__((__section__(".taglist.init")))
 #define __tagtable(tag, fn) \
 #define __tagtable(tag, fn) \
-static struct tagtable __tagtable_##fn __tag = { tag, fn }
+static const struct tagtable __tagtable_##fn __tag = { tag, fn }
 
 
 /*
 /*
  * Memory map description
  * Memory map description
  */
  */
-#define NR_BANKS 8
+#ifdef CONFIG_ARCH_EP93XX
+# define NR_BANKS 16
+#else
+# define NR_BANKS 8
+#endif
 
 
 struct membank {
 struct membank {
 	phys_addr_t start;
 	phys_addr_t start;

+ 22 - 0
arch/arm/include/asm/suspend.h

@@ -0,0 +1,22 @@
+#ifndef __ASM_ARM_SUSPEND_H
+#define __ASM_ARM_SUSPEND_H
+
+#include <asm/memory.h>
+#include <asm/tlbflush.h>
+
+extern void cpu_resume(void);
+
+/*
+ * Hide the first two arguments to __cpu_suspend - these are an implementation
+ * detail which platform code shouldn't have to know about.
+ */
+static inline int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	extern int __cpu_suspend(int, long, unsigned long,
+				 int (*)(unsigned long));
+	int ret = __cpu_suspend(0, PHYS_OFFSET - PAGE_OFFSET, arg, fn);
+	flush_tlb_all();
+	return ret;
+}
+
+#endif

+ 2 - 0
arch/arm/include/asm/tcm.h

@@ -27,5 +27,7 @@
 
 
 void *tcm_alloc(size_t len);
 void *tcm_alloc(size_t len);
 void tcm_free(void *addr, size_t len);
 void tcm_free(void *addr, size_t len);
+bool tcm_dtcm_present(void);
+bool tcm_itcm_present(void);
 
 
 #endif
 #endif

+ 10 - 48
arch/arm/include/asm/tlbflush.h

@@ -34,16 +34,12 @@
 #define TLB_V6_D_ASID	(1 << 17)
 #define TLB_V6_D_ASID	(1 << 17)
 #define TLB_V6_I_ASID	(1 << 18)
 #define TLB_V6_I_ASID	(1 << 18)
 
 
-#define TLB_BTB		(1 << 28)
-
 /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
 /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
 #define TLB_V7_UIS_PAGE	(1 << 19)
 #define TLB_V7_UIS_PAGE	(1 << 19)
 #define TLB_V7_UIS_FULL (1 << 20)
 #define TLB_V7_UIS_FULL (1 << 20)
 #define TLB_V7_UIS_ASID (1 << 21)
 #define TLB_V7_UIS_ASID (1 << 21)
 
 
-/* Inner Shareable BTB operation (ARMv7 MP extensions) */
-#define TLB_V7_IS_BTB	(1 << 22)
-
+#define TLB_BARRIER	(1 << 28)
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
 #define TLB_WB		(1 << 31)
@@ -58,7 +54,7 @@
  *	  v4wb  - ARMv4 with write buffer without I TLB flush entry instruction
  *	  v4wb  - ARMv4 with write buffer without I TLB flush entry instruction
  *	  v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
  *	  v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
  *	  fr    - Feroceon (v4wbi with non-outer-cacheable page table walks)
  *	  fr    - Feroceon (v4wbi with non-outer-cacheable page table walks)
- *	  fa    - Faraday (v4 with write buffer with UTLB and branch target buffer (BTB))
+ *	  fa    - Faraday (v4 with write buffer with UTLB)
  *	  v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
  *	  v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
  *	  v7wbi - identical to v6wbi
  *	  v7wbi - identical to v6wbi
  */
  */
@@ -99,7 +95,7 @@
 # define v4_always_flags	(-1UL)
 # define v4_always_flags	(-1UL)
 #endif
 #endif
 
 
-#define fa_tlb_flags	(TLB_WB | TLB_BTB | TLB_DCLEAN | \
+#define fa_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V4_U_FULL | TLB_V4_U_PAGE)
 			 TLB_V4_U_FULL | TLB_V4_U_PAGE)
 
 
 #ifdef CONFIG_CPU_TLB_FA
 #ifdef CONFIG_CPU_TLB_FA
@@ -166,7 +162,7 @@
 # define v4wb_always_flags	(-1UL)
 # define v4wb_always_flags	(-1UL)
 #endif
 #endif
 
 
-#define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+#define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
 			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
 			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
 			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
 			 TLB_V6_I_ASID | TLB_V6_D_ASID)
 			 TLB_V6_I_ASID | TLB_V6_D_ASID)
@@ -184,9 +180,9 @@
 # define v6wbi_always_flags	(-1UL)
 # define v6wbi_always_flags	(-1UL)
 #endif
 #endif
 
 
-#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \
+#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
 			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
-#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BTB | \
+#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
 			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
 
 
 #ifdef CONFIG_CPU_TLB_V7
 #ifdef CONFIG_CPU_TLB_V7
@@ -341,15 +337,7 @@ static inline void local_flush_tlb_all(void)
 	if (tlb_flag(TLB_V7_UIS_FULL))
 	if (tlb_flag(TLB_V7_UIS_FULL))
 		asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
 		asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
 
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
-		dsb();
-		isb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER)) {
 		dsb();
 		dsb();
 		isb();
 		isb();
 	}
 	}
@@ -389,17 +377,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 		asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc");
 		asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc");
 #endif
 #endif
 
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
-		dsb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER))
 		dsb();
 		dsb();
-		isb();
-	}
 }
 }
 
 
 static inline void
 static inline void
@@ -439,17 +418,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc");
 		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc");
 #endif
 #endif
 
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
-		dsb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER))
 		dsb();
 		dsb();
-		isb();
-	}
 }
 }
 
 
 static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
@@ -482,15 +452,7 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 	if (tlb_flag(TLB_V7_UIS_PAGE))
 	if (tlb_flag(TLB_V7_UIS_PAGE))
 		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc");
 		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc");
 
 
-	if (tlb_flag(TLB_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
-		dsb();
-		isb();
-	}
-	if (tlb_flag(TLB_V7_IS_BTB)) {
-		/* flush the branch target cache */
-		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_BARRIER)) {
 		dsb();
 		dsb();
 		isb();
 		isb();
 	}
 	}

+ 3 - 0
arch/arm/include/asm/traps.h

@@ -3,6 +3,9 @@
 
 
 #include <linux/list.h>
 #include <linux/list.h>
 
 
+struct pt_regs;
+struct task_struct;
+
 struct undef_hook {
 struct undef_hook {
 	struct list_head node;
 	struct list_head node;
 	u32 instr_mask;
 	u32 instr_mask;

+ 6 - 1
arch/arm/kernel/Makefile

@@ -37,7 +37,12 @@ obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
-obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-decode.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-common.o
+ifdef CONFIG_THUMB2_KERNEL
+obj-$(CONFIG_KPROBES)		+= kprobes-thumb.o
+else
+obj-$(CONFIG_KPROBES)		+= kprobes-arm.o
+endif
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o

+ 3 - 0
arch/arm/kernel/asm-offsets.c

@@ -59,6 +59,9 @@ int main(void)
   DEFINE(TI_TP_VALUE,		offsetof(struct thread_info, tp_value));
   DEFINE(TI_TP_VALUE,		offsetof(struct thread_info, tp_value));
   DEFINE(TI_FPSTATE,		offsetof(struct thread_info, fpstate));
   DEFINE(TI_FPSTATE,		offsetof(struct thread_info, fpstate));
   DEFINE(TI_VFPSTATE,		offsetof(struct thread_info, vfpstate));
   DEFINE(TI_VFPSTATE,		offsetof(struct thread_info, vfpstate));
+#ifdef CONFIG_SMP
+  DEFINE(VFP_CPU,		offsetof(union vfp_state, hard.cpu));
+#endif
 #ifdef CONFIG_ARM_THUMBEE
 #ifdef CONFIG_ARM_THUMBEE
   DEFINE(TI_THUMBEE_STATE,	offsetof(struct thread_info, thumbee_state));
   DEFINE(TI_THUMBEE_STATE,	offsetof(struct thread_info, thumbee_state));
 #endif
 #endif

+ 204 - 319
arch/arm/kernel/entry-armv.S

@@ -29,21 +29,53 @@
 #include <asm/entry-macro-multi.S>
 #include <asm/entry-macro-multi.S>
 
 
 /*
 /*
- * Interrupt handling.  Preserves r7, r8, r9
+ * Interrupt handling.
  */
  */
 	.macro	irq_handler
 	.macro	irq_handler
 #ifdef CONFIG_MULTI_IRQ_HANDLER
 #ifdef CONFIG_MULTI_IRQ_HANDLER
-	ldr	r5, =handle_arch_irq
+	ldr	r1, =handle_arch_irq
 	mov	r0, sp
 	mov	r0, sp
-	ldr	r5, [r5]
+	ldr	r1, [r1]
 	adr	lr, BSYM(9997f)
 	adr	lr, BSYM(9997f)
-	teq	r5, #0
-	movne	pc, r5
+	teq	r1, #0
+	movne	pc, r1
 #endif
 #endif
 	arch_irq_handler_default
 	arch_irq_handler_default
 9997:
 9997:
 	.endm
 	.endm
 
 
+	.macro	pabt_helper
+	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
+#ifdef MULTI_PABORT
+	ldr	ip, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
+#else
+	bl	CPU_PABORT_HANDLER
+#endif
+	.endm
+
+	.macro	dabt_helper
+
+	@
+	@ Call the processor-specific abort handler:
+	@
+	@  r2 - pt_regs
+	@  r4 - aborted context pc
+	@  r5 - aborted context psr
+	@
+	@ The abort handler must return the aborted address in r0, and
+	@ the fault status register in r1.  r9 must be preserved.
+	@
+#ifdef MULTI_DABORT
+	ldr	ip, .LCprocfns
+	mov	lr, pc
+	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
+#else
+	bl	CPU_DABORT_HANDLER
+#endif
+	.endm
+
 #ifdef CONFIG_KPROBES
 #ifdef CONFIG_KPROBES
 	.section	.kprobes.text,"ax",%progbits
 	.section	.kprobes.text,"ax",%progbits
 #else
 #else
@@ -126,106 +158,74 @@ ENDPROC(__und_invalid)
  SPFIX(	subeq	sp, sp, #4	)
  SPFIX(	subeq	sp, sp, #4	)
 	stmia	sp, {r1 - r12}
 	stmia	sp, {r1 - r12}
 
 
-	ldmia	r0, {r1 - r3}
-	add	r5, sp, #S_SP - 4	@ here for interlock avoidance
-	mov	r4, #-1			@  ""  ""      ""       ""
-	add	r0, sp, #(S_FRAME_SIZE + \stack_hole - 4)
- SPFIX(	addeq	r0, r0, #4	)
-	str	r1, [sp, #-4]!		@ save the "real" r0 copied
+	ldmia	r0, {r3 - r5}
+	add	r7, sp, #S_SP - 4	@ here for interlock avoidance
+	mov	r6, #-1			@  ""  ""      ""       ""
+	add	r2, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+ SPFIX(	addeq	r2, r2, #4	)
+	str	r3, [sp, #-4]!		@ save the "real" r0 copied
 					@ from the exception stack
 					@ from the exception stack
 
 
-	mov	r1, lr
+	mov	r3, lr
 
 
 	@
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
 	@
-	@  r0 - sp_svc
-	@  r1 - lr_svc
-	@  r2 - lr_<exception>, already fixed up for correct return/restart
-	@  r3 - spsr_<exception>
-	@  r4 - orig_r0 (see pt_regs definition in ptrace.h)
+	@  r2 - sp_svc
+	@  r3 - lr_svc
+	@  r4 - lr_<exception>, already fixed up for correct return/restart
+	@  r5 - spsr_<exception>
+	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
 	@
 	@
-	stmia	r5, {r0 - r4}
+	stmia	r7, {r2 - r6}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
 	.endm
 	.endm
 
 
 	.align	5
 	.align	5
 __dabt_svc:
 __dabt_svc:
 	svc_entry
 	svc_entry
-
-	@
-	@ get ready to re-enable interrupts if appropriate
-	@
-	mrs	r9, cpsr
-	tst	r3, #PSR_I_BIT
-	biceq	r9, r9, #PSR_I_BIT
-
-	@
-	@ Call the processor-specific abort handler:
-	@
-	@  r2 - aborted context pc
-	@  r3 - aborted context cpsr
-	@
-	@ The abort handler must return the aborted address in r0, and
-	@ the fault status register in r1.  r9 must be preserved.
-	@
-#ifdef MULTI_DABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_DABT_FUNC]
-#else
-	bl	CPU_DABORT_HANDLER
-#endif
-
-	@
-	@ set desired IRQ state, then call main handler
-	@
-	debug_entry r1
-	msr	cpsr_c, r9
 	mov	r2, sp
 	mov	r2, sp
-	bl	do_DataAbort
+	dabt_helper
 
 
 	@
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@ IRQs off again before pulling preserved data off the stack
 	@
 	@
 	disable_irq_notrace
 	disable_irq_notrace
 
 
-	@
-	@ restore SPSR and restart the instruction
-	@
-	ldr	r2, [sp, #S_PSR]
-	svc_exit r2				@ return from exception
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	r5, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	r5, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
  UNWIND(.fnend		)
 ENDPROC(__dabt_svc)
 ENDPROC(__dabt_svc)
 
 
 	.align	5
 	.align	5
 __irq_svc:
 __irq_svc:
 	svc_entry
 	svc_entry
+	irq_handler
 
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
-#endif
 #ifdef CONFIG_PREEMPT
 #ifdef CONFIG_PREEMPT
 	get_thread_info tsk
 	get_thread_info tsk
 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
-	add	r7, r8, #1			@ increment it
-	str	r7, [tsk, #TI_PREEMPT]
-#endif
-
-	irq_handler
-#ifdef CONFIG_PREEMPT
-	str	r8, [tsk, #TI_PREEMPT]		@ restore preempt count
 	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
 	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
 	teq	r8, #0				@ if preempt count != 0
 	teq	r8, #0				@ if preempt count != 0
 	movne	r0, #0				@ force flags to 0
 	movne	r0, #0				@ force flags to 0
 	tst	r0, #_TIF_NEED_RESCHED
 	tst	r0, #_TIF_NEED_RESCHED
 	blne	svc_preempt
 	blne	svc_preempt
 #endif
 #endif
-	ldr	r4, [sp, #S_PSR]		@ irqs are already disabled
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 #ifdef CONFIG_TRACE_IRQFLAGS
-	tst	r4, #PSR_I_BIT
-	bleq	trace_hardirqs_on
+	@ The parent context IRQs must have been enabled to get here in
+	@ the first place, so there's no point checking the PSR I bit.
+	bl	trace_hardirqs_on
 #endif
 #endif
-	svc_exit r4				@ return from exception
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
  UNWIND(.fnend		)
 ENDPROC(__irq_svc)
 ENDPROC(__irq_svc)
 
 
@@ -251,7 +251,6 @@ __und_svc:
 #else
 #else
 	svc_entry
 	svc_entry
 #endif
 #endif
-
 	@
 	@
 	@ call emulation code, which returns using r9 if it has emulated
 	@ call emulation code, which returns using r9 if it has emulated
 	@ the instruction, or the more conventional lr if we are to treat
 	@ the instruction, or the more conventional lr if we are to treat
@@ -260,15 +259,16 @@ __und_svc:
 	@  r0 - instruction
 	@  r0 - instruction
 	@
 	@
 #ifndef	CONFIG_THUMB2_KERNEL
 #ifndef	CONFIG_THUMB2_KERNEL
-	ldr	r0, [r2, #-4]
+	ldr	r0, [r4, #-4]
 #else
 #else
-	ldrh	r0, [r2, #-2]			@ Thumb instruction at LR - 2
+	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
 	and	r9, r0, #0xf800
 	and	r9, r0, #0xf800
 	cmp	r9, #0xe800			@ 32-bit instruction if xx >= 0
 	cmp	r9, #0xe800			@ 32-bit instruction if xx >= 0
-	ldrhhs	r9, [r2]			@ bottom 16 bits
+	ldrhhs	r9, [r4]			@ bottom 16 bits
 	orrhs	r0, r9, r0, lsl #16
 	orrhs	r0, r9, r0, lsl #16
 #endif
 #endif
 	adr	r9, BSYM(1f)
 	adr	r9, BSYM(1f)
+	mov	r2, r4
 	bl	call_fpe
 	bl	call_fpe
 
 
 	mov	r0, sp				@ struct pt_regs *regs
 	mov	r0, sp				@ struct pt_regs *regs
@@ -282,45 +282,35 @@ __und_svc:
 	@
 	@
 	@ restore SPSR and restart the instruction
 	@ restore SPSR and restart the instruction
 	@
 	@
-	ldr	r2, [sp, #S_PSR]		@ Get SVC cpsr
-	svc_exit r2				@ return from exception
+	ldr	r5, [sp, #S_PSR]		@ Get SVC cpsr
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	r5, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	r5, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
  UNWIND(.fnend		)
 ENDPROC(__und_svc)
 ENDPROC(__und_svc)
 
 
 	.align	5
 	.align	5
 __pabt_svc:
 __pabt_svc:
 	svc_entry
 	svc_entry
-
-	@
-	@ re-enable interrupts if appropriate
-	@
-	mrs	r9, cpsr
-	tst	r3, #PSR_I_BIT
-	biceq	r9, r9, #PSR_I_BIT
-
-	mov	r0, r2			@ pass address of aborted instruction.
-#ifdef MULTI_PABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_PABT_FUNC]
-#else
-	bl	CPU_PABORT_HANDLER
-#endif
-	debug_entry r1
-	msr	cpsr_c, r9			@ Maybe enable interrupts
 	mov	r2, sp				@ regs
 	mov	r2, sp				@ regs
-	bl	do_PrefetchAbort		@ call abort handler
+	pabt_helper
 
 
 	@
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@ IRQs off again before pulling preserved data off the stack
 	@
 	@
 	disable_irq_notrace
 	disable_irq_notrace
 
 
-	@
-	@ restore SPSR and restart the instruction
-	@
-	ldr	r2, [sp, #S_PSR]
-	svc_exit r2				@ return from exception
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	r5, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+	tst	r5, #PSR_I_BIT
+	blne	trace_hardirqs_off
+#endif
+	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
  UNWIND(.fnend		)
 ENDPROC(__pabt_svc)
 ENDPROC(__pabt_svc)
 
 
@@ -351,23 +341,23 @@ ENDPROC(__pabt_svc)
  ARM(	stmib	sp, {r1 - r12}	)
  ARM(	stmib	sp, {r1 - r12}	)
  THUMB(	stmia	sp, {r0 - r12}	)
  THUMB(	stmia	sp, {r0 - r12}	)
 
 
-	ldmia	r0, {r1 - r3}
+	ldmia	r0, {r3 - r5}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
 	add	r0, sp, #S_PC		@ here for interlock avoidance
-	mov	r4, #-1			@  ""  ""     ""        ""
+	mov	r6, #-1			@  ""  ""     ""        ""
 
 
-	str	r1, [sp]		@ save the "real" r0 copied
+	str	r3, [sp]		@ save the "real" r0 copied
 					@ from the exception stack
 					@ from the exception stack
 
 
 	@
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
 	@
-	@  r2 - lr_<exception>, already fixed up for correct return/restart
-	@  r3 - spsr_<exception>
-	@  r4 - orig_r0 (see pt_regs definition in ptrace.h)
+	@  r4 - lr_<exception>, already fixed up for correct return/restart
+	@  r5 - spsr_<exception>
+	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
 	@
 	@
 	@ Also, separately save sp_usr and lr_usr
 	@ Also, separately save sp_usr and lr_usr
 	@
 	@
-	stmia	r0, {r2 - r4}
+	stmia	r0, {r4 - r6}
  ARM(	stmdb	r0, {sp, lr}^			)
  ARM(	stmdb	r0, {sp, lr}^			)
  THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
  THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
 
 
@@ -380,10 +370,14 @@ ENDPROC(__pabt_svc)
 	@ Clear FP to mark the first stack frame
 	@ Clear FP to mark the first stack frame
 	@
 	@
 	zero_fp
 	zero_fp
+
+#ifdef CONFIG_IRQSOFF_TRACER
+	bl	trace_hardirqs_off
+#endif
 	.endm
 	.endm
 
 
 	.macro	kuser_cmpxchg_check
 	.macro	kuser_cmpxchg_check
-#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
 #ifndef CONFIG_MMU
 #ifndef CONFIG_MMU
 #warning "NPTL on non MMU needs fixing"
 #warning "NPTL on non MMU needs fixing"
 #else
 #else
@@ -391,8 +385,8 @@ ENDPROC(__pabt_svc)
 	@ if it was interrupted in a critical region.  Here we
 	@ if it was interrupted in a critical region.  Here we
 	@ perform a quick test inline since it should be false
 	@ perform a quick test inline since it should be false
 	@ 99.9999% of the time.  The rest is done out of line.
 	@ 99.9999% of the time.  The rest is done out of line.
-	cmp	r2, #TASK_SIZE
-	blhs	kuser_cmpxchg_fixup
+	cmp	r4, #TASK_SIZE
+	blhs	kuser_cmpxchg64_fixup
 #endif
 #endif
 #endif
 #endif
 	.endm
 	.endm
@@ -401,32 +395,9 @@ ENDPROC(__pabt_svc)
 __dabt_usr:
 __dabt_usr:
 	usr_entry
 	usr_entry
 	kuser_cmpxchg_check
 	kuser_cmpxchg_check
-
-	@
-	@ Call the processor-specific abort handler:
-	@
-	@  r2 - aborted context pc
-	@  r3 - aborted context cpsr
-	@
-	@ The abort handler must return the aborted address in r0, and
-	@ the fault status register in r1.
-	@
-#ifdef MULTI_DABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_DABT_FUNC]
-#else
-	bl	CPU_DABORT_HANDLER
-#endif
-
-	@
-	@ IRQs on, then call the main handler
-	@
-	debug_entry r1
-	enable_irq
 	mov	r2, sp
 	mov	r2, sp
-	adr	lr, BSYM(ret_from_exception)
-	b	do_DataAbort
+	dabt_helper
+	b	ret_from_exception
  UNWIND(.fnend		)
  UNWIND(.fnend		)
 ENDPROC(__dabt_usr)
 ENDPROC(__dabt_usr)
 
 
@@ -434,28 +405,8 @@ ENDPROC(__dabt_usr)
 __irq_usr:
 __irq_usr:
 	usr_entry
 	usr_entry
 	kuser_cmpxchg_check
 	kuser_cmpxchg_check
-
-#ifdef CONFIG_IRQSOFF_TRACER
-	bl	trace_hardirqs_off
-#endif
-
-	get_thread_info tsk
-#ifdef CONFIG_PREEMPT
-	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
-	add	r7, r8, #1			@ increment it
-	str	r7, [tsk, #TI_PREEMPT]
-#endif
-
 	irq_handler
 	irq_handler
-#ifdef CONFIG_PREEMPT
-	ldr	r0, [tsk, #TI_PREEMPT]
-	str	r8, [tsk, #TI_PREEMPT]
-	teq	r0, r7
- ARM(	strne	r0, [r0, -r0]	)
- THUMB(	movne	r0, #0		)
- THUMB(	strne	r0, [r0]	)
-#endif
-
+	get_thread_info tsk
 	mov	why, #0
 	mov	why, #0
 	b	ret_to_user_from_irq
 	b	ret_to_user_from_irq
  UNWIND(.fnend		)
  UNWIND(.fnend		)
@@ -467,6 +418,9 @@ ENDPROC(__irq_usr)
 __und_usr:
 __und_usr:
 	usr_entry
 	usr_entry
 
 
+	mov	r2, r4
+	mov	r3, r5
+
 	@
 	@
 	@ fall through to the emulation code, which returns using r9 if
 	@ fall through to the emulation code, which returns using r9 if
 	@ it has emulated the instruction, or the more conventional lr
 	@ it has emulated the instruction, or the more conventional lr
@@ -682,19 +636,8 @@ ENDPROC(__und_usr_unknown)
 	.align	5
 	.align	5
 __pabt_usr:
 __pabt_usr:
 	usr_entry
 	usr_entry
-
-	mov	r0, r2			@ pass address of aborted instruction.
-#ifdef MULTI_PABORT
-	ldr	r4, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [r4, #PROCESSOR_PABT_FUNC]
-#else
-	bl	CPU_PABORT_HANDLER
-#endif
-	debug_entry r1
-	enable_irq				@ Enable interrupts
 	mov	r2, sp				@ regs
 	mov	r2, sp				@ regs
-	bl	do_PrefetchAbort		@ call abort handler
+	pabt_helper
  UNWIND(.fnend		)
  UNWIND(.fnend		)
 	/* fall through */
 	/* fall through */
 /*
 /*
@@ -758,31 +701,12 @@ ENDPROC(__switch_to)
 /*
 /*
  * User helpers.
  * User helpers.
  *
  *
- * These are segment of kernel provided user code reachable from user space
- * at a fixed address in kernel memory.  This is used to provide user space
- * with some operations which require kernel help because of unimplemented
- * native feature and/or instructions in many ARM CPUs. The idea is for
- * this code to be executed directly in user mode for best efficiency but
- * which is too intimate with the kernel counter part to be left to user
- * libraries.  In fact this code might even differ from one CPU to another
- * depending on the available  instruction set and restrictions like on
- * SMP systems.  In other words, the kernel reserves the right to change
- * this code as needed without warning. Only the entry points and their
- * results are guaranteed to be stable.
- *
  * Each segment is 32-byte aligned and will be moved to the top of the high
  * Each segment is 32-byte aligned and will be moved to the top of the high
  * vector page.  New segments (if ever needed) must be added in front of
  * vector page.  New segments (if ever needed) must be added in front of
  * existing ones.  This mechanism should be used only for things that are
  * existing ones.  This mechanism should be used only for things that are
  * really small and justified, and not be abused freely.
  * really small and justified, and not be abused freely.
  *
  *
- * User space is expected to implement those things inline when optimizing
- * for a processor that has the necessary native support, but only if such
- * resulting binaries are already to be incompatible with earlier ARM
- * processors due to the use of unsupported instructions other than what
- * is provided here.  In other words don't make binaries unable to run on
- * earlier processors just for the sake of not using these kernel helpers
- * if your compiled code is not going to use the new instructions for other
- * purpose.
+ * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
  */
  */
  THUMB(	.arm	)
  THUMB(	.arm	)
 
 
@@ -799,96 +723,103 @@ ENDPROC(__switch_to)
 __kuser_helper_start:
 __kuser_helper_start:
 
 
 /*
 /*
- * Reference prototype:
- *
- *	void __kernel_memory_barrier(void)
- *
- * Input:
- *
- *	lr = return address
- *
- * Output:
- *
- *	none
- *
- * Clobbered:
- *
- *	none
- *
- * Definition and user space usage example:
- *
- *	typedef void (__kernel_dmb_t)(void);
- *	#define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0)
- *
- * Apply any needed memory barrier to preserve consistency with data modified
- * manually and __kuser_cmpxchg usage.
- *
- * This could be used as follows:
- *
- * #define __kernel_dmb() \
- *         asm volatile ( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #95" \
- *	        : : : "r0", "lr","cc" )
+ * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular
+ * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point.
  */
  */
 
 
-__kuser_memory_barrier:				@ 0xffff0fa0
+__kuser_cmpxchg64:				@ 0xffff0f60
+
+#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+
+	/*
+	 * Poor you.  No fast solution possible...
+	 * The kernel itself must perform the operation.
+	 * A special ghost syscall is used for that (see traps.c).
+	 */
+	stmfd	sp!, {r7, lr}
+	ldr	r7, 1f			@ it's 20 bits
+	swi	__ARM_NR_cmpxchg64
+	ldmfd	sp!, {r7, pc}
+1:	.word	__ARM_NR_cmpxchg64
+
+#elif defined(CONFIG_CPU_32v6K)
+
+	stmfd	sp!, {r4, r5, r6, r7}
+	ldrd	r4, r5, [r0]			@ load old val
+	ldrd	r6, r7, [r1]			@ load new val
+	smp_dmb	arm
+1:	ldrexd	r0, r1, [r2]			@ load current val
+	eors	r3, r0, r4			@ compare with oldval (1)
+	eoreqs	r3, r1, r5			@ compare with oldval (2)
+	strexdeq r3, r6, r7, [r2]		@ store newval if eq
+	teqeq	r3, #1				@ success?
+	beq	1b				@ if no then retry
 	smp_dmb	arm
 	smp_dmb	arm
+	rsbs	r0, r3, #0			@ set returned val and C flag
+	ldmfd	sp!, {r4, r5, r6, r7}
+	bx	lr
+
+#elif !defined(CONFIG_SMP)
+
+#ifdef CONFIG_MMU
+
+	/*
+	 * The only thing that can break atomicity in this cmpxchg64
+	 * implementation is either an IRQ or a data abort exception
+	 * causing another process/thread to be scheduled in the middle of
+	 * the critical sequence.  The same strategy as for cmpxchg is used.
+	 */
+	stmfd	sp!, {r4, r5, r6, lr}
+	ldmia	r0, {r4, r5}			@ load old val
+	ldmia	r1, {r6, lr}			@ load new val
+1:	ldmia	r2, {r0, r1}			@ load current val
+	eors	r3, r0, r4			@ compare with oldval (1)
+	eoreqs	r3, r1, r5			@ compare with oldval (2)
+2:	stmeqia	r2, {r6, lr}			@ store newval if eq
+	rsbs	r0, r3, #0			@ set return val and C flag
+	ldmfd	sp!, {r4, r5, r6, pc}
+
+	.text
+kuser_cmpxchg64_fixup:
+	@ Called from kuser_cmpxchg_fixup.
+	@ r4 = address of interrupted insn (must be preserved).
+	@ sp = saved regs. r7 and r8 are clobbered.
+	@ 1b = first critical insn, 2b = last critical insn.
+	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
+	mov	r7, #0xffff0fff
+	sub	r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
+	subs	r8, r4, r7
+	rsbcss	r8, r8, #(2b - 1b)
+	strcs	r7, [sp, #S_PC]
+#if __LINUX_ARM_ARCH__ < 6
+	bcc	kuser_cmpxchg32_fixup
+#endif
+	mov	pc, lr
+	.previous
+
+#else
+#warning "NPTL on non MMU needs fixing"
+	mov	r0, #-1
+	adds	r0, r0, #0
 	usr_ret	lr
 	usr_ret	lr
+#endif
+
+#else
+#error "incoherent kernel configuration"
+#endif
+
+	/* pad to next slot */
+	.rept	(16 - (. - __kuser_cmpxchg64)/4)
+	.word	0
+	.endr
 
 
 	.align	5
 	.align	5
 
 
-/*
- * Reference prototype:
- *
- *	int __kernel_cmpxchg(int oldval, int newval, int *ptr)
- *
- * Input:
- *
- *	r0 = oldval
- *	r1 = newval
- *	r2 = ptr
- *	lr = return address
- *
- * Output:
- *
- *	r0 = returned value (zero or non-zero)
- *	C flag = set if r0 == 0, clear if r0 != 0
- *
- * Clobbered:
- *
- *	r3, ip, flags
- *
- * Definition and user space usage example:
- *
- *	typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
- *	#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
- *
- * Atomically store newval in *ptr if *ptr is equal to oldval for user space.
- * Return zero if *ptr was changed or non-zero if no exchange happened.
- * The C flag is also set if *ptr was changed to allow for assembly
- * optimization in the calling code.
- *
- * Notes:
- *
- *    - This routine already includes memory barriers as needed.
- *
- * For example, a user space atomic_add implementation could look like this:
- *
- * #define atomic_add(ptr, val) \
- *	({ register unsigned int *__ptr asm("r2") = (ptr); \
- *	   register unsigned int __result asm("r1"); \
- *	   asm volatile ( \
- *	       "1: @ atomic_add\n\t" \
- *	       "ldr	r0, [r2]\n\t" \
- *	       "mov	r3, #0xffff0fff\n\t" \
- *	       "add	lr, pc, #4\n\t" \
- *	       "add	r1, r0, %2\n\t" \
- *	       "add	pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
- *	       "bcc	1b" \
- *	       : "=&r" (__result) \
- *	       : "r" (__ptr), "rIL" (val) \
- *	       : "r0","r3","ip","lr","cc","memory" ); \
- *	   __result; })
- */
+__kuser_memory_barrier:				@ 0xffff0fa0
+	smp_dmb	arm
+	usr_ret	lr
+
+	.align	5
 
 
 __kuser_cmpxchg:				@ 0xffff0fc0
 __kuser_cmpxchg:				@ 0xffff0fc0
 
 
@@ -925,15 +856,15 @@ __kuser_cmpxchg:				@ 0xffff0fc0
 	usr_ret	lr
 	usr_ret	lr
 
 
 	.text
 	.text
-kuser_cmpxchg_fixup:
+kuser_cmpxchg32_fixup:
 	@ Called from kuser_cmpxchg_check macro.
 	@ Called from kuser_cmpxchg_check macro.
-	@ r2 = address of interrupted insn (must be preserved).
+	@ r4 = address of interrupted insn (must be preserved).
 	@ sp = saved regs. r7 and r8 are clobbered.
 	@ sp = saved regs. r7 and r8 are clobbered.
 	@ 1b = first critical insn, 2b = last critical insn.
 	@ 1b = first critical insn, 2b = last critical insn.
-	@ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
+	@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
 	mov	r7, #0xffff0fff
 	mov	r7, #0xffff0fff
 	sub	r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg)))
 	sub	r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg)))
-	subs	r8, r2, r7
+	subs	r8, r4, r7
 	rsbcss	r8, r8, #(2b - 1b)
 	rsbcss	r8, r8, #(2b - 1b)
 	strcs	r7, [sp, #S_PC]
 	strcs	r7, [sp, #S_PC]
 	mov	pc, lr
 	mov	pc, lr
@@ -963,39 +894,6 @@ kuser_cmpxchg_fixup:
 
 
 	.align	5
 	.align	5
 
 
-/*
- * Reference prototype:
- *
- *	int __kernel_get_tls(void)
- *
- * Input:
- *
- *	lr = return address
- *
- * Output:
- *
- *	r0 = TLS value
- *
- * Clobbered:
- *
- *	none
- *
- * Definition and user space usage example:
- *
- *	typedef int (__kernel_get_tls_t)(void);
- *	#define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
- *
- * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
- *
- * This could be used as follows:
- *
- * #define __kernel_get_tls() \
- *	({ register unsigned int __val asm("r0"); \
- *         asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
- *	        : "=r" (__val) : : "lr","cc" ); \
- *	   __val; })
- */
-
 __kuser_get_tls:				@ 0xffff0fe0
 __kuser_get_tls:				@ 0xffff0fe0
 	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
 	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
 	usr_ret	lr
 	usr_ret	lr
@@ -1004,19 +902,6 @@ __kuser_get_tls:				@ 0xffff0fe0
 	.word	0			@ 0xffff0ff0 software TLS value, then
 	.word	0			@ 0xffff0ff0 software TLS value, then
 	.endr				@ pad up to __kuser_helper_version
 	.endr				@ pad up to __kuser_helper_version
 
 
-/*
- * Reference declaration:
- *
- *	extern unsigned int __kernel_helper_version;
- *
- * Definition and user space usage example:
- *
- *	#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
- *
- * User space may read this to determine the curent number of helpers
- * available.
- */
-
 __kuser_helper_version:				@ 0xffff0ffc
 __kuser_helper_version:				@ 0xffff0ffc
 	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
 	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
 
 

+ 5 - 26
arch/arm/kernel/entry-header.S

@@ -121,15 +121,13 @@
 	.endm
 	.endm
 #else	/* CONFIG_THUMB2_KERNEL */
 #else	/* CONFIG_THUMB2_KERNEL */
 	.macro	svc_exit, rpsr
 	.macro	svc_exit, rpsr
+	ldr	lr, [sp, #S_SP]			@ top of the stack
+	ldrd	r0, r1, [sp, #S_LR]		@ calling lr and pc
 	clrex					@ clear the exclusive monitor
 	clrex					@ clear the exclusive monitor
-	ldr	r0, [sp, #S_SP]			@ top of the stack
-	ldr	r1, [sp, #S_PC]			@ return address
-	tst	r0, #4				@ orig stack 8-byte aligned?
-	stmdb	r0, {r1, \rpsr}			@ rfe context
+	stmdb	lr!, {r0, r1, \rpsr}		@ calling lr and rfe context
 	ldmia	sp, {r0 - r12}
 	ldmia	sp, {r0 - r12}
-	ldr	lr, [sp, #S_LR]
-	addeq	sp, sp, #S_FRAME_SIZE - 8	@ aligned
-	addne	sp, sp, #S_FRAME_SIZE - 4	@ not aligned
+	mov	sp, lr
+	ldr	lr, [sp], #4
 	rfeia	sp!
 	rfeia	sp!
 	.endm
 	.endm
 
 
@@ -165,25 +163,6 @@
 	.endm
 	.endm
 #endif	/* !CONFIG_THUMB2_KERNEL */
 #endif	/* !CONFIG_THUMB2_KERNEL */
 
 
-	@
-	@ Debug exceptions are taken as prefetch or data aborts.
-	@ We must disable preemption during the handler so that
-	@ we can access the debug registers safely.
-	@
-	.macro	debug_entry, fsr
-#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT)
-	ldr	r4, =0x40f		@ mask out fsr.fs
-	and	r5, r4, \fsr
-	cmp	r5, #2			@ debug exception
-	bne	1f
-	get_thread_info r10
-	ldr	r6, [r10, #TI_PREEMPT]	@ get preempt count
-	add	r11, r6, #1		@ increment it
-	str	r11, [r10, #TI_PREEMPT]
-1:
-#endif
-	.endm
-
 /*
 /*
  * These are the registers used in the syscall handler, and allow us to
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - r0 to r6.
  * have in theory up to 7 arguments to a function - r0 to r6.

+ 8 - 0
arch/arm/kernel/head-nommu.S

@@ -32,8 +32,16 @@
  * numbers for r1.
  * numbers for r1.
  *
  *
  */
  */
+	.arm
+
 	__HEAD
 	__HEAD
 ENTRY(stext)
 ENTRY(stext)
+
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
+
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
 						@ and irqs disabled
 #ifndef CONFIG_CPU_CP15
 #ifndef CONFIG_CPU_CP15

+ 8 - 0
arch/arm/kernel/head.S

@@ -71,8 +71,16 @@
  * crap here - that's what the boot loader (or in extreme, well justified
  * crap here - that's what the boot loader (or in extreme, well justified
  * circumstances, zImage) is for.
  * circumstances, zImage) is for.
  */
  */
+	.arm
+
 	__HEAD
 	__HEAD
 ENTRY(stext)
 ENTRY(stext)
+
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
+
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
 						@ and irqs disabled
 	mrc	p15, 0, r9, c0, c0		@ get processor id
 	mrc	p15, 0, r9, c0, c0		@ get processor id

+ 5 - 7
arch/arm/kernel/hw_breakpoint.c

@@ -796,7 +796,7 @@ unlock:
 
 
 /*
 /*
  * Called from either the Data Abort Handler [watchpoint] or the
  * Called from either the Data Abort Handler [watchpoint] or the
- * Prefetch Abort Handler [breakpoint] with preemption disabled.
+ * Prefetch Abort Handler [breakpoint] with interrupts disabled.
  */
  */
 static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 				 struct pt_regs *regs)
 				 struct pt_regs *regs)
@@ -804,8 +804,10 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 	int ret = 0;
 	int ret = 0;
 	u32 dscr;
 	u32 dscr;
 
 
-	/* We must be called with preemption disabled. */
-	WARN_ON(preemptible());
+	preempt_disable();
+
+	if (interrupts_enabled(regs))
+		local_irq_enable();
 
 
 	/* We only handle watchpoints and hardware breakpoints. */
 	/* We only handle watchpoints and hardware breakpoints. */
 	ARM_DBG_READ(c1, 0, dscr);
 	ARM_DBG_READ(c1, 0, dscr);
@@ -824,10 +826,6 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 		ret = 1; /* Unhandled fault. */
 		ret = 1; /* Unhandled fault. */
 	}
 	}
 
 
-	/*
-	 * Re-enable preemption after it was disabled in the
-	 * low-level exception handling code.
-	 */
 	preempt_enable();
 	preempt_enable();
 
 
 	return ret;
 	return ret;

+ 30 - 21
arch/arm/kernel/irq.c

@@ -131,54 +131,63 @@ int __init arch_probe_nr_irqs(void)
 
 
 #ifdef CONFIG_HOTPLUG_CPU
 #ifdef CONFIG_HOTPLUG_CPU
 
 
-static bool migrate_one_irq(struct irq_data *d)
+static bool migrate_one_irq(struct irq_desc *desc)
 {
 {
-	unsigned int cpu = cpumask_any_and(d->affinity, cpu_online_mask);
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	const struct cpumask *affinity = d->affinity;
+	struct irq_chip *c;
 	bool ret = false;
 	bool ret = false;
 
 
-	if (cpu >= nr_cpu_ids) {
-		cpu = cpumask_any(cpu_online_mask);
+	/*
+	 * If this is a per-CPU interrupt, or the affinity does not
+	 * include this CPU, then we have nothing to do.
+	 */
+	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+		return false;
+
+	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+		affinity = cpu_online_mask;
 		ret = true;
 		ret = true;
 	}
 	}
 
 
-	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", d->irq, d->node, cpu);
-
-	d->chip->irq_set_affinity(d, cpumask_of(cpu), true);
+	c = irq_data_get_irq_chip(d);
+	if (c->irq_set_affinity)
+		c->irq_set_affinity(d, affinity, true);
+	else
+		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
 
 
 	return ret;
 	return ret;
 }
 }
 
 
 /*
 /*
- * The CPU has been marked offline.  Migrate IRQs off this CPU.  If
- * the affinity settings do not allow other CPUs, force them onto any
+ * The current CPU has been marked offline.  Migrate IRQs off this CPU.
+ * If the affinity settings do not allow other CPUs, force them onto any
  * available CPU.
  * available CPU.
+ *
+ * Note: we must iterate over all IRQs, whether they have an attached
+ * action structure or not, as we need to get chained interrupts too.
  */
  */
 void migrate_irqs(void)
 void migrate_irqs(void)
 {
 {
-	unsigned int i, cpu = smp_processor_id();
+	unsigned int i;
 	struct irq_desc *desc;
 	struct irq_desc *desc;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
 
 
 	for_each_irq_desc(i, desc) {
 	for_each_irq_desc(i, desc) {
-		struct irq_data *d = &desc->irq_data;
 		bool affinity_broken = false;
 		bool affinity_broken = false;
 
 
-		raw_spin_lock(&desc->lock);
-		do {
-			if (desc->action == NULL)
-				break;
-
-			if (d->node != cpu)
-				break;
+		if (!desc)
+			continue;
 
 
-			affinity_broken = migrate_one_irq(d);
-		} while (0);
+		raw_spin_lock(&desc->lock);
+		affinity_broken = migrate_one_irq(desc);
 		raw_spin_unlock(&desc->lock);
 		raw_spin_unlock(&desc->lock);
 
 
 		if (affinity_broken && printk_ratelimit())
 		if (affinity_broken && printk_ratelimit())
-			pr_warning("IRQ%u no longer affine to CPU%u\n", i, cpu);
+			pr_warning("IRQ%u no longer affine to CPU%u\n", i,
+				smp_processor_id());
 	}
 	}
 
 
 	local_irq_restore(flags);
 	local_irq_restore(flags);

+ 999 - 0
arch/arm/kernel/kprobes-arm.c

@@ -0,0 +1,999 @@
+/*
+ * arch/arm/kernel/kprobes-decode.c
+ *
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+/*
+ * We do not have hardware single-stepping on ARM, This
+ * effort is further complicated by the ARM not having a
+ * "next PC" register.  Instructions that change the PC
+ * can't be safely single-stepped in a MP environment, so
+ * we have a lot of work to do:
+ *
+ * In the prepare phase:
+ *   *) If it is an instruction that does anything
+ *      with the CPU mode, we reject it for a kprobe.
+ *      (This is out of laziness rather than need.  The
+ *      instructions could be simulated.)
+ *
+ *   *) Otherwise, decode the instruction rewriting its
+ *      registers to take fixed, ordered registers and
+ *      setting a handler for it to run the instruction.
+ *
+ * In the execution phase by an instruction's handler:
+ *
+ *   *) If the PC is written to by the instruction, the
+ *      instruction must be fully simulated in software.
+ *
+ *   *) Otherwise, a modified form of the instruction is
+ *      directly executed.  Its handler calls the
+ *      instruction in insn[0].  In insn[1] is a
+ *      "mov pc, lr" to return.
+ *
+ *      Before calling, load up the reordered registers
+ *      from the original instruction's registers.  If one
+ *      of the original input registers is the PC, compute
+ *      and adjust the appropriate input register.
+ *
+ *	After call completes, copy the output registers to
+ *      the original instruction's original registers.
+ *
+ * We don't use a real breakpoint instruction since that
+ * would have us in the kernel go from SVC mode to SVC
+ * mode losing the link register.  Instead we use an
+ * undefined instruction.  To simplify processing, the
+ * undefined instruction used for kprobes must be reserved
+ * exclusively for kprobes use.
+ *
+ * TODO: ifdef out some instruction decoding based on architecture.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "kprobes.h"
+
+#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit)))))
+
+#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25)
+
+#if  __LINUX_ARM_ARCH__ >= 6
+#define BLX(reg)	"blx	"reg"		\n\t"
+#else
+#define BLX(reg)	"mov	lr, pc		\n\t"	\
+			"mov	pc, "reg"	\n\t"
+#endif
+
+/*
+ * To avoid the complications of mimicing single-stepping on a
+ * processor without a Next-PC or a single-step mode, and to
+ * avoid having to deal with the side-effects of boosting, we
+ * simulate or emulate (almost) all ARM instructions.
+ *
+ * "Simulation" is where the instruction's behavior is duplicated in
+ * C code.  "Emulation" is where the original instruction is rewritten
+ * and executed, often by altering its registers.
+ *
+ * By having all behavior of the kprobe'd instruction completed before
+ * returning from the kprobe_handler(), all locks (scheduler and
+ * interrupt) can safely be released.  There is no need for secondary
+ * breakpoints, no race with MP or preemptable kernels, nor having to
+ * clean up resources counts at a later time impacting overall system
+ * performance.  By rewriting the instruction, only the minimum registers
+ * need to be loaded and saved back optimizing performance.
+ *
+ * Calling the insnslot_*_rwflags version of a function doesn't hurt
+ * anything even when the CPSR flags aren't updated by the
+ * instruction.  It's just a little slower in return for saving
+ * a little space by not having a duplicate function that doesn't
+ * update the flags.  (The same optimization can be said for
+ * instructions that do or don't perform register writeback)
+ * Also, instructions can either read the flags, only write the
+ * flags, or read and write the flags.  To save combinations
+ * rather than for sheer performance, flag functions just assume
+ * read and write of flags.
+ */
+
+static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	long iaddr = (long)p->addr;
+	int disp  = branch_displacement(insn);
+
+	if (insn & (1 << 24))
+		regs->ARM_lr = iaddr + 4;
+
+	regs->ARM_pc = iaddr + 8 + disp;
+}
+
+static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	long iaddr = (long)p->addr;
+	int disp = branch_displacement(insn);
+
+	regs->ARM_lr = iaddr + 4;
+	regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2);
+	regs->ARM_cpsr |= PSR_T_BIT;
+}
+
+static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rm = insn & 0xf;
+	long rmv = regs->uregs[rm];
+
+	if (insn & (1 << 5))
+		regs->ARM_lr = (long)p->addr + 4;
+
+	regs->ARM_pc = rmv & ~0x1;
+	regs->ARM_cpsr &= ~PSR_T_BIT;
+	if (rmv & 0x1)
+		regs->ARM_cpsr |= PSR_T_BIT;
+}
+
+static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
+	regs->uregs[rd] = regs->ARM_cpsr & mask;
+}
+
+static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->uregs[12] = regs->uregs[13];
+}
+
+static void __kprobes
+emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = (unsigned long)p->addr + 8;
+	int rt = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rtv asm("r0") = regs->uregs[rt];
+	register unsigned long rt2v asm("r1") = regs->uregs[rt+1];
+	register unsigned long rnv asm("r2") = (rn == 15) ? pc
+							  : regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+
+	__asm__ __volatile__ (
+		BLX("%[fn]")
+		: "=r" (rtv), "=r" (rt2v), "=r" (rnv)
+		: "0" (rtv), "1" (rt2v), "2" (rnv), "r" (rmv),
+		  [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rt] = rtv;
+	regs->uregs[rt+1] = rt2v;
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv;
+}
+
+static void __kprobes
+emulate_ldr(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = (unsigned long)p->addr + 8;
+	int rt = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rtv asm("r0");
+	register unsigned long rnv asm("r2") = (rn == 15) ? pc
+							  : regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+
+	__asm__ __volatile__ (
+		BLX("%[fn]")
+		: "=r" (rtv), "=r" (rnv)
+		: "1" (rnv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	if (rt == 15)
+		load_write_pc(rtv, regs);
+	else
+		regs->uregs[rt] = rtv;
+
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv;
+}
+
+static void __kprobes
+emulate_str(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long rtpc = (unsigned long)p->addr + str_pc_offset;
+	unsigned long rnpc = (unsigned long)p->addr + 8;
+	int rt = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rtv asm("r0") = (rt == 15) ? rtpc
+							  : regs->uregs[rt];
+	register unsigned long rnv asm("r2") = (rn == 15) ? rnpc
+							  : regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+
+	__asm__ __volatile__ (
+		BLX("%[fn]")
+		: "=r" (rnv)
+		: "r" (rtv), "0" (rnv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv;
+}
+
+static void __kprobes
+emulate_rd12rn16rm0rs8_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = (unsigned long)p->addr + 8;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+	int rs = (insn >> 8) & 0xf;
+
+	register unsigned long rdv asm("r0") = regs->uregs[rd];
+	register unsigned long rnv asm("r2") = (rn == 15) ? pc
+							  : regs->uregs[rn];
+	register unsigned long rmv asm("r3") = (rm == 15) ? pc
+							  : regs->uregs[rm];
+	register unsigned long rsv asm("r1") = regs->uregs[rs];
+	unsigned long cpsr = regs->ARM_cpsr;
+
+	__asm__ __volatile__ (
+		"msr	cpsr_fs, %[cpsr]	\n\t"
+		BLX("%[fn]")
+		"mrs	%[cpsr], cpsr		\n\t"
+		: "=r" (rdv), [cpsr] "=r" (cpsr)
+		: "0" (rdv), "r" (rnv), "r" (rmv), "r" (rsv),
+		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	if (rd == 15)
+		alu_write_pc(rdv, regs);
+	else
+		regs->uregs[rd] = rdv;
+	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
+}
+
+static void __kprobes
+emulate_rd12rn16rm0_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rdv asm("r0") = regs->uregs[rd];
+	register unsigned long rnv asm("r2") = regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+	unsigned long cpsr = regs->ARM_cpsr;
+
+	__asm__ __volatile__ (
+		"msr	cpsr_fs, %[cpsr]	\n\t"
+		BLX("%[fn]")
+		"mrs	%[cpsr], cpsr		\n\t"
+		: "=r" (rdv), [cpsr] "=r" (cpsr)
+		: "0" (rdv), "r" (rnv), "r" (rmv),
+		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rd] = rdv;
+	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
+}
+
+static void __kprobes
+emulate_rd16rn12rm0rs8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 16) & 0xf;
+	int rn = (insn >> 12) & 0xf;
+	int rm = insn & 0xf;
+	int rs = (insn >> 8) & 0xf;
+
+	register unsigned long rdv asm("r2") = regs->uregs[rd];
+	register unsigned long rnv asm("r0") = regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+	register unsigned long rsv asm("r1") = regs->uregs[rs];
+	unsigned long cpsr = regs->ARM_cpsr;
+
+	__asm__ __volatile__ (
+		"msr	cpsr_fs, %[cpsr]	\n\t"
+		BLX("%[fn]")
+		"mrs	%[cpsr], cpsr		\n\t"
+		: "=r" (rdv), [cpsr] "=r" (cpsr)
+		: "0" (rdv), "r" (rnv), "r" (rmv), "r" (rsv),
+		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rd] = rdv;
+	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
+}
+
+static void __kprobes
+emulate_rd12rm0_noflags_nopc(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rdv asm("r0") = regs->uregs[rd];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+
+	__asm__ __volatile__ (
+		BLX("%[fn]")
+		: "=r" (rdv)
+		: "0" (rdv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rd] = rdv;
+}
+
+static void __kprobes
+emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rdlo = (insn >> 12) & 0xf;
+	int rdhi = (insn >> 16) & 0xf;
+	int rn = insn & 0xf;
+	int rm = (insn >> 8) & 0xf;
+
+	register unsigned long rdlov asm("r0") = regs->uregs[rdlo];
+	register unsigned long rdhiv asm("r2") = regs->uregs[rdhi];
+	register unsigned long rnv asm("r3") = regs->uregs[rn];
+	register unsigned long rmv asm("r1") = regs->uregs[rm];
+	unsigned long cpsr = regs->ARM_cpsr;
+
+	__asm__ __volatile__ (
+		"msr	cpsr_fs, %[cpsr]	\n\t"
+		BLX("%[fn]")
+		"mrs	%[cpsr], cpsr		\n\t"
+		: "=r" (rdlov), "=r" (rdhiv), [cpsr] "=r" (cpsr)
+		: "0" (rdlov), "1" (rdhiv), "r" (rnv), "r" (rmv),
+		  "2" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rdlo] = rdlov;
+	regs->uregs[rdhi] = rdhiv;
+	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
+}
+
+/*
+ * For the instruction masking and comparisons in all the "space_*"
+ * functions below, Do _not_ rearrange the order of tests unless
+ * you're very, very sure of what you are doing.  For the sake of
+ * efficiency, the masks for some tests sometimes assume other test
+ * have been done prior to them so the number of patterns to test
+ * for an instruction set can be as broad as possible to reduce the
+ * number of tests needed.
+ */
+
+static const union decode_item arm_1111_table[] = {
+	/* Unconditional instructions					*/
+
+	/* memory hint		1111 0100 x001 xxxx xxxx xxxx xxxx xxxx */
+	/* PLDI (immediate)	1111 0100 x101 xxxx xxxx xxxx xxxx xxxx */
+	/* PLDW (immediate)	1111 0101 x001 xxxx xxxx xxxx xxxx xxxx */
+	/* PLD (immediate)	1111 0101 x101 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xfe300000, 0xf4100000, kprobe_simulate_nop),
+
+	/* memory hint		1111 0110 x001 xxxx xxxx xxxx xxx0 xxxx */
+	/* PLDI (register)	1111 0110 x101 xxxx xxxx xxxx xxx0 xxxx */
+	/* PLDW (register)	1111 0111 x001 xxxx xxxx xxxx xxx0 xxxx */
+	/* PLD (register)	1111 0111 x101 xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_SIMULATE	(0xfe300010, 0xf6100000, kprobe_simulate_nop),
+
+	/* BLX (immediate)	1111 101x xxxx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xfe000000, 0xfa000000, simulate_blx1),
+
+	/* CPS			1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */
+	/* SETEND		1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
+	/* SRS			1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE			1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
+
+	/* Coprocessor instructions... */
+	/* MCRR2		1111 1100 0100 xxxx xxxx xxxx xxxx xxxx */
+	/* MRRC2		1111 1100 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* LDC2			1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC2			1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP2			1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR2			1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC2			1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0001_0xx0____0xxx_table[] = {
+	/* Miscellaneous instructions					*/
+
+	/* MRS cpsr		cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
+	DECODE_SIMULATEX(0x0ff000f0, 0x01000000, simulate_mrs,
+						 REGS(0, NOPC, 0, 0, 0)),
+
+	/* BX			cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_SIMULATE	(0x0ff000f0, 0x01200010, simulate_blx2bx),
+
+	/* BLX (register)	cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
+	DECODE_SIMULATEX(0x0ff000f0, 0x01200030, simulate_blx2bx,
+						 REGS(0, 0, 0, 0, NOPC)),
+
+	/* CLZ			cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x01600010, emulate_rd12rm0_noflags_nopc,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* QADD			cccc 0001 0000 xxxx xxxx xxxx 0101 xxxx */
+	/* QSUB			cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx */
+	/* QDADD		cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx */
+	/* QDSUB		cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx */
+	DECODE_EMULATEX	(0x0f9000f0, 0x01000050, emulate_rd12rn16rm0_rwflags_nopc,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* BXJ			cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
+	/* MSR			cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
+	/* MRS spsr		cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
+	/* BKPT			1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
+	/* SMC			cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
+	/* And unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0001_0xx0____1xx0_table[] = {
+	/* Halfword multiply and multiply-accumulate			*/
+
+	/* SMLALxy		cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
+	DECODE_EMULATEX	(0x0ff00090, 0x01400080, emulate_rdlo12rdhi16rn0rm8_rwflags_nopc,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* SMULWy		cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */
+	DECODE_OR	(0x0ff000b0, 0x012000a0),
+	/* SMULxy		cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */
+	DECODE_EMULATEX	(0x0ff00090, 0x01600080, emulate_rd16rn12rm0rs8_rwflags_nopc,
+						 REGS(NOPC, 0, NOPC, 0, NOPC)),
+
+	/* SMLAxy		cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx */
+	DECODE_OR	(0x0ff00090, 0x01000080),
+	/* SMLAWy		cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx */
+	DECODE_EMULATEX	(0x0ff000b0, 0x01200080, emulate_rd16rn12rm0rs8_rwflags_nopc,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0000_____1001_table[] = {
+	/* Multiply and multiply-accumulate				*/
+
+	/* MUL			cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx */
+	/* MULS			cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0fe000f0, 0x00000090, emulate_rd16rn12rm0rs8_rwflags_nopc,
+						 REGS(NOPC, 0, NOPC, 0, NOPC)),
+
+	/* MLA			cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx */
+	/* MLAS			cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_OR	(0x0fe000f0, 0x00200090),
+	/* MLS			cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x00600090, emulate_rd16rn12rm0rs8_rwflags_nopc,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* UMAAL		cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_OR	(0x0ff000f0, 0x00400090),
+	/* UMULL		cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx */
+	/* UMULLS		cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx */
+	/* UMLAL		cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx */
+	/* UMLALS		cccc 0000 1011 xxxx xxxx xxxx 1001 xxxx */
+	/* SMULL		cccc 0000 1100 xxxx xxxx xxxx 1001 xxxx */
+	/* SMULLS		cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx */
+	/* SMLAL		cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx */
+	/* SMLALS		cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0f8000f0, 0x00800090, emulate_rdlo12rdhi16rn0rm8_rwflags_nopc,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0001_____1001_table[] = {
+	/* Synchronization primitives					*/
+
+	/* SMP/SWPB		cccc 0001 0x00 xxxx xxxx xxxx 1001 xxxx */
+	DECODE_EMULATEX	(0x0fb000f0, 0x01000090, emulate_rd12rn16rm0_rwflags_nopc,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* LDREX/STREX{,D,B,H}	cccc 0001 1xxx xxxx xxxx xxxx 1001 xxxx */
+	/* And unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_000x_____1xx1_table[] = {
+	/* Extra load/store instructions				*/
+
+	/* STRHT		cccc 0000 xx10 xxxx xxxx xxxx 1011 xxxx */
+	/* ???			cccc 0000 xx10 xxxx xxxx xxxx 11x1 xxxx */
+	/* LDRHT		cccc 0000 xx11 xxxx xxxx xxxx 1011 xxxx */
+	/* LDRSBT		cccc 0000 xx11 xxxx xxxx xxxx 1101 xxxx */
+	/* LDRSHT		cccc 0000 xx11 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_REJECT	(0x0f200090, 0x00200090),
+
+	/* LDRD/STRD lr,pc,{...	cccc 000x x0x0 xxxx 111x xxxx 1101 xxxx */
+	DECODE_REJECT	(0x0e10e0d0, 0x0000e0d0),
+
+	/* LDRD (register)	cccc 000x x0x0 xxxx xxxx xxxx 1101 xxxx */
+	/* STRD (register)	cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e5000d0, 0x000000d0, emulate_ldrdstrd,
+						 REGS(NOPCWB, NOPCX, 0, 0, NOPC)),
+
+	/* LDRD (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1101 xxxx */
+	/* STRD (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e5000d0, 0x004000d0, emulate_ldrdstrd,
+						 REGS(NOPCWB, NOPCX, 0, 0, 0)),
+
+	/* STRH (register)	cccc 000x x0x0 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0e5000f0, 0x000000b0, emulate_str,
+						 REGS(NOPCWB, NOPC, 0, 0, NOPC)),
+
+	/* LDRH (register)	cccc 000x x0x1 xxxx xxxx xxxx 1011 xxxx */
+	/* LDRSB (register)	cccc 000x x0x1 xxxx xxxx xxxx 1101 xxxx */
+	/* LDRSH (register)	cccc 000x x0x1 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e500090, 0x00100090, emulate_ldr,
+						 REGS(NOPCWB, NOPC, 0, 0, NOPC)),
+
+	/* STRH (immediate)	cccc 000x x1x0 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0e5000f0, 0x004000b0, emulate_str,
+						 REGS(NOPCWB, NOPC, 0, 0, 0)),
+
+	/* LDRH (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1011 xxxx */
+	/* LDRSB (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1101 xxxx */
+	/* LDRSH (immediate)	cccc 000x x1x1 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0e500090, 0x00500090, emulate_ldr,
+						 REGS(NOPCWB, NOPC, 0, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_000x_table[] = {
+	/* Data-processing (register)					*/
+
+	/* <op>S PC, ...	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0e10f000, 0x0010f000),
+
+	/* MOV IP, SP		1110 0001 1010 0000 1100 0000 0000 1101 */
+	DECODE_SIMULATE	(0xffffffff, 0xe1a0c00d, simulate_mov_ipsp),
+
+	/* TST (register)	cccc 0001 0001 xxxx xxxx xxxx xxx0 xxxx */
+	/* TEQ (register)	cccc 0001 0011 xxxx xxxx xxxx xxx0 xxxx */
+	/* CMP (register)	cccc 0001 0101 xxxx xxxx xxxx xxx0 xxxx */
+	/* CMN (register)	cccc 0001 0111 xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_EMULATEX	(0x0f900010, 0x01100000, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(ANY, 0, 0, 0, ANY)),
+
+	/* MOV (register)	cccc 0001 101x xxxx xxxx xxxx xxx0 xxxx */
+	/* MVN (register)	cccc 0001 111x xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_EMULATEX	(0x0fa00010, 0x01a00000, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(0, ANY, 0, 0, ANY)),
+
+	/* AND (register)	cccc 0000 000x xxxx xxxx xxxx xxx0 xxxx */
+	/* EOR (register)	cccc 0000 001x xxxx xxxx xxxx xxx0 xxxx */
+	/* SUB (register)	cccc 0000 010x xxxx xxxx xxxx xxx0 xxxx */
+	/* RSB (register)	cccc 0000 011x xxxx xxxx xxxx xxx0 xxxx */
+	/* ADD (register)	cccc 0000 100x xxxx xxxx xxxx xxx0 xxxx */
+	/* ADC (register)	cccc 0000 101x xxxx xxxx xxxx xxx0 xxxx */
+	/* SBC (register)	cccc 0000 110x xxxx xxxx xxxx xxx0 xxxx */
+	/* RSC (register)	cccc 0000 111x xxxx xxxx xxxx xxx0 xxxx */
+	/* ORR (register)	cccc 0001 100x xxxx xxxx xxxx xxx0 xxxx */
+	/* BIC (register)	cccc 0001 110x xxxx xxxx xxxx xxx0 xxxx */
+	DECODE_EMULATEX	(0x0e000010, 0x00000000, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(ANY, ANY, 0, 0, ANY)),
+
+	/* TST (reg-shift reg)	cccc 0001 0001 xxxx xxxx xxxx 0xx1 xxxx */
+	/* TEQ (reg-shift reg)	cccc 0001 0011 xxxx xxxx xxxx 0xx1 xxxx */
+	/* CMP (reg-shift reg)	cccc 0001 0101 xxxx xxxx xxxx 0xx1 xxxx */
+	/* CMN (reg-shift reg)	cccc 0001 0111 xxxx xxxx xxxx 0xx1 xxxx */
+	DECODE_EMULATEX	(0x0f900090, 0x01100010, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(ANY, 0, NOPC, 0, ANY)),
+
+	/* MOV (reg-shift reg)	cccc 0001 101x xxxx xxxx xxxx 0xx1 xxxx */
+	/* MVN (reg-shift reg)	cccc 0001 111x xxxx xxxx xxxx 0xx1 xxxx */
+	DECODE_EMULATEX	(0x0fa00090, 0x01a00010, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(0, ANY, NOPC, 0, ANY)),
+
+	/* AND (reg-shift reg)	cccc 0000 000x xxxx xxxx xxxx 0xx1 xxxx */
+	/* EOR (reg-shift reg)	cccc 0000 001x xxxx xxxx xxxx 0xx1 xxxx */
+	/* SUB (reg-shift reg)	cccc 0000 010x xxxx xxxx xxxx 0xx1 xxxx */
+	/* RSB (reg-shift reg)	cccc 0000 011x xxxx xxxx xxxx 0xx1 xxxx */
+	/* ADD (reg-shift reg)	cccc 0000 100x xxxx xxxx xxxx 0xx1 xxxx */
+	/* ADC (reg-shift reg)	cccc 0000 101x xxxx xxxx xxxx 0xx1 xxxx */
+	/* SBC (reg-shift reg)	cccc 0000 110x xxxx xxxx xxxx 0xx1 xxxx */
+	/* RSC (reg-shift reg)	cccc 0000 111x xxxx xxxx xxxx 0xx1 xxxx */
+	/* ORR (reg-shift reg)	cccc 0001 100x xxxx xxxx xxxx 0xx1 xxxx */
+	/* BIC (reg-shift reg)	cccc 0001 110x xxxx xxxx xxxx 0xx1 xxxx */
+	DECODE_EMULATEX	(0x0e000090, 0x00000010, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(ANY, ANY, NOPC, 0, ANY)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_001x_table[] = {
+	/* Data-processing (immediate)					*/
+
+	/* MOVW			cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
+	/* MOVT			cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0fb00000, 0x03000000, emulate_rd12rm0_noflags_nopc,
+						 REGS(0, NOPC, 0, 0, 0)),
+
+	/* YIELD		cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
+	DECODE_OR	(0x0fff00ff, 0x03200001),
+	/* SEV			cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
+	DECODE_EMULATE	(0x0fff00ff, 0x03200004, kprobe_emulate_none),
+	/* NOP			cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
+	/* WFE			cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
+	/* WFI			cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
+	DECODE_SIMULATE	(0x0fff00fc, 0x03200000, kprobe_simulate_nop),
+	/* DBG			cccc 0011 0010 0000 xxxx xxxx ffff xxxx */
+	/* unallocated hints	cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
+	/* MSR (immediate)	cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0fb00000, 0x03200000),
+
+	/* <op>S PC, ...	cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0e10f000, 0x0210f000),
+
+	/* TST (immediate)	cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx */
+	/* TEQ (immediate)	cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx */
+	/* CMP (immediate)	cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* CMN (immediate)	cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0f900000, 0x03100000, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(ANY, 0, 0, 0, 0)),
+
+	/* MOV (immediate)	cccc 0011 101x xxxx xxxx xxxx xxxx xxxx */
+	/* MVN (immediate)	cccc 0011 111x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0fa00000, 0x03a00000, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(0, ANY, 0, 0, 0)),
+
+	/* AND (immediate)	cccc 0010 000x xxxx xxxx xxxx xxxx xxxx */
+	/* EOR (immediate)	cccc 0010 001x xxxx xxxx xxxx xxxx xxxx */
+	/* SUB (immediate)	cccc 0010 010x xxxx xxxx xxxx xxxx xxxx */
+	/* RSB (immediate)	cccc 0010 011x xxxx xxxx xxxx xxxx xxxx */
+	/* ADD (immediate)	cccc 0010 100x xxxx xxxx xxxx xxxx xxxx */
+	/* ADC (immediate)	cccc 0010 101x xxxx xxxx xxxx xxxx xxxx */
+	/* SBC (immediate)	cccc 0010 110x xxxx xxxx xxxx xxxx xxxx */
+	/* RSC (immediate)	cccc 0010 111x xxxx xxxx xxxx xxxx xxxx */
+	/* ORR (immediate)	cccc 0011 100x xxxx xxxx xxxx xxxx xxxx */
+	/* BIC (immediate)	cccc 0011 110x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e000000, 0x02000000, emulate_rd12rn16rm0rs8_rwflags,
+						 REGS(ANY, ANY, 0, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0110_____xxx1_table[] = {
+	/* Media instructions						*/
+
+	/* SEL			cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x068000b0, emulate_rd12rn16rm0_rwflags_nopc,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* SSAT			cccc 0110 101x xxxx xxxx xxxx xx01 xxxx */
+	/* USAT			cccc 0110 111x xxxx xxxx xxxx xx01 xxxx */
+	DECODE_OR(0x0fa00030, 0x06a00010),
+	/* SSAT16		cccc 0110 1010 xxxx xxxx xxxx 0011 xxxx */
+	/* USAT16		cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx */
+	DECODE_EMULATEX	(0x0fb000f0, 0x06a00030, emulate_rd12rn16rm0_rwflags_nopc,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* REV			cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */
+	/* REV16		cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */
+	/* RBIT			cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */
+	/* REVSH		cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0x0fb00070, 0x06b00030, emulate_rd12rm0_noflags_nopc,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* ???			cccc 0110 0x00 xxxx xxxx xxxx xxx1 xxxx */
+	DECODE_REJECT	(0x0fb00010, 0x06000010),
+	/* ???			cccc 0110 0xxx xxxx xxxx xxxx 1011 xxxx */
+	DECODE_REJECT	(0x0f8000f0, 0x060000b0),
+	/* ???			cccc 0110 0xxx xxxx xxxx xxxx 1101 xxxx */
+	DECODE_REJECT	(0x0f8000f0, 0x060000d0),
+	/* SADD16		cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx */
+	/* SADDSUBX		cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx */
+	/* SSUBADDX		cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx */
+	/* SSUB16		cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx */
+	/* SADD8		cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx */
+	/* SSUB8		cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx */
+	/* QADD16		cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx */
+	/* QADDSUBX		cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx */
+	/* QSUBADDX		cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx */
+	/* QSUB16		cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx */
+	/* QADD8		cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx */
+	/* QSUB8		cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx */
+	/* SHADD16		cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx */
+	/* SHADDSUBX		cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx */
+	/* SHSUBADDX		cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx */
+	/* SHSUB16		cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx */
+	/* SHADD8		cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx */
+	/* SHSUB8		cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx */
+	/* UADD16		cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx */
+	/* UADDSUBX		cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx */
+	/* USUBADDX		cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx */
+	/* USUB16		cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx */
+	/* UADD8		cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx */
+	/* USUB8		cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx */
+	/* UQADD16		cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx */
+	/* UQADDSUBX		cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx */
+	/* UQSUBADDX		cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx */
+	/* UQSUB16		cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx */
+	/* UQADD8		cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx */
+	/* UQSUB8		cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx */
+	/* UHADD16		cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx */
+	/* UHADDSUBX		cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx */
+	/* UHSUBADDX		cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx */
+	/* UHSUB16		cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx */
+	/* UHADD8		cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx */
+	/* UHSUB8		cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_EMULATEX	(0x0f800010, 0x06000010, emulate_rd12rn16rm0_rwflags_nopc,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* PKHBT		cccc 0110 1000 xxxx xxxx xxxx x001 xxxx */
+	/* PKHTB		cccc 0110 1000 xxxx xxxx xxxx x101 xxxx */
+	DECODE_EMULATEX	(0x0ff00030, 0x06800010, emulate_rd12rn16rm0_rwflags_nopc,
+						 REGS(NOPC, NOPC, 0, 0, NOPC)),
+
+	/* ???			cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx */
+	/* ???			cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx */
+	DECODE_REJECT	(0x0fb000f0, 0x06900070),
+
+	/* SXTB16		cccc 0110 1000 1111 xxxx xxxx 0111 xxxx */
+	/* SXTB			cccc 0110 1010 1111 xxxx xxxx 0111 xxxx */
+	/* SXTH			cccc 0110 1011 1111 xxxx xxxx 0111 xxxx */
+	/* UXTB16		cccc 0110 1100 1111 xxxx xxxx 0111 xxxx */
+	/* UXTB			cccc 0110 1110 1111 xxxx xxxx 0111 xxxx */
+	/* UXTH			cccc 0110 1111 1111 xxxx xxxx 0111 xxxx */
+	DECODE_EMULATEX	(0x0f8f00f0, 0x068f0070, emulate_rd12rm0_noflags_nopc,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* SXTAB16		cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx */
+	/* SXTAB		cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx */
+	/* SXTAH		cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx */
+	/* UXTAB16		cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx */
+	/* UXTAB		cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx */
+	/* UXTAH		cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx */
+	DECODE_EMULATEX	(0x0f8000f0, 0x06800070, emulate_rd12rn16rm0_rwflags_nopc,
+						 REGS(NOPCX, NOPC, 0, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_0111_____xxx1_table[] = {
+	/* Media instructions						*/
+
+	/* UNDEFINED		cccc 0111 1111 xxxx xxxx xxxx 1111 xxxx */
+	DECODE_REJECT	(0x0ff000f0, 0x07f000f0),
+
+	/* SMLALD		cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
+	/* SMLSLD		cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
+	DECODE_EMULATEX	(0x0ff00090, 0x07400010, emulate_rdlo12rdhi16rn0rm8_rwflags_nopc,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* SMUAD		cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx */
+	/* SMUSD		cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx */
+	DECODE_OR	(0x0ff0f090, 0x0700f010),
+	/* SMMUL		cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx */
+	DECODE_OR	(0x0ff0f0d0, 0x0750f010),
+	/* USAD8		cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */
+	DECODE_EMULATEX	(0x0ff0f0f0, 0x0780f010, emulate_rd16rn12rm0rs8_rwflags_nopc,
+						 REGS(NOPC, 0, NOPC, 0, NOPC)),
+
+	/* SMLAD		cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx */
+	/* SMLSD		cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx */
+	DECODE_OR	(0x0ff00090, 0x07000010),
+	/* SMMLA		cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx */
+	DECODE_OR	(0x0ff000d0, 0x07500010),
+	/* USADA8		cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_EMULATEX	(0x0ff000f0, 0x07800010, emulate_rd16rn12rm0rs8_rwflags_nopc,
+						 REGS(NOPC, NOPCX, NOPC, 0, NOPC)),
+
+	/* SMMLS		cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx */
+	DECODE_EMULATEX	(0x0ff000d0, 0x075000d0, emulate_rd16rn12rm0rs8_rwflags_nopc,
+						 REGS(NOPC, NOPC, NOPC, 0, NOPC)),
+
+	/* SBFX			cccc 0111 101x xxxx xxxx xxxx x101 xxxx */
+	/* UBFX			cccc 0111 111x xxxx xxxx xxxx x101 xxxx */
+	DECODE_EMULATEX	(0x0fa00070, 0x07a00050, emulate_rd12rm0_noflags_nopc,
+						 REGS(0, NOPC, 0, 0, NOPC)),
+
+	/* BFC			cccc 0111 110x xxxx xxxx xxxx x001 1111 */
+	DECODE_EMULATEX	(0x0fe0007f, 0x07c0001f, emulate_rd12rm0_noflags_nopc,
+						 REGS(0, NOPC, 0, 0, 0)),
+
+	/* BFI			cccc 0111 110x xxxx xxxx xxxx x001 xxxx */
+	DECODE_EMULATEX	(0x0fe00070, 0x07c00010, emulate_rd12rm0_noflags_nopc,
+						 REGS(0, NOPC, 0, 0, NOPCX)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_01xx_table[] = {
+	/* Load/store word and unsigned byte				*/
+
+	/* LDRB/STRB pc,[...]	cccc 01xx x0xx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0c40f000, 0x0440f000),
+
+	/* STRT			cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRT			cccc 01x0 x011 xxxx xxxx xxxx xxxx xxxx */
+	/* STRBT		cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRBT		cccc 01x0 x111 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0d200000, 0x04200000),
+
+	/* STR (immediate)	cccc 010x x0x0 xxxx xxxx xxxx xxxx xxxx */
+	/* STRB (immediate)	cccc 010x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x04000000, emulate_str,
+						 REGS(NOPCWB, ANY, 0, 0, 0)),
+
+	/* LDR (immediate)	cccc 010x x0x1 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRB (immediate)	cccc 010x x1x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x04100000, emulate_ldr,
+						 REGS(NOPCWB, ANY, 0, 0, 0)),
+
+	/* STR (register)	cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx */
+	/* STRB (register)	cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x06000000, emulate_str,
+						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
+
+	/* LDR (register)	cccc 011x x0x1 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRB (register)	cccc 011x x1x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0x0e100000, 0x06100000, emulate_ldr,
+						 REGS(NOPCWB, ANY, 0, 0, NOPC)),
+
+	DECODE_END
+};
+
+static const union decode_item arm_cccc_100x_table[] = {
+	/* Block data transfer instructions				*/
+
+	/* LDM			cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
+	/* STM			cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0x0e400000, 0x08000000, kprobe_decode_ldmstm),
+
+	/* STM (user registers)	cccc 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDM (user registers)	cccc 100x x1x1 xxxx 0xxx xxxx xxxx xxxx */
+	/* LDM (exception ret)	cccc 100x x1x1 xxxx 1xxx xxxx xxxx xxxx */
+	DECODE_END
+};
+
+const union decode_item kprobe_decode_arm_table[] = {
+	/*
+	 * Unconditional instructions
+	 *			1111 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xf0000000, 0xf0000000, arm_1111_table),
+
+	/*
+	 * Miscellaneous instructions
+	 *			cccc 0001 0xx0 xxxx xxxx xxxx 0xxx xxxx
+	 */
+	DECODE_TABLE	(0x0f900080, 0x01000000, arm_cccc_0001_0xx0____0xxx_table),
+
+	/*
+	 * Halfword multiply and multiply-accumulate
+	 *			cccc 0001 0xx0 xxxx xxxx xxxx 1xx0 xxxx
+	 */
+	DECODE_TABLE	(0x0f900090, 0x01000080, arm_cccc_0001_0xx0____1xx0_table),
+
+	/*
+	 * Multiply and multiply-accumulate
+	 *			cccc 0000 xxxx xxxx xxxx xxxx 1001 xxxx
+	 */
+	DECODE_TABLE	(0x0f0000f0, 0x00000090, arm_cccc_0000_____1001_table),
+
+	/*
+	 * Synchronization primitives
+	 *			cccc 0001 xxxx xxxx xxxx xxxx 1001 xxxx
+	 */
+	DECODE_TABLE	(0x0f0000f0, 0x01000090, arm_cccc_0001_____1001_table),
+
+	/*
+	 * Extra load/store instructions
+	 *			cccc 000x xxxx xxxx xxxx xxxx 1xx1 xxxx
+	 */
+	DECODE_TABLE	(0x0e000090, 0x00000090, arm_cccc_000x_____1xx1_table),
+
+	/*
+	 * Data-processing (register)
+	 *			cccc 000x xxxx xxxx xxxx xxxx xxx0 xxxx
+	 * Data-processing (register-shifted register)
+	 *			cccc 000x xxxx xxxx xxxx xxxx 0xx1 xxxx
+	 */
+	DECODE_TABLE	(0x0e000000, 0x00000000, arm_cccc_000x_table),
+
+	/*
+	 * Data-processing (immediate)
+	 *			cccc 001x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0x0e000000, 0x02000000, arm_cccc_001x_table),
+
+	/*
+	 * Media instructions
+	 *			cccc 011x xxxx xxxx xxxx xxxx xxx1 xxxx
+	 */
+	DECODE_TABLE	(0x0f000010, 0x06000010, arm_cccc_0110_____xxx1_table),
+	DECODE_TABLE	(0x0f000010, 0x07000010, arm_cccc_0111_____xxx1_table),
+
+	/*
+	 * Load/store word and unsigned byte
+	 *			cccc 01xx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0x0c000000, 0x04000000, arm_cccc_01xx_table),
+
+	/*
+	 * Block data transfer instructions
+	 *			cccc 100x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0x0e000000, 0x08000000, arm_cccc_100x_table),
+
+	/* B			cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */
+	/* BL			cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0x0e000000, 0x0a000000, simulate_bbl),
+
+	/*
+	 * Supervisor Call, and coprocessor instructions
+	 */
+
+	/* MCRR			cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx */
+	/* MRRC			cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* LDC			cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC			cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP			cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR			cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC			cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
+	/* SVC			cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0x0c000000, 0x0c000000),
+
+	DECODE_END
+};
+
+static void __kprobes arm_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->ARM_pc += 4;
+	p->ainsn.insn_handler(p, regs);
+}
+
+/* Return:
+ *   INSN_REJECTED     If instruction is one not allowed to kprobe,
+ *   INSN_GOOD         If instruction is supported and uses instruction slot,
+ *   INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
+ *
+ * For instructions we don't want to kprobe (INSN_REJECTED return result):
+ *   These are generally ones that modify the processor state making
+ *   them "hard" to simulate such as switches processor modes or
+ *   make accesses in alternate modes.  Any of these could be simulated
+ *   if the work was put into it, but low return considering they
+ *   should also be very rare.
+ */
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	asi->insn_singlestep = arm_singlestep;
+	asi->insn_check_cc = kprobe_condition_checks[insn>>28];
+	return kprobe_decode_insn(insn, asi, kprobe_decode_arm_table, false);
+}

+ 577 - 0
arch/arm/kernel/kprobes-common.c

@@ -0,0 +1,577 @@
+/*
+ * arch/arm/kernel/kprobes-common.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * Some contents moved here from arch/arm/include/asm/kprobes-arm.c which is
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "kprobes.h"
+
+
+#ifndef find_str_pc_offset
+
+/*
+ * For STR and STM instructions, an ARM core may choose to use either
+ * a +8 or a +12 displacement from the current instruction's address.
+ * Whichever value is chosen for a given core, it must be the same for
+ * both instructions and may not change.  This function measures it.
+ */
+
+int str_pc_offset;
+
+void __init find_str_pc_offset(void)
+{
+	int addr, scratch, ret;
+
+	__asm__ (
+		"sub	%[ret], pc, #4		\n\t"
+		"str	pc, %[addr]		\n\t"
+		"ldr	%[scr], %[addr]		\n\t"
+		"sub	%[ret], %[scr], %[ret]	\n\t"
+		: [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr));
+
+	str_pc_offset = ret;
+}
+
+#endif /* !find_str_pc_offset */
+
+
+#ifndef test_load_write_pc_interworking
+
+bool load_write_pc_interworks;
+
+void __init test_load_write_pc_interworking(void)
+{
+	int arch = cpu_architecture();
+	BUG_ON(arch == CPU_ARCH_UNKNOWN);
+	load_write_pc_interworks = arch >= CPU_ARCH_ARMv5T;
+}
+
+#endif /* !test_load_write_pc_interworking */
+
+
+#ifndef test_alu_write_pc_interworking
+
+bool alu_write_pc_interworks;
+
+void __init test_alu_write_pc_interworking(void)
+{
+	int arch = cpu_architecture();
+	BUG_ON(arch == CPU_ARCH_UNKNOWN);
+	alu_write_pc_interworks = arch >= CPU_ARCH_ARMv7;
+}
+
+#endif /* !test_alu_write_pc_interworking */
+
+
+void __init arm_kprobe_decode_init(void)
+{
+	find_str_pc_offset();
+	test_load_write_pc_interworking();
+	test_alu_write_pc_interworking();
+}
+
+
+static unsigned long __kprobes __check_eq(unsigned long cpsr)
+{
+	return cpsr & PSR_Z_BIT;
+}
+
+static unsigned long __kprobes __check_ne(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_Z_BIT;
+}
+
+static unsigned long __kprobes __check_cs(unsigned long cpsr)
+{
+	return cpsr & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_cc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_mi(unsigned long cpsr)
+{
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_pl(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_vs(unsigned long cpsr)
+{
+	return cpsr & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_vc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_hi(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return cpsr & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ls(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ge(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_lt(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_gt(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return (~temp) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_le(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return temp & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_al(unsigned long cpsr)
+{
+	return true;
+}
+
+kprobe_check_cc * const kprobe_condition_checks[16] = {
+	&__check_eq, &__check_ne, &__check_cs, &__check_cc,
+	&__check_mi, &__check_pl, &__check_vs, &__check_vc,
+	&__check_hi, &__check_ls, &__check_ge, &__check_lt,
+	&__check_gt, &__check_le, &__check_al, &__check_al
+};
+
+
+void __kprobes kprobe_simulate_nop(struct kprobe *p, struct pt_regs *regs)
+{
+}
+
+void __kprobes kprobe_emulate_none(struct kprobe *p, struct pt_regs *regs)
+{
+	p->ainsn.insn_fn();
+}
+
+static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rn = (insn >> 16) & 0xf;
+	int lbit = insn & (1 << 20);
+	int wbit = insn & (1 << 21);
+	int ubit = insn & (1 << 23);
+	int pbit = insn & (1 << 24);
+	long *addr = (long *)regs->uregs[rn];
+	int reg_bit_vector;
+	int reg_count;
+
+	reg_count = 0;
+	reg_bit_vector = insn & 0xffff;
+	while (reg_bit_vector) {
+		reg_bit_vector &= (reg_bit_vector - 1);
+		++reg_count;
+	}
+
+	if (!ubit)
+		addr -= reg_count;
+	addr += (!pbit == !ubit);
+
+	reg_bit_vector = insn & 0xffff;
+	while (reg_bit_vector) {
+		int reg = __ffs(reg_bit_vector);
+		reg_bit_vector &= (reg_bit_vector - 1);
+		if (lbit)
+			regs->uregs[reg] = *addr++;
+		else
+			*addr++ = regs->uregs[reg];
+	}
+
+	if (wbit) {
+		if (!ubit)
+			addr -= reg_count;
+		addr -= (!pbit == !ubit);
+		regs->uregs[rn] = (long)addr;
+	}
+}
+
+static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->ARM_pc = (long)p->addr + str_pc_offset;
+	simulate_ldm1stm1(p, regs);
+	regs->ARM_pc = (long)p->addr + 4;
+}
+
+static void __kprobes simulate_ldm1_pc(struct kprobe *p, struct pt_regs *regs)
+{
+	simulate_ldm1stm1(p, regs);
+	load_write_pc(regs->ARM_pc, regs);
+}
+
+static void __kprobes
+emulate_generic_r0_12_noflags(struct kprobe *p, struct pt_regs *regs)
+{
+	register void *rregs asm("r1") = regs;
+	register void *rfn asm("lr") = p->ainsn.insn_fn;
+
+	__asm__ __volatile__ (
+		"stmdb	sp!, {%[regs], r11}	\n\t"
+		"ldmia	%[regs], {r0-r12}	\n\t"
+#if __LINUX_ARM_ARCH__ >= 6
+		"blx	%[fn]			\n\t"
+#else
+		"str	%[fn], [sp, #-4]!	\n\t"
+		"adr	lr, 1f			\n\t"
+		"ldr	pc, [sp], #4		\n\t"
+		"1:				\n\t"
+#endif
+		"ldr	lr, [sp], #4		\n\t" /* lr = regs */
+		"stmia	lr, {r0-r12}		\n\t"
+		"ldr	r11, [sp], #4		\n\t"
+		: [regs] "=r" (rregs), [fn] "=r" (rfn)
+		: "0" (rregs), "1" (rfn)
+		: "r0", "r2", "r3", "r4", "r5", "r6", "r7",
+		  "r8", "r9", "r10", "r12", "memory", "cc"
+		);
+}
+
+static void __kprobes
+emulate_generic_r2_14_noflags(struct kprobe *p, struct pt_regs *regs)
+{
+	emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+2));
+}
+
+static void __kprobes
+emulate_ldm_r3_15(struct kprobe *p, struct pt_regs *regs)
+{
+	emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+3));
+	load_write_pc(regs->ARM_pc, regs);
+}
+
+enum kprobe_insn __kprobes
+kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	kprobe_insn_handler_t *handler = 0;
+	unsigned reglist = insn & 0xffff;
+	int is_ldm = insn & 0x100000;
+	int rn = (insn >> 16) & 0xf;
+
+	if (rn <= 12 && (reglist & 0xe000) == 0) {
+		/* Instruction only uses registers in the range R0..R12 */
+		handler = emulate_generic_r0_12_noflags;
+
+	} else if (rn >= 2 && (reglist & 0x8003) == 0) {
+		/* Instruction only uses registers in the range R2..R14 */
+		rn -= 2;
+		reglist >>= 2;
+		handler = emulate_generic_r2_14_noflags;
+
+	} else if (rn >= 3 && (reglist & 0x0007) == 0) {
+		/* Instruction only uses registers in the range R3..R15 */
+		if (is_ldm && (reglist & 0x8000)) {
+			rn -= 3;
+			reglist >>= 3;
+			handler = emulate_ldm_r3_15;
+		}
+	}
+
+	if (handler) {
+		/* We can emulate the instruction in (possibly) modified form */
+		asi->insn[0] = (insn & 0xfff00000) | (rn << 16) | reglist;
+		asi->insn_handler = handler;
+		return INSN_GOOD;
+	}
+
+	/* Fallback to slower simulation... */
+	if (reglist & 0x8000)
+		handler = is_ldm ? simulate_ldm1_pc : simulate_stm1_pc;
+	else
+		handler = simulate_ldm1stm1;
+	asi->insn_handler = handler;
+	return INSN_GOOD_NO_SLOT;
+}
+
+
+/*
+ * Prepare an instruction slot to receive an instruction for emulating.
+ * This is done by placing a subroutine return after the location where the
+ * instruction will be placed. We also modify ARM instructions to be
+ * unconditional as the condition code will already be checked before any
+ * emulation handler is called.
+ */
+static kprobe_opcode_t __kprobes
+prepare_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
+								bool thumb)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	if (thumb) {
+		u16 *thumb_insn = (u16 *)asi->insn;
+		thumb_insn[1] = 0x4770; /* Thumb bx lr */
+		thumb_insn[2] = 0x4770; /* Thumb bx lr */
+		return insn;
+	}
+	asi->insn[1] = 0xe12fff1e; /* ARM bx lr */
+#else
+	asi->insn[1] = 0xe1a0f00e; /* mov pc, lr */
+#endif
+	/* Make an ARM instruction unconditional */
+	if (insn < 0xe0000000)
+		insn = (insn | 0xe0000000) & ~0x10000000;
+	return insn;
+}
+
+/*
+ * Write a (probably modified) instruction into the slot previously prepared by
+ * prepare_emulated_insn
+ */
+static void  __kprobes
+set_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
+								bool thumb)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	if (thumb) {
+		u16 *ip = (u16 *)asi->insn;
+		if (is_wide_instruction(insn))
+			*ip++ = insn >> 16;
+		*ip++ = insn;
+		return;
+	}
+#endif
+	asi->insn[0] = insn;
+}
+
+/*
+ * When we modify the register numbers encoded in an instruction to be emulated,
+ * the new values come from this define. For ARM and 32-bit Thumb instructions
+ * this gives...
+ *
+ *	bit position	  16  12   8   4   0
+ *	---------------+---+---+---+---+---+
+ *	register	 r2  r0  r1  --  r3
+ */
+#define INSN_NEW_BITS		0x00020103
+
+/* Each nibble has same value as that at INSN_NEW_BITS bit 16 */
+#define INSN_SAMEAS16_BITS	0x22222222
+
+/*
+ * Validate and modify each of the registers encoded in an instruction.
+ *
+ * Each nibble in regs contains a value from enum decode_reg_type. For each
+ * non-zero value, the corresponding nibble in pinsn is validated and modified
+ * according to the type.
+ */
+static bool __kprobes decode_regs(kprobe_opcode_t* pinsn, u32 regs)
+{
+	kprobe_opcode_t insn = *pinsn;
+	kprobe_opcode_t mask = 0xf; /* Start at least significant nibble */
+
+	for (; regs != 0; regs >>= 4, mask <<= 4) {
+
+		kprobe_opcode_t new_bits = INSN_NEW_BITS;
+
+		switch (regs & 0xf) {
+
+		case REG_TYPE_NONE:
+			/* Nibble not a register, skip to next */
+			continue;
+
+		case REG_TYPE_ANY:
+			/* Any register is allowed */
+			break;
+
+		case REG_TYPE_SAMEAS16:
+			/* Replace register with same as at bit position 16 */
+			new_bits = INSN_SAMEAS16_BITS;
+			break;
+
+		case REG_TYPE_SP:
+			/* Only allow SP (R13) */
+			if ((insn ^ 0xdddddddd) & mask)
+				goto reject;
+			break;
+
+		case REG_TYPE_PC:
+			/* Only allow PC (R15) */
+			if ((insn ^ 0xffffffff) & mask)
+				goto reject;
+			break;
+
+		case REG_TYPE_NOSP:
+			/* Reject SP (R13) */
+			if (((insn ^ 0xdddddddd) & mask) == 0)
+				goto reject;
+			break;
+
+		case REG_TYPE_NOSPPC:
+		case REG_TYPE_NOSPPCX:
+			/* Reject SP and PC (R13 and R15) */
+			if (((insn ^ 0xdddddddd) & 0xdddddddd & mask) == 0)
+				goto reject;
+			break;
+
+		case REG_TYPE_NOPCWB:
+			if (!is_writeback(insn))
+				break; /* No writeback, so any register is OK */
+			/* fall through... */
+		case REG_TYPE_NOPC:
+		case REG_TYPE_NOPCX:
+			/* Reject PC (R15) */
+			if (((insn ^ 0xffffffff) & mask) == 0)
+				goto reject;
+			break;
+		}
+
+		/* Replace value of nibble with new register number... */
+		insn &= ~mask;
+		insn |= new_bits & mask;
+	}
+
+	*pinsn = insn;
+	return true;
+
+reject:
+	return false;
+}
+
+static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
+	[DECODE_TYPE_TABLE]	= sizeof(struct decode_table),
+	[DECODE_TYPE_CUSTOM]	= sizeof(struct decode_custom),
+	[DECODE_TYPE_SIMULATE]	= sizeof(struct decode_simulate),
+	[DECODE_TYPE_EMULATE]	= sizeof(struct decode_emulate),
+	[DECODE_TYPE_OR]	= sizeof(struct decode_or),
+	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
+};
+
+/*
+ * kprobe_decode_insn operates on data tables in order to decode an ARM
+ * architecture instruction onto which a kprobe has been placed.
+ *
+ * These instruction decoding tables are a concatenation of entries each
+ * of which consist of one of the following structs:
+ *
+ *	decode_table
+ *	decode_custom
+ *	decode_simulate
+ *	decode_emulate
+ *	decode_or
+ *	decode_reject
+ *
+ * Each of these starts with a struct decode_header which has the following
+ * fields:
+ *
+ *	type_regs
+ *	mask
+ *	value
+ *
+ * The least significant DECODE_TYPE_BITS of type_regs contains a value
+ * from enum decode_type, this indicates which of the decode_* structs
+ * the entry contains. The value DECODE_TYPE_END indicates the end of the
+ * table.
+ *
+ * When the table is parsed, each entry is checked in turn to see if it
+ * matches the instruction to be decoded using the test:
+ *
+ *	(insn & mask) == value
+ *
+ * If no match is found before the end of the table is reached then decoding
+ * fails with INSN_REJECTED.
+ *
+ * When a match is found, decode_regs() is called to validate and modify each
+ * of the registers encoded in the instruction; the data it uses to do this
+ * is (type_regs >> DECODE_TYPE_BITS). A validation failure will cause decoding
+ * to fail with INSN_REJECTED.
+ *
+ * Once the instruction has passed the above tests, further processing
+ * depends on the type of the table entry's decode struct.
+ *
+ */
+int __kprobes
+kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
+				const union decode_item *table, bool thumb)
+{
+	const struct decode_header *h = (struct decode_header *)table;
+	const struct decode_header *next;
+	bool matched = false;
+
+	insn = prepare_emulated_insn(insn, asi, thumb);
+
+	for (;; h = next) {
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+		u32 regs = h->type_regs.bits >> DECODE_TYPE_BITS;
+
+		if (type == DECODE_TYPE_END)
+			return INSN_REJECTED;
+
+		next = (struct decode_header *)
+				((uintptr_t)h + decode_struct_sizes[type]);
+
+		if (!matched && (insn & h->mask.bits) != h->value.bits)
+			continue;
+
+		if (!decode_regs(&insn, regs))
+			return INSN_REJECTED;
+
+		switch (type) {
+
+		case DECODE_TYPE_TABLE: {
+			struct decode_table *d = (struct decode_table *)h;
+			next = (struct decode_header *)d->table.table;
+			break;
+		}
+
+		case DECODE_TYPE_CUSTOM: {
+			struct decode_custom *d = (struct decode_custom *)h;
+			return (*d->decoder.decoder)(insn, asi);
+		}
+
+		case DECODE_TYPE_SIMULATE: {
+			struct decode_simulate *d = (struct decode_simulate *)h;
+			asi->insn_handler = d->handler.handler;
+			return INSN_GOOD_NO_SLOT;
+		}
+
+		case DECODE_TYPE_EMULATE: {
+			struct decode_emulate *d = (struct decode_emulate *)h;
+			asi->insn_handler = d->handler.handler;
+			set_emulated_insn(insn, asi, thumb);
+			return INSN_GOOD;
+		}
+
+		case DECODE_TYPE_OR:
+			matched = true;
+			break;
+
+		case DECODE_TYPE_REJECT:
+		default:
+			return INSN_REJECTED;
+		}
+		}
+	}

+ 0 - 1670
arch/arm/kernel/kprobes-decode.c

@@ -1,1670 +0,0 @@
-/*
- * arch/arm/kernel/kprobes-decode.c
- *
- * Copyright (C) 2006, 2007 Motorola Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-/*
- * We do not have hardware single-stepping on ARM, This
- * effort is further complicated by the ARM not having a
- * "next PC" register.  Instructions that change the PC
- * can't be safely single-stepped in a MP environment, so
- * we have a lot of work to do:
- *
- * In the prepare phase:
- *   *) If it is an instruction that does anything
- *      with the CPU mode, we reject it for a kprobe.
- *      (This is out of laziness rather than need.  The
- *      instructions could be simulated.)
- *
- *   *) Otherwise, decode the instruction rewriting its
- *      registers to take fixed, ordered registers and
- *      setting a handler for it to run the instruction.
- *
- * In the execution phase by an instruction's handler:
- *
- *   *) If the PC is written to by the instruction, the
- *      instruction must be fully simulated in software.
- *
- *   *) Otherwise, a modified form of the instruction is
- *      directly executed.  Its handler calls the
- *      instruction in insn[0].  In insn[1] is a
- *      "mov pc, lr" to return.
- *
- *      Before calling, load up the reordered registers
- *      from the original instruction's registers.  If one
- *      of the original input registers is the PC, compute
- *      and adjust the appropriate input register.
- *
- *	After call completes, copy the output registers to
- *      the original instruction's original registers.
- *
- * We don't use a real breakpoint instruction since that
- * would have us in the kernel go from SVC mode to SVC
- * mode losing the link register.  Instead we use an
- * undefined instruction.  To simplify processing, the
- * undefined instruction used for kprobes must be reserved
- * exclusively for kprobes use.
- *
- * TODO: ifdef out some instruction decoding based on architecture.
- */
-
-#include <linux/kernel.h>
-#include <linux/kprobes.h>
-
-#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit)))))
-
-#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25)
-
-#define is_r15(insn, bitpos) (((insn) & (0xf << bitpos)) == (0xf << bitpos))
-
-/*
- * Test if load/store instructions writeback the address register.
- * if P (bit 24) == 0 or W (bit 21) == 1
- */
-#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000)
-
-#define PSR_fs	(PSR_f|PSR_s)
-
-#define KPROBE_RETURN_INSTRUCTION	0xe1a0f00e	/* mov pc, lr */
-
-typedef long (insn_0arg_fn_t)(void);
-typedef long (insn_1arg_fn_t)(long);
-typedef long (insn_2arg_fn_t)(long, long);
-typedef long (insn_3arg_fn_t)(long, long, long);
-typedef long (insn_4arg_fn_t)(long, long, long, long);
-typedef long long (insn_llret_0arg_fn_t)(void);
-typedef long long (insn_llret_3arg_fn_t)(long, long, long);
-typedef long long (insn_llret_4arg_fn_t)(long, long, long, long);
-
-union reg_pair {
-	long long	dr;
-#ifdef __LITTLE_ENDIAN
-	struct { long	r0, r1; };
-#else
-	struct { long	r1, r0; };
-#endif
-};
-
-/*
- * For STR and STM instructions, an ARM core may choose to use either
- * a +8 or a +12 displacement from the current instruction's address.
- * Whichever value is chosen for a given core, it must be the same for
- * both instructions and may not change.  This function measures it.
- */
-
-static int str_pc_offset;
-
-static void __init find_str_pc_offset(void)
-{
-	int addr, scratch, ret;
-
-	__asm__ (
-		"sub	%[ret], pc, #4		\n\t"
-		"str	pc, %[addr]		\n\t"
-		"ldr	%[scr], %[addr]		\n\t"
-		"sub	%[ret], %[scr], %[ret]	\n\t"
-		: [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr));
-
-	str_pc_offset = ret;
-}
-
-/*
- * The insnslot_?arg_r[w]flags() functions below are to keep the
- * msr -> *fn -> mrs instruction sequences indivisible so that
- * the state of the CPSR flags aren't inadvertently modified
- * just before or just after the call.
- */
-
-static inline long __kprobes
-insnslot_0arg_rflags(long cpsr, insn_0arg_fn_t *fn)
-{
-	register long ret asm("r0");
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[cpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		: "=r" (ret)
-		: [cpsr] "r" (cpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	return ret;
-}
-
-static inline long long __kprobes
-insnslot_llret_0arg_rflags(long cpsr, insn_llret_0arg_fn_t *fn)
-{
-	register long ret0 asm("r0");
-	register long ret1 asm("r1");
-	union reg_pair fnr;
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[cpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		: "=r" (ret0), "=r" (ret1)
-		: [cpsr] "r" (cpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	fnr.r0 = ret0;
-	fnr.r1 = ret1;
-	return fnr.dr;
-}
-
-static inline long __kprobes
-insnslot_1arg_rflags(long r0, long cpsr, insn_1arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long ret asm("r0");
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[cpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		: "=r" (ret)
-		: "0" (rr0), [cpsr] "r" (cpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	return ret;
-}
-
-static inline long __kprobes
-insnslot_2arg_rflags(long r0, long r1, long cpsr, insn_2arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long ret asm("r0");
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[cpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		: "=r" (ret)
-		: "0" (rr0), "r" (rr1),
-		  [cpsr] "r" (cpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	return ret;
-}
-
-static inline long __kprobes
-insnslot_3arg_rflags(long r0, long r1, long r2, long cpsr, insn_3arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long rr2 asm("r2") = r2;
-	register long ret asm("r0");
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[cpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		: "=r" (ret)
-		: "0" (rr0), "r" (rr1), "r" (rr2),
-		  [cpsr] "r" (cpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	return ret;
-}
-
-static inline long long __kprobes
-insnslot_llret_3arg_rflags(long r0, long r1, long r2, long cpsr,
-			   insn_llret_3arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long rr2 asm("r2") = r2;
-	register long ret0 asm("r0");
-	register long ret1 asm("r1");
-	union reg_pair fnr;
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[cpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		: "=r" (ret0), "=r" (ret1)
-		: "0" (rr0), "r" (rr1), "r" (rr2),
-		  [cpsr] "r" (cpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	fnr.r0 = ret0;
-	fnr.r1 = ret1;
-	return fnr.dr;
-}
-
-static inline long __kprobes
-insnslot_4arg_rflags(long r0, long r1, long r2, long r3, long cpsr,
-		     insn_4arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long rr2 asm("r2") = r2;
-	register long rr3 asm("r3") = r3;
-	register long ret asm("r0");
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[cpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		: "=r" (ret)
-		: "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3),
-		  [cpsr] "r" (cpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	return ret;
-}
-
-static inline long __kprobes
-insnslot_1arg_rwflags(long r0, long *cpsr, insn_1arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long ret asm("r0");
-	long oldcpsr = *cpsr;
-	long newcpsr;
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[oldcpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		"mrs	%[newcpsr], cpsr	\n\t"
-		: "=r" (ret), [newcpsr] "=r" (newcpsr)
-		: "0" (rr0), [oldcpsr] "r" (oldcpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	*cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs);
-	return ret;
-}
-
-static inline long __kprobes
-insnslot_2arg_rwflags(long r0, long r1, long *cpsr, insn_2arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long ret asm("r0");
-	long oldcpsr = *cpsr;
-	long newcpsr;
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[oldcpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		"mrs	%[newcpsr], cpsr	\n\t"
-		: "=r" (ret), [newcpsr] "=r" (newcpsr)
-		: "0" (rr0), "r" (rr1), [oldcpsr] "r" (oldcpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	*cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs);
-	return ret;
-}
-
-static inline long __kprobes
-insnslot_3arg_rwflags(long r0, long r1, long r2, long *cpsr,
-		      insn_3arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long rr2 asm("r2") = r2;
-	register long ret asm("r0");
-	long oldcpsr = *cpsr;
-	long newcpsr;
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[oldcpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		"mrs	%[newcpsr], cpsr	\n\t"
-		: "=r" (ret), [newcpsr] "=r" (newcpsr)
-		: "0" (rr0), "r" (rr1), "r" (rr2),
-		  [oldcpsr] "r" (oldcpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	*cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs);
-	return ret;
-}
-
-static inline long __kprobes
-insnslot_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr,
-		      insn_4arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long rr2 asm("r2") = r2;
-	register long rr3 asm("r3") = r3;
-	register long ret asm("r0");
-	long oldcpsr = *cpsr;
-	long newcpsr;
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[oldcpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		"mrs	%[newcpsr], cpsr	\n\t"
-		: "=r" (ret), [newcpsr] "=r" (newcpsr)
-		: "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3),
-		  [oldcpsr] "r" (oldcpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	*cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs);
-	return ret;
-}
-
-static inline long long __kprobes
-insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr,
-			    insn_llret_4arg_fn_t *fn)
-{
-	register long rr0 asm("r0") = r0;
-	register long rr1 asm("r1") = r1;
-	register long rr2 asm("r2") = r2;
-	register long rr3 asm("r3") = r3;
-	register long ret0 asm("r0");
-	register long ret1 asm("r1");
-	long oldcpsr = *cpsr;
-	long newcpsr;
-	union reg_pair fnr;
-
-	__asm__ __volatile__ (
-		"msr	cpsr_fs, %[oldcpsr]	\n\t"
-		"mov	lr, pc			\n\t"
-		"mov	pc, %[fn]		\n\t"
-		"mrs	%[newcpsr], cpsr	\n\t"
-		: "=r" (ret0), "=r" (ret1), [newcpsr] "=r" (newcpsr)
-		: "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3),
-		  [oldcpsr] "r" (oldcpsr), [fn] "r" (fn)
-		: "lr", "cc"
-	);
-	*cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs);
-	fnr.r0 = ret0;
-	fnr.r1 = ret1;
-	return fnr.dr;
-}
-
-/*
- * To avoid the complications of mimicing single-stepping on a
- * processor without a Next-PC or a single-step mode, and to
- * avoid having to deal with the side-effects of boosting, we
- * simulate or emulate (almost) all ARM instructions.
- *
- * "Simulation" is where the instruction's behavior is duplicated in
- * C code.  "Emulation" is where the original instruction is rewritten
- * and executed, often by altering its registers.
- *
- * By having all behavior of the kprobe'd instruction completed before
- * returning from the kprobe_handler(), all locks (scheduler and
- * interrupt) can safely be released.  There is no need for secondary
- * breakpoints, no race with MP or preemptable kernels, nor having to
- * clean up resources counts at a later time impacting overall system
- * performance.  By rewriting the instruction, only the minimum registers
- * need to be loaded and saved back optimizing performance.
- *
- * Calling the insnslot_*_rwflags version of a function doesn't hurt
- * anything even when the CPSR flags aren't updated by the
- * instruction.  It's just a little slower in return for saving
- * a little space by not having a duplicate function that doesn't
- * update the flags.  (The same optimization can be said for
- * instructions that do or don't perform register writeback)
- * Also, instructions can either read the flags, only write the
- * flags, or read and write the flags.  To save combinations
- * rather than for sheer performance, flag functions just assume
- * read and write of flags.
- */
-
-static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	long iaddr = (long)p->addr;
-	int disp  = branch_displacement(insn);
-
-	if (insn & (1 << 24))
-		regs->ARM_lr = iaddr + 4;
-
-	regs->ARM_pc = iaddr + 8 + disp;
-}
-
-static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	long iaddr = (long)p->addr;
-	int disp = branch_displacement(insn);
-
-	regs->ARM_lr = iaddr + 4;
-	regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2);
-	regs->ARM_cpsr |= PSR_T_BIT;
-}
-
-static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	int rm = insn & 0xf;
-	long rmv = regs->uregs[rm];
-
-	if (insn & (1 << 5))
-		regs->ARM_lr = (long)p->addr + 4;
-
-	regs->ARM_pc = rmv & ~0x1;
-	regs->ARM_cpsr &= ~PSR_T_BIT;
-	if (rmv & 0x1)
-		regs->ARM_cpsr |= PSR_T_BIT;
-}
-
-static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
-	regs->uregs[rd] = regs->ARM_cpsr & mask;
-}
-
-static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
-	int lbit = insn & (1 << 20);
-	int wbit = insn & (1 << 21);
-	int ubit = insn & (1 << 23);
-	int pbit = insn & (1 << 24);
-	long *addr = (long *)regs->uregs[rn];
-	int reg_bit_vector;
-	int reg_count;
-
-	reg_count = 0;
-	reg_bit_vector = insn & 0xffff;
-	while (reg_bit_vector) {
-		reg_bit_vector &= (reg_bit_vector - 1);
-		++reg_count;
-	}
-
-	if (!ubit)
-		addr -= reg_count;
-	addr += (!pbit == !ubit);
-
-	reg_bit_vector = insn & 0xffff;
-	while (reg_bit_vector) {
-		int reg = __ffs(reg_bit_vector);
-		reg_bit_vector &= (reg_bit_vector - 1);
-		if (lbit)
-			regs->uregs[reg] = *addr++;
-		else
-			*addr++ = regs->uregs[reg];
-	}
-
-	if (wbit) {
-		if (!ubit)
-			addr -= reg_count;
-		addr -= (!pbit == !ubit);
-		regs->uregs[rn] = (long)addr;
-	}
-}
-
-static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs)
-{
-	regs->ARM_pc = (long)p->addr + str_pc_offset;
-	simulate_ldm1stm1(p, regs);
-	regs->ARM_pc = (long)p->addr + 4;
-}
-
-static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs)
-{
-	regs->uregs[12] = regs->uregs[13];
-}
-
-static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	long ppc = (long)p->addr + 8;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	int rm = insn & 0xf;  /* rm may be invalid, don't care. */
-	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
-	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
-
-	/* Not following the C calling convention here, so need asm(). */
-	__asm__ __volatile__ (
-		"ldr	r0, %[rn]	\n\t"
-		"ldr	r1, %[rm]	\n\t"
-		"msr	cpsr_fs, %[cpsr]\n\t"
-		"mov	lr, pc		\n\t"
-		"mov	pc, %[i_fn]	\n\t"
-		"str	r0, %[rn]	\n\t"	/* in case of writeback */
-		"str	r2, %[rd0]	\n\t"
-		"str	r3, %[rd1]	\n\t"
-		: [rn]  "+m" (rnv),
-		  [rd0] "=m" (regs->uregs[rd]),
-		  [rd1] "=m" (regs->uregs[rd+1])
-		: [rm]   "m" (rmv),
-		  [cpsr] "r" (regs->ARM_cpsr),
-		  [i_fn] "r" (i_fn)
-		: "r0", "r1", "r2", "r3", "lr", "cc"
-	);
-	if (is_writeback(insn))
-		regs->uregs[rn] = rnv;
-}
-
-static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_4arg_fn_t *i_fn = (insn_4arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	long ppc = (long)p->addr + 8;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	int rm  = insn & 0xf;
-	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
-	/* rm/rmv may be invalid, don't care. */
-	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
-	long rnv_wb;
-
-	rnv_wb = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
-					       regs->uregs[rd+1],
-					       regs->ARM_cpsr, i_fn);
-	if (is_writeback(insn))
-		regs->uregs[rn] = rnv_wb;
-}
-
-static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_llret_3arg_fn_t *i_fn = (insn_llret_3arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	long ppc = (long)p->addr + 8;
-	union reg_pair fnr;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	int rm = insn & 0xf;
-	long rdv;
-	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
-	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
-	long cpsr = regs->ARM_cpsr;
-
-	fnr.dr = insnslot_llret_3arg_rflags(rnv, 0, rmv, cpsr, i_fn);
-	if (rn != 15)
-		regs->uregs[rn] = fnr.r0;  /* Save Rn in case of writeback. */
-	rdv = fnr.r1;
-
-	if (rd == 15) {
-#if __LINUX_ARM_ARCH__ >= 5
-		cpsr &= ~PSR_T_BIT;
-		if (rdv & 0x1)
-			cpsr |= PSR_T_BIT;
-		regs->ARM_cpsr = cpsr;
-		rdv &= ~0x1;
-#else
-		rdv &= ~0x2;
-#endif
-	}
-	regs->uregs[rd] = rdv;
-}
-
-static void __kprobes emulate_str(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	long iaddr = (long)p->addr;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	int rm = insn & 0xf;
-	long rdv = (rd == 15) ? iaddr + str_pc_offset : regs->uregs[rd];
-	long rnv = (rn == 15) ? iaddr +  8 : regs->uregs[rn];
-	long rmv = regs->uregs[rm];  /* rm/rmv may be invalid, don't care. */
-	long rnv_wb;
-
-	rnv_wb = insnslot_3arg_rflags(rnv, rdv, rmv, regs->ARM_cpsr, i_fn);
-	if (rn != 15)
-		regs->uregs[rn] = rnv_wb;  /* Save Rn in case of writeback. */
-}
-
-static void __kprobes emulate_sat(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rm = insn & 0xf;
-	long rmv = regs->uregs[rm];
-
-	/* Writes Q flag */
-	regs->uregs[rd] = insnslot_1arg_rwflags(rmv, &regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes emulate_sel(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	int rm = insn & 0xf;
-	long rnv = regs->uregs[rn];
-	long rmv = regs->uregs[rm];
-
-	/* Reads GE bits */
-	regs->uregs[rd] = insnslot_2arg_rflags(rnv, rmv, regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0];
-
-	insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes emulate_nop(struct kprobe *p, struct pt_regs *regs)
-{
-}
-
-static void __kprobes
-emulate_rd12_modify(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	long rdv = regs->uregs[rd];
-
-	regs->uregs[rd] = insnslot_1arg_rflags(rdv, regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_rd12rn0_modify(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = insn & 0xf;
-	long rdv = regs->uregs[rd];
-	long rnv = regs->uregs[rn];
-
-	regs->uregs[rd] = insnslot_2arg_rflags(rdv, rnv, regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rm = insn & 0xf;
-	long rmv = regs->uregs[rm];
-
-	regs->uregs[rd] = insnslot_1arg_rflags(rmv, regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_rd12rn16rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	int rm = insn & 0xf;
-	long rnv = regs->uregs[rn];
-	long rmv = regs->uregs[rm];
-
-	regs->uregs[rd] =
-		insnslot_2arg_rwflags(rnv, rmv, &regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_rd16rn12rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 16) & 0xf;
-	int rn = (insn >> 12) & 0xf;
-	int rs = (insn >> 8) & 0xf;
-	int rm = insn & 0xf;
-	long rnv = regs->uregs[rn];
-	long rsv = regs->uregs[rs];
-	long rmv = regs->uregs[rm];
-
-	regs->uregs[rd] =
-		insnslot_3arg_rwflags(rnv, rsv, rmv, &regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_rd16rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 16) & 0xf;
-	int rs = (insn >> 8) & 0xf;
-	int rm = insn & 0xf;
-	long rsv = regs->uregs[rs];
-	long rmv = regs->uregs[rm];
-
-	regs->uregs[rd] =
-		insnslot_2arg_rwflags(rsv, rmv, &regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_rdhi16rdlo12rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_llret_4arg_fn_t *i_fn = (insn_llret_4arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	union reg_pair fnr;
-	int rdhi = (insn >> 16) & 0xf;
-	int rdlo = (insn >> 12) & 0xf;
-	int rs   = (insn >> 8) & 0xf;
-	int rm   = insn & 0xf;
-	long rsv = regs->uregs[rs];
-	long rmv = regs->uregs[rm];
-
-	fnr.dr = insnslot_llret_4arg_rwflags(regs->uregs[rdhi],
-					     regs->uregs[rdlo], rsv, rmv,
-					     &regs->ARM_cpsr, i_fn);
-	regs->uregs[rdhi] = fnr.r0;
-	regs->uregs[rdlo] = fnr.r1;
-}
-
-static void __kprobes
-emulate_alu_imm_rflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
-
-	regs->uregs[rd] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_alu_imm_rwflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
-
-	regs->uregs[rd] = insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_alu_tests_imm(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
-
-	insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_alu_rflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	long ppc = (long)p->addr + 8;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;	/* rn/rnv/rs/rsv may be */
-	int rs = (insn >> 8) & 0xf;	/* invalid, don't care. */
-	int rm = insn & 0xf;
-	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
-	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
-	long rsv = regs->uregs[rs];
-
-	regs->uregs[rd] =
-		insnslot_3arg_rflags(rnv, rmv, rsv, regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_alu_rwflags(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	long ppc = (long)p->addr + 8;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;	/* rn/rnv/rs/rsv may be */
-	int rs = (insn >> 8) & 0xf;	/* invalid, don't care. */
-	int rm = insn & 0xf;
-	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
-	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
-	long rsv = regs->uregs[rs];
-
-	regs->uregs[rd] =
-		insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes
-emulate_alu_tests(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	long ppc = (long)p->addr + 8;
-	int rn = (insn >> 16) & 0xf;
-	int rs = (insn >> 8) & 0xf;	/* rs/rsv may be invalid, don't care. */
-	int rm = insn & 0xf;
-	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
-	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
-	long rsv = regs->uregs[rs];
-
-	insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	int not_imm = (insn & (1 << 26)) ? (insn & (1 << 25))
-					 : (~insn & (1 << 22));
-
-	if (is_writeback(insn) && is_r15(insn, 16))
-		return INSN_REJECTED;	/* Writeback to PC */
-
-	insn &= 0xfff00fff;
-	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
-	if (not_imm) {
-		insn &= ~0xf;
-		insn |= 2;	/* Rm = r2 */
-	}
-	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_ldr : emulate_str;
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_rd12_modify(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	if (is_r15(insn, 12))
-		return INSN_REJECTED;	/* Rd is PC */
-
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12_modify;
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_rd12rn0_modify(kprobe_opcode_t insn,
-			    struct arch_specific_insn *asi)
-{
-	if (is_r15(insn, 12))
-		return INSN_REJECTED;	/* Rd is PC */
-
-	insn &= 0xffff0ff0;	/* Rd = r0 */
-	insn |= 0x00000001;	/* Rn = r1 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12rn0_modify;
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	if (is_r15(insn, 12))
-		return INSN_REJECTED;	/* Rd is PC */
-
-	insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12rm0;
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn,
-				struct arch_specific_insn *asi)
-{
-	if (is_r15(insn, 12))
-		return INSN_REJECTED;	/* Rd is PC */
-
-	insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
-	insn |= 0x00000001;	/* Rm = r1 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12rn16rm0_rwflags;
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_rd16rs8rm0_wflags(kprobe_opcode_t insn,
-			       struct arch_specific_insn *asi)
-{
-	if (is_r15(insn, 16))
-		return INSN_REJECTED;	/* Rd is PC */
-
-	insn &= 0xfff0f0f0;	/* Rd = r0, Rs = r0 */
-	insn |= 0x00000001;	/* Rm = r1          */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd16rs8rm0_rwflags;
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_rd16rn12rs8rm0_wflags(kprobe_opcode_t insn,
-				   struct arch_specific_insn *asi)
-{
-	if (is_r15(insn, 16))
-		return INSN_REJECTED;	/* Rd is PC */
-
-	insn &= 0xfff000f0;	/* Rd = r0, Rn = r0 */
-	insn |= 0x00000102;	/* Rs = r1, Rm = r2 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd16rn12rs8rm0_rwflags;
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
-				       struct arch_specific_insn *asi)
-{
-	if (is_r15(insn, 16) || is_r15(insn, 12))
-		return INSN_REJECTED;	/* RdHi or RdLo is PC */
-
-	insn &= 0xfff000f0;	/* RdHi = r0, RdLo = r1 */
-	insn |= 0x00001203;	/* Rs = r2, Rm = r3 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rdhi16rdlo12rs8rm0_rwflags;
-	return INSN_GOOD;
-}
-
-/*
- * For the instruction masking and comparisons in all the "space_*"
- * functions below, Do _not_ rearrange the order of tests unless
- * you're very, very sure of what you are doing.  For the sake of
- * efficiency, the masks for some tests sometimes assume other test
- * have been done prior to them so the number of patterns to test
- * for an instruction set can be as broad as possible to reduce the
- * number of tests needed.
- */
-
-static enum kprobe_insn __kprobes
-space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* memory hint : 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx : */
-	/* PLDI        : 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx : */
-	/* PLDW        : 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx : */
-	/* PLD         : 1111 0101 x101 xxxx xxxx xxxx xxxx xxxx : */
-	if ((insn & 0xfe300000) == 0xf4100000) {
-		asi->insn_handler = emulate_nop;
-		return INSN_GOOD_NO_SLOT;
-	}
-
-	/* BLX(1) : 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx : */
-	if ((insn & 0xfe000000) == 0xfa000000) {
-		asi->insn_handler = simulate_blx1;
-		return INSN_GOOD_NO_SLOT;
-	}
-
-	/* CPS   : 1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */
-	/* SETEND: 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
-
-	/* SRS   : 1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
-	/* RFE   : 1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
-
-	/* Coprocessor instructions... */
-	/* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
-	/* MRRC2 : 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
-	/* LDC2  : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC2  : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	/* CDP2  : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	/* MCR2  : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC2  : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-
-	return INSN_REJECTED;
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	if ((insn & 0x0f900010) == 0x01000000) {
-
-		/* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0ff000f0) == 0x01000000) {
-			if (is_r15(insn, 12))
-				return INSN_REJECTED;	/* Rd is PC */
-			asi->insn_handler = simulate_mrs;
-			return INSN_GOOD_NO_SLOT;
-		}
-
-		/* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
-		if ((insn & 0x0ff00090) == 0x01400080)
-			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
-									asi);
-
-		/* SMULWy : cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */
-		/* SMULxy : cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */
-		if ((insn & 0x0ff000b0) == 0x012000a0 ||
-		    (insn & 0x0ff00090) == 0x01600080)
-			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-
-		/* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */
-		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx : Q */
-		if ((insn & 0x0ff00090) == 0x01000080 ||
-		    (insn & 0x0ff000b0) == 0x01200080)
-			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-
-		/* BXJ      : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
-		/* MSR      : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
-		/* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
-
-		/* Other instruction encodings aren't yet defined */
-		return INSN_REJECTED;
-	}
-
-	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx 0xx1 xxxx */
-	else if ((insn & 0x0f900090) == 0x01000010) {
-
-		/* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
-		/* BX     : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
-		if ((insn & 0x0ff000d0) == 0x01200010) {
-			if ((insn & 0x0ff000ff) == 0x0120003f)
-				return INSN_REJECTED; /* BLX pc */
-			asi->insn_handler = simulate_blx2bx;
-			return INSN_GOOD_NO_SLOT;
-		}
-
-		/* CLZ : cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */
-		if ((insn & 0x0ff000f0) == 0x01600010)
-			return prep_emulate_rd12rm0(insn, asi);
-
-		/* QADD    : cccc 0001 0000 xxxx xxxx xxxx 0101 xxxx :Q */
-		/* QSUB    : cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx :Q */
-		/* QDADD   : cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx :Q */
-		/* QDSUB   : cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx :Q */
-		if ((insn & 0x0f9000f0) == 0x01000050)
-			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
-
-		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-		/* SMC  : cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
-
-		/* Other instruction encodings aren't yet defined */
-		return INSN_REJECTED;
-	}
-
-	/* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */
-	else if ((insn & 0x0f0000f0) == 0x00000090) {
-
-		/* MUL    : cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx :   */
-		/* MULS   : cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx :cc */
-		/* MLA    : cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx :   */
-		/* MLAS   : cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx :cc */
-		/* UMAAL  : cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx :   */
-		/* undef  : cccc 0000 0101 xxxx xxxx xxxx 1001 xxxx :   */
-		/* MLS    : cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx :   */
-		/* undef  : cccc 0000 0111 xxxx xxxx xxxx 1001 xxxx :   */
-		/* UMULL  : cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx :   */
-		/* UMULLS : cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx :cc */
-		/* UMLAL  : cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx :   */
-		/* UMLALS : cccc 0000 1011 xxxx xxxx xxxx 1001 xxxx :cc */
-		/* SMULL  : cccc 0000 1100 xxxx xxxx xxxx 1001 xxxx :   */
-		/* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */
-		/* SMLAL  : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx :   */
-		/* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */
-		if ((insn & 0x00d00000) == 0x00500000)
-			return INSN_REJECTED;
-		else if ((insn & 0x00e00000) == 0x00000000)
-			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-		else if ((insn & 0x00a00000) == 0x00200000)
-			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-		else
-			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
-									asi);
-	}
-
-	/* cccc 000x xxxx xxxx xxxx xxxx xxxx 1xx1 xxxx */
-	else if ((insn & 0x0e000090) == 0x00000090) {
-
-		/* SWP   : cccc 0001 0000 xxxx xxxx xxxx 1001 xxxx */
-		/* SWPB  : cccc 0001 0100 xxxx xxxx xxxx 1001 xxxx */
-		/* ???   : cccc 0001 0x01 xxxx xxxx xxxx 1001 xxxx */
-		/* ???   : cccc 0001 0x10 xxxx xxxx xxxx 1001 xxxx */
-		/* ???   : cccc 0001 0x11 xxxx xxxx xxxx 1001 xxxx */
-		/* STREX : cccc 0001 1000 xxxx xxxx xxxx 1001 xxxx */
-		/* LDREX : cccc 0001 1001 xxxx xxxx xxxx 1001 xxxx */
-		/* STREXD: cccc 0001 1010 xxxx xxxx xxxx 1001 xxxx */
-		/* LDREXD: cccc 0001 1011 xxxx xxxx xxxx 1001 xxxx */
-		/* STREXB: cccc 0001 1100 xxxx xxxx xxxx 1001 xxxx */
-		/* LDREXB: cccc 0001 1101 xxxx xxxx xxxx 1001 xxxx */
-		/* STREXH: cccc 0001 1110 xxxx xxxx xxxx 1001 xxxx */
-		/* LDREXH: cccc 0001 1111 xxxx xxxx xxxx 1001 xxxx */
-
-		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
-		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
-		/* LDRH  : cccc 000x xxx1 xxxx xxxx xxxx 1011 xxxx */
-		/* STRH  : cccc 000x xxx0 xxxx xxxx xxxx 1011 xxxx */
-		/* LDRSB : cccc 000x xxx1 xxxx xxxx xxxx 1101 xxxx */
-		/* LDRSH : cccc 000x xxx1 xxxx xxxx xxxx 1111 xxxx */
-		if ((insn & 0x0f0000f0) == 0x01000090) {
-			if ((insn & 0x0fb000f0) == 0x01000090) {
-				/* SWP/SWPB */
-				return prep_emulate_rd12rn16rm0_wflags(insn,
-									asi);
-			} else {
-				/* STREX/LDREX variants and unallocaed space */
-				return INSN_REJECTED;
-			}
-
-		} else if ((insn & 0x0e1000d0) == 0x00000d0) {
-			/* STRD/LDRD */
-			if ((insn & 0x0000e000) == 0x0000e000)
-				return INSN_REJECTED;	/* Rd is LR or PC */
-			if (is_writeback(insn) && is_r15(insn, 16))
-				return INSN_REJECTED;	/* Writeback to PC */
-
-			insn &= 0xfff00fff;
-			insn |= 0x00002000;	/* Rn = r0, Rd = r2 */
-			if (!(insn & (1 << 22))) {
-				/* Register index */
-				insn &= ~0xf;
-				insn |= 1;	/* Rm = r1 */
-			}
-			asi->insn[0] = insn;
-			asi->insn_handler =
-				(insn & (1 << 5)) ? emulate_strd : emulate_ldrd;
-			return INSN_GOOD;
-		}
-
-		/* LDRH/STRH/LDRSB/LDRSH */
-		if (is_r15(insn, 12))
-			return INSN_REJECTED;	/* Rd is PC */
-		return prep_emulate_ldr_str(insn, asi);
-	}
-
-	/* cccc 000x xxxx xxxx xxxx xxxx xxxx xxxx xxxx */
-
-	/*
-	 * ALU op with S bit and Rd == 15 :
-	 *	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx
-	 */
-	if ((insn & 0x0e10f000) == 0x0010f000)
-		return INSN_REJECTED;
-
-	/*
-	 * "mov ip, sp" is the most common kprobe'd instruction by far.
-	 * Check and optimize for it explicitly.
-	 */
-	if (insn == 0xe1a0c00d) {
-		asi->insn_handler = simulate_mov_ipsp;
-		return INSN_GOOD_NO_SLOT;
-	}
-
-	/*
-	 * Data processing: Immediate-shift / Register-shift
-	 * ALU op : cccc 000x xxxx xxxx xxxx xxxx xxxx xxxx
-	 * CPY    : cccc 0001 1010 xxxx xxxx 0000 0000 xxxx
-	 * MOV    : cccc 0001 101x xxxx xxxx xxxx xxxx xxxx
-	 * *S (bit 20) updates condition codes
-	 * ADC/SBC/RSC reads the C flag
-	 */
-	insn &= 0xfff00ff0;	/* Rn = r0, Rd = r0 */
-	insn |= 0x00000001;	/* Rm = r1 */
-	if (insn & 0x010) {
-		insn &= 0xfffff0ff;     /* register shift */
-		insn |= 0x00000200;     /* Rs = r2 */
-	}
-	asi->insn[0] = insn;
-
-	if ((insn & 0x0f900000) == 0x01100000) {
-		/*
-		 * TST : cccc 0001 0001 xxxx xxxx xxxx xxxx xxxx
-		 * TEQ : cccc 0001 0011 xxxx xxxx xxxx xxxx xxxx
-		 * CMP : cccc 0001 0101 xxxx xxxx xxxx xxxx xxxx
-		 * CMN : cccc 0001 0111 xxxx xxxx xxxx xxxx xxxx
-		 */
-		asi->insn_handler = emulate_alu_tests;
-	} else {
-		/* ALU ops which write to Rd */
-		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
-				emulate_alu_rwflags : emulate_alu_rflags;
-	}
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* MOVW  : cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
-	/* MOVT  : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
-	if ((insn & 0x0fb00000) == 0x03000000)
-		return prep_emulate_rd12_modify(insn, asi);
-
-	/* hints : cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
-	if ((insn & 0x0fff0000) == 0x03200000) {
-		unsigned op2 = insn & 0x000000ff;
-		if (op2 == 0x01 || op2 == 0x04) {
-			/* YIELD : cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
-			/* SEV   : cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
-			asi->insn[0] = insn;
-			asi->insn_handler = emulate_none;
-			return INSN_GOOD;
-		} else if (op2 <= 0x03) {
-			/* NOP   : cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
-			/* WFE   : cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
-			/* WFI   : cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
-			/*
-			 * We make WFE and WFI true NOPs to avoid stalls due
-			 * to missing events whilst processing the probe.
-			 */
-			asi->insn_handler = emulate_nop;
-			return INSN_GOOD_NO_SLOT;
-		}
-		/* For DBG and unallocated hints it's safest to reject them */
-		return INSN_REJECTED;
-	}
-
-	/*
-	 * MSR   : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx
-	 * ALU op with S bit and Rd == 15 :
-	 *	   cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx
-	 */
-	if ((insn & 0x0fb00000) == 0x03200000 ||	/* MSR */
-	    (insn & 0x0e10f000) == 0x0210f000)		/* ALU s-bit, R15  */
-		return INSN_REJECTED;
-
-	/*
-	 * Data processing: 32-bit Immediate
-	 * ALU op : cccc 001x xxxx xxxx xxxx xxxx xxxx xxxx
-	 * MOV    : cccc 0011 101x xxxx xxxx xxxx xxxx xxxx
-	 * *S (bit 20) updates condition codes
-	 * ADC/SBC/RSC reads the C flag
-	 */
-	insn &= 0xfff00fff;	/* Rn = r0 and Rd = r0 */
-	asi->insn[0] = insn;
-
-	if ((insn & 0x0f900000) == 0x03100000) {
-		/*
-		 * TST : cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx
-		 * TEQ : cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx
-		 * CMP : cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx
-		 * CMN : cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx
-		 */
-		asi->insn_handler = emulate_alu_tests_imm;
-	} else {
-		/* ALU ops which write to Rd */
-		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
-			emulate_alu_imm_rwflags : emulate_alu_imm_rflags;
-	}
-	return INSN_GOOD;
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* SEL : cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx GE: !!! */
-	if ((insn & 0x0ff000f0) == 0x068000b0) {
-		if (is_r15(insn, 12))
-			return INSN_REJECTED;	/* Rd is PC */
-		insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
-		insn |= 0x00000001;	/* Rm = r1 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_sel;
-		return INSN_GOOD;
-	}
-
-	/* SSAT   : cccc 0110 101x xxxx xxxx xxxx xx01 xxxx :Q */
-	/* USAT   : cccc 0110 111x xxxx xxxx xxxx xx01 xxxx :Q */
-	/* SSAT16 : cccc 0110 1010 xxxx xxxx xxxx 0011 xxxx :Q */
-	/* USAT16 : cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx :Q */
-	if ((insn & 0x0fa00030) == 0x06a00010 ||
-	    (insn & 0x0fb000f0) == 0x06a00030) {
-		if (is_r15(insn, 12))
-			return INSN_REJECTED;	/* Rd is PC */
-		insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_sat;
-		return INSN_GOOD;
-	}
-
-	/* REV    : cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */
-	/* REV16  : cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */
-	/* RBIT   : cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */
-	/* REVSH  : cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */
-	if ((insn & 0x0ff00070) == 0x06b00030 ||
-	    (insn & 0x0ff00070) == 0x06f00030)
-		return prep_emulate_rd12rm0(insn, asi);
-
-	/* ???       : cccc 0110 0000 xxxx xxxx xxxx xxx1 xxxx :   */
-	/* SADD16    : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */
-	/* SADDSUBX  : cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx :GE */
-	/* SSUBADDX  : cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx :GE */
-	/* SSUB16    : cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx :GE */
-	/* SADD8     : cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx :GE */
-	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1011 xxxx :   */
-	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1101 xxxx :   */
-	/* SSUB8     : cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx :GE */
-	/* QADD16    : cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx :   */
-	/* QADDSUBX  : cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx :   */
-	/* QSUBADDX  : cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx :   */
-	/* QSUB16    : cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx :   */
-	/* QADD8     : cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx :   */
-	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1011 xxxx :   */
-	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1101 xxxx :   */
-	/* QSUB8     : cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx :   */
-	/* SHADD16   : cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx :   */
-	/* SHADDSUBX : cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx :   */
-	/* SHSUBADDX : cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx :   */
-	/* SHSUB16   : cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SHADD8    : cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx :   */
-	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1011 xxxx :   */
-	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1101 xxxx :   */
-	/* SHSUB8    : cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx :   */
-	/* ???       : cccc 0110 0100 xxxx xxxx xxxx xxx1 xxxx :   */
-	/* UADD16    : cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx :GE */
-	/* UADDSUBX  : cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx :GE */
-	/* USUBADDX  : cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx :GE */
-	/* USUB16    : cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx :GE */
-	/* UADD8     : cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx :GE */
-	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1011 xxxx :   */
-	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1101 xxxx :   */
-	/* USUB8     : cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx :GE */
-	/* UQADD16   : cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx :   */
-	/* UQADDSUBX : cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx :   */
-	/* UQSUBADDX : cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx :   */
-	/* UQSUB16   : cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx :   */
-	/* UQADD8    : cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx :   */
-	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1011 xxxx :   */
-	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1101 xxxx :   */
-	/* UQSUB8    : cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx :   */
-	/* UHADD16   : cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx :   */
-	/* UHADDSUBX : cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx :   */
-	/* UHSUBADDX : cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx :   */
-	/* UHSUB16   : cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx :   */
-	/* UHADD8    : cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx :   */
-	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1011 xxxx :   */
-	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1101 xxxx :   */
-	/* UHSUB8    : cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx :   */
-	if ((insn & 0x0f800010) == 0x06000010) {
-		if ((insn & 0x00300000) == 0x00000000 ||
-		    (insn & 0x000000e0) == 0x000000a0 ||
-		    (insn & 0x000000e0) == 0x000000c0)
-			return INSN_REJECTED;	/* Unallocated space */
-		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
-	}
-
-	/* PKHBT     : cccc 0110 1000 xxxx xxxx xxxx x001 xxxx :   */
-	/* PKHTB     : cccc 0110 1000 xxxx xxxx xxxx x101 xxxx :   */
-	if ((insn & 0x0ff00030) == 0x06800010)
-		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
-
-	/* SXTAB16   : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SXTB16    : cccc 0110 1000 1111 xxxx xxxx 0111 xxxx :   */
-	/* ???       : cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SXTAB     : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SXTB      : cccc 0110 1010 1111 xxxx xxxx 0111 xxxx :   */
-	/* SXTAH     : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SXTH      : cccc 0110 1011 1111 xxxx xxxx 0111 xxxx :   */
-	/* UXTAB16   : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx :   */
-	/* UXTB16    : cccc 0110 1100 1111 xxxx xxxx 0111 xxxx :   */
-	/* ???       : cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx :   */
-	/* UXTAB     : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx :   */
-	/* UXTB      : cccc 0110 1110 1111 xxxx xxxx 0111 xxxx :   */
-	/* UXTAH     : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx :   */
-	/* UXTH      : cccc 0110 1111 1111 xxxx xxxx 0111 xxxx :   */
-	if ((insn & 0x0f8000f0) == 0x06800070) {
-		if ((insn & 0x00300000) == 0x00100000)
-			return INSN_REJECTED;	/* Unallocated space */
-
-		if ((insn & 0x000f0000) == 0x000f0000)
-			return prep_emulate_rd12rm0(insn, asi);
-		else
-			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
-	}
-
-	/* Other instruction encodings aren't yet defined */
-	return INSN_REJECTED;
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* Undef : cccc 0111 1111 xxxx xxxx xxxx 1111 xxxx */
-	if ((insn & 0x0ff000f0) == 0x03f000f0)
-		return INSN_REJECTED;
-
-	/* SMLALD : cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
-	/* SMLSLD : cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
-	if ((insn & 0x0ff00090) == 0x07400010)
-		return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
-
-	/* SMLAD  : cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx :Q */
-	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
-	/* SMLSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :Q */
-	/* SMUSD  : cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx :  */
-	/* SMMLA  : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx :  */
-	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
-	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx :  */
-	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx :  */
-	if ((insn & 0x0ff00090) == 0x07000010 ||
-	    (insn & 0x0ff000d0) == 0x07500010 ||
-	    (insn & 0x0ff000f0) == 0x07800010) {
-
-		if ((insn & 0x0000f000) == 0x0000f000)
-			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-		else
-			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-	}
-
-	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
-	if ((insn & 0x0ff000d0) == 0x075000d0)
-		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-
-	/* SBFX   : cccc 0111 101x xxxx xxxx xxxx x101 xxxx :  */
-	/* UBFX   : cccc 0111 111x xxxx xxxx xxxx x101 xxxx :  */
-	if ((insn & 0x0fa00070) == 0x07a00050)
-		return prep_emulate_rd12rm0(insn, asi);
-
-	/* BFI    : cccc 0111 110x xxxx xxxx xxxx x001 xxxx :  */
-	/* BFC    : cccc 0111 110x xxxx xxxx xxxx x001 1111 :  */
-	if ((insn & 0x0fe00070) == 0x07c00010) {
-
-		if ((insn & 0x0000000f) == 0x0000000f)
-			return prep_emulate_rd12_modify(insn, asi);
-		else
-			return prep_emulate_rd12rn0_modify(insn, asi);
-	}
-
-	return INSN_REJECTED;
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_01xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* LDR   : cccc 01xx x0x1 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRB  : cccc 01xx x1x1 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRBT : cccc 01x0 x111 xxxx xxxx xxxx xxxx xxxx */
-	/* LDRT  : cccc 01x0 x011 xxxx xxxx xxxx xxxx xxxx */
-	/* STR   : cccc 01xx x0x0 xxxx xxxx xxxx xxxx xxxx */
-	/* STRB  : cccc 01xx x1x0 xxxx xxxx xxxx xxxx xxxx */
-	/* STRBT : cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
-	/* STRT  : cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
-
-	if ((insn & 0x00500000) == 0x00500000 && is_r15(insn, 12))
-		return INSN_REJECTED;	/* LDRB into PC */
-
-	return prep_emulate_ldr_str(insn, asi);
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_100x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* LDM(2) : cccc 100x x101 xxxx 0xxx xxxx xxxx xxxx */
-	/* LDM(3) : cccc 100x x1x1 xxxx 1xxx xxxx xxxx xxxx */
-	if ((insn & 0x0e708000) == 0x85000000 ||
-	    (insn & 0x0e508000) == 0x85010000)
-		return INSN_REJECTED;
-
-	/* LDM(1) : cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
-	/* STM(1) : cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */
-	asi->insn_handler = ((insn & 0x108000) == 0x008000) ? /* STM & R15 */
-				simulate_stm1_pc : simulate_ldm1stm1;
-	return INSN_GOOD_NO_SLOT;
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* B  : cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */
-	/* BL : cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */
-	asi->insn_handler = simulate_bbl;
-	return INSN_GOOD_NO_SLOT;
-}
-
-static enum kprobe_insn __kprobes
-space_cccc_11xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* Coprocessor instructions... */
-	/* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
-	/* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
-	/* LDC  : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC  : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	/* CDP  : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	/* MCR  : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC  : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-
-	/* SVC  : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
-
-	return INSN_REJECTED;
-}
-
-static unsigned long __kprobes __check_eq(unsigned long cpsr)
-{
-	return cpsr & PSR_Z_BIT;
-}
-
-static unsigned long __kprobes __check_ne(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_Z_BIT;
-}
-
-static unsigned long __kprobes __check_cs(unsigned long cpsr)
-{
-	return cpsr & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_cc(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_mi(unsigned long cpsr)
-{
-	return cpsr & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_pl(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_vs(unsigned long cpsr)
-{
-	return cpsr & PSR_V_BIT;
-}
-
-static unsigned long __kprobes __check_vc(unsigned long cpsr)
-{
-	return (~cpsr) & PSR_V_BIT;
-}
-
-static unsigned long __kprobes __check_hi(unsigned long cpsr)
-{
-	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
-	return cpsr & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_ls(unsigned long cpsr)
-{
-	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
-	return (~cpsr) & PSR_C_BIT;
-}
-
-static unsigned long __kprobes __check_ge(unsigned long cpsr)
-{
-	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	return (~cpsr) & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_lt(unsigned long cpsr)
-{
-	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	return cpsr & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_gt(unsigned long cpsr)
-{
-	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
-	return (~temp) & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_le(unsigned long cpsr)
-{
-	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
-	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
-	return temp & PSR_N_BIT;
-}
-
-static unsigned long __kprobes __check_al(unsigned long cpsr)
-{
-	return true;
-}
-
-static kprobe_check_cc * const condition_checks[16] = {
-	&__check_eq, &__check_ne, &__check_cs, &__check_cc,
-	&__check_mi, &__check_pl, &__check_vs, &__check_vc,
-	&__check_hi, &__check_ls, &__check_ge, &__check_lt,
-	&__check_gt, &__check_le, &__check_al, &__check_al
-};
-
-/* Return:
- *   INSN_REJECTED     If instruction is one not allowed to kprobe,
- *   INSN_GOOD         If instruction is supported and uses instruction slot,
- *   INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
- *
- * For instructions we don't want to kprobe (INSN_REJECTED return result):
- *   These are generally ones that modify the processor state making
- *   them "hard" to simulate such as switches processor modes or
- *   make accesses in alternate modes.  Any of these could be simulated
- *   if the work was put into it, but low return considering they
- *   should also be very rare.
- */
-enum kprobe_insn __kprobes
-arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	asi->insn_check_cc = condition_checks[insn>>28];
-	asi->insn[1] = KPROBE_RETURN_INSTRUCTION;
-
-	if ((insn & 0xf0000000) == 0xf0000000)
-
-		return space_1111(insn, asi);
-
-	else if ((insn & 0x0e000000) == 0x00000000)
-
-		return space_cccc_000x(insn, asi);
-
-	else if ((insn & 0x0e000000) == 0x02000000)
-
-		return space_cccc_001x(insn, asi);
-
-	else if ((insn & 0x0f000010) == 0x06000010)
-
-		return space_cccc_0110__1(insn, asi);
-
-	else if ((insn & 0x0f000010) == 0x07000010)
-
-		return space_cccc_0111__1(insn, asi);
-
-	else if ((insn & 0x0c000000) == 0x04000000)
-
-		return space_cccc_01xx(insn, asi);
-
-	else if ((insn & 0x0e000000) == 0x08000000)
-
-		return space_cccc_100x(insn, asi);
-
-	else if ((insn & 0x0e000000) == 0x0a000000)
-
-		return space_cccc_101x(insn, asi);
-
-	return space_cccc_11xx(insn, asi);
-}
-
-void __init arm_kprobe_decode_init(void)
-{
-	find_str_pc_offset();
-}

+ 1462 - 0
arch/arm/kernel/kprobes-thumb.c

@@ -0,0 +1,1462 @@
+/*
+ * arch/arm/kernel/kprobes-thumb.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "kprobes.h"
+
+
+/*
+ * True if current instruction is in an IT block.
+ */
+#define in_it_block(cpsr)	((cpsr & 0x06000c00) != 0x00000000)
+
+/*
+ * Return the condition code to check for the currently executing instruction.
+ * This is in ITSTATE<7:4> which is in CPSR<15:12> but is only valid if
+ * in_it_block returns true.
+ */
+#define current_cond(cpsr)	((cpsr >> 12) & 0xf)
+
+/*
+ * Return the PC value for a probe in thumb code.
+ * This is the address of the probed instruction plus 4.
+ * We subtract one because the address will have bit zero set to indicate
+ * a pointer to thumb code.
+ */
+static inline unsigned long __kprobes thumb_probe_pc(struct kprobe *p)
+{
+	return (unsigned long)p->addr - 1 + 4;
+}
+
+static void __kprobes
+t32_simulate_table_branch(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	unsigned long rnv = (rn == 15) ? pc : regs->uregs[rn];
+	unsigned long rmv = regs->uregs[rm];
+	unsigned int halfwords;
+
+	if (insn & 0x10) /* TBH */
+		halfwords = ((u16 *)rnv)[rmv];
+	else /* TBB */
+		halfwords = ((u8 *)rnv)[rmv];
+
+	regs->ARM_pc = pc + 2 * halfwords;
+}
+
+static void __kprobes
+t32_simulate_mrs(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 8) & 0xf;
+	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
+	regs->uregs[rd] = regs->ARM_cpsr & mask;
+}
+
+static void __kprobes
+t32_simulate_cond_branch(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+
+	long offset = insn & 0x7ff;		/* imm11 */
+	offset += (insn & 0x003f0000) >> 5;	/* imm6 */
+	offset += (insn & 0x00002000) << 4;	/* J1 */
+	offset += (insn & 0x00000800) << 7;	/* J2 */
+	offset -= (insn & 0x04000000) >> 7;	/* Apply sign bit */
+
+	regs->ARM_pc = pc + (offset * 2);
+}
+
+static enum kprobe_insn __kprobes
+t32_decode_cond_branch(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	int cc = (insn >> 22) & 0xf;
+	asi->insn_check_cc = kprobe_condition_checks[cc];
+	asi->insn_handler = t32_simulate_cond_branch;
+	return INSN_GOOD_NO_SLOT;
+}
+
+static void __kprobes
+t32_simulate_branch(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+
+	long offset = insn & 0x7ff;		/* imm11 */
+	offset += (insn & 0x03ff0000) >> 5;	/* imm10 */
+	offset += (insn & 0x00002000) << 9;	/* J1 */
+	offset += (insn & 0x00000800) << 10;	/* J2 */
+	if (insn & 0x04000000)
+		offset -= 0x00800000; /* Apply sign bit */
+	else
+		offset ^= 0x00600000; /* Invert J1 and J2 */
+
+	if (insn & (1 << 14)) {
+		/* BL or BLX */
+		regs->ARM_lr = (unsigned long)p->addr + 4;
+		if (!(insn & (1 << 12))) {
+			/* BLX so switch to ARM mode */
+			regs->ARM_cpsr &= ~PSR_T_BIT;
+			pc &= ~3;
+		}
+	}
+
+	regs->ARM_pc = pc + (offset * 2);
+}
+
+static void __kprobes
+t32_simulate_ldr_literal(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long addr = thumb_probe_pc(p) & ~3;
+	int rt = (insn >> 12) & 0xf;
+	unsigned long rtv;
+
+	long offset = insn & 0xfff;
+	if (insn & 0x00800000)
+		addr += offset;
+	else
+		addr -= offset;
+
+	if (insn & 0x00400000) {
+		/* LDR */
+		rtv = *(unsigned long *)addr;
+		if (rt == 15) {
+			bx_write_pc(rtv, regs);
+			return;
+		}
+	} else if (insn & 0x00200000) {
+		/* LDRH */
+		if (insn & 0x01000000)
+			rtv = *(s16 *)addr;
+		else
+			rtv = *(u16 *)addr;
+	} else {
+		/* LDRB */
+		if (insn & 0x01000000)
+			rtv = *(s8 *)addr;
+		else
+			rtv = *(u8 *)addr;
+	}
+
+	regs->uregs[rt] = rtv;
+}
+
+static enum kprobe_insn __kprobes
+t32_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	enum kprobe_insn ret = kprobe_decode_ldmstm(insn, asi);
+
+	/* Fixup modified instruction to have halfwords in correct order...*/
+	insn = asi->insn[0];
+	((u16 *)asi->insn)[0] = insn >> 16;
+	((u16 *)asi->insn)[1] = insn & 0xffff;
+
+	return ret;
+}
+
+static void __kprobes
+t32_emulate_ldrdstrd(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p) & ~3;
+	int rt1 = (insn >> 12) & 0xf;
+	int rt2 = (insn >> 8) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+
+	register unsigned long rt1v asm("r0") = regs->uregs[rt1];
+	register unsigned long rt2v asm("r1") = regs->uregs[rt2];
+	register unsigned long rnv asm("r2") = (rn == 15) ? pc
+							  : regs->uregs[rn];
+
+	__asm__ __volatile__ (
+		"blx    %[fn]"
+		: "=r" (rt1v), "=r" (rt2v), "=r" (rnv)
+		: "0" (rt1v), "1" (rt2v), "2" (rnv), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	if (rn != 15)
+		regs->uregs[rn] = rnv; /* Writeback base register */
+	regs->uregs[rt1] = rt1v;
+	regs->uregs[rt2] = rt2v;
+}
+
+static void __kprobes
+t32_emulate_ldrstr(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rt = (insn >> 12) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rtv asm("r0") = regs->uregs[rt];
+	register unsigned long rnv asm("r2") = regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+
+	__asm__ __volatile__ (
+		"blx    %[fn]"
+		: "=r" (rtv), "=r" (rnv)
+		: "0" (rtv), "1" (rnv), "r" (rmv), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rn] = rnv; /* Writeback base register */
+	if (rt == 15) /* Can't be true for a STR as they aren't allowed */
+		bx_write_pc(rtv, regs);
+	else
+		regs->uregs[rt] = rtv;
+}
+
+static void __kprobes
+t32_emulate_rd8rn16rm0_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 8) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rdv asm("r1") = regs->uregs[rd];
+	register unsigned long rnv asm("r2") = regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+	unsigned long cpsr = regs->ARM_cpsr;
+
+	__asm__ __volatile__ (
+		"msr	cpsr_fs, %[cpsr]	\n\t"
+		"blx    %[fn]			\n\t"
+		"mrs	%[cpsr], cpsr		\n\t"
+		: "=r" (rdv), [cpsr] "=r" (cpsr)
+		: "0" (rdv), "r" (rnv), "r" (rmv),
+		  "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rd] = rdv;
+	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
+}
+
+static void __kprobes
+t32_emulate_rd8pc16_noflags(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+	int rd = (insn >> 8) & 0xf;
+
+	register unsigned long rdv asm("r1") = regs->uregs[rd];
+	register unsigned long rnv asm("r2") = pc & ~3;
+
+	__asm__ __volatile__ (
+		"blx    %[fn]"
+		: "=r" (rdv)
+		: "0" (rdv), "r" (rnv), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rd] = rdv;
+}
+
+static void __kprobes
+t32_emulate_rd8rn16_noflags(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 8) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+
+	register unsigned long rdv asm("r1") = regs->uregs[rd];
+	register unsigned long rnv asm("r2") = regs->uregs[rn];
+
+	__asm__ __volatile__ (
+		"blx    %[fn]"
+		: "=r" (rdv)
+		: "0" (rdv), "r" (rnv), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rd] = rdv;
+}
+
+static void __kprobes
+t32_emulate_rdlo12rdhi8rn16rm0_noflags(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rdlo = (insn >> 12) & 0xf;
+	int rdhi = (insn >> 8) & 0xf;
+	int rn = (insn >> 16) & 0xf;
+	int rm = insn & 0xf;
+
+	register unsigned long rdlov asm("r0") = regs->uregs[rdlo];
+	register unsigned long rdhiv asm("r1") = regs->uregs[rdhi];
+	register unsigned long rnv asm("r2") = regs->uregs[rn];
+	register unsigned long rmv asm("r3") = regs->uregs[rm];
+
+	__asm__ __volatile__ (
+		"blx    %[fn]"
+		: "=r" (rdlov), "=r" (rdhiv)
+		: "0" (rdlov), "1" (rdhiv), "r" (rnv), "r" (rmv),
+		  [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	regs->uregs[rdlo] = rdlov;
+	regs->uregs[rdhi] = rdhiv;
+}
+
+/* These emulation encodings are functionally equivalent... */
+#define t32_emulate_rd8rn16rm0ra12_noflags \
+		t32_emulate_rdlo12rdhi8rn16rm0_noflags
+
+static const union decode_item t32_table_1110_100x_x0xx[] = {
+	/* Load/store multiple instructions */
+
+	/* Rn is PC		1110 100x x0xx 1111 xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe4f0000, 0xe80f0000),
+
+	/* SRS			1110 1000 00x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE			1110 1000 00x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffc00000, 0xe8000000),
+	/* SRS			1110 1001 10x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE			1110 1001 10x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffc00000, 0xe9800000),
+
+	/* STM Rn, {...pc}	1110 100x x0x0 xxxx 1xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe508000, 0xe8008000),
+	/* LDM Rn, {...lr,pc}	1110 100x x0x1 xxxx 11xx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe50c000, 0xe810c000),
+	/* LDM/STM Rn, {...sp}	1110 100x x0xx xxxx xx1x xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe402000, 0xe8002000),
+
+	/* STMIA		1110 1000 10x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDMIA		1110 1000 10x1 xxxx xxxx xxxx xxxx xxxx */
+	/* STMDB		1110 1001 00x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDMDB		1110 1001 00x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0xfe400000, 0xe8000000, t32_decode_ldmstm),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1110_100x_x1xx[] = {
+	/* Load/store dual, load/store exclusive, table branch */
+
+	/* STRD (immediate)	1110 1000 x110 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRD (immediate)	1110 1000 x111 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_OR	(0xff600000, 0xe8600000),
+	/* STRD (immediate)	1110 1001 x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRD (immediate)	1110 1001 x1x1 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xff400000, 0xe9400000, t32_emulate_ldrdstrd,
+						 REGS(NOPCWB, NOSPPC, NOSPPC, 0, 0)),
+
+	/* TBB			1110 1000 1101 xxxx xxxx xxxx 0000 xxxx */
+	/* TBH			1110 1000 1101 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_SIMULATEX(0xfff000e0, 0xe8d00000, t32_simulate_table_branch,
+						 REGS(NOSP, 0, 0, 0, NOSPPC)),
+
+	/* STREX		1110 1000 0100 xxxx xxxx xxxx xxxx xxxx */
+	/* LDREX		1110 1000 0101 xxxx xxxx xxxx xxxx xxxx */
+	/* STREXB		1110 1000 1100 xxxx xxxx xxxx 0100 xxxx */
+	/* STREXH		1110 1000 1100 xxxx xxxx xxxx 0101 xxxx */
+	/* STREXD		1110 1000 1100 xxxx xxxx xxxx 0111 xxxx */
+	/* LDREXB		1110 1000 1101 xxxx xxxx xxxx 0100 xxxx */
+	/* LDREXH		1110 1000 1101 xxxx xxxx xxxx 0101 xxxx */
+	/* LDREXD		1110 1000 1101 xxxx xxxx xxxx 0111 xxxx */
+	/* And unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1110_101x[] = {
+	/* Data-processing (shifted register)				*/
+
+	/* TST			1110 1010 0001 xxxx xxxx 1111 xxxx xxxx */
+	/* TEQ			1110 1010 1001 xxxx xxxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xff700f00, 0xea100f00, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOSPPC, 0, 0, 0, NOSPPC)),
+
+	/* CMN			1110 1011 0001 xxxx xxxx 1111 xxxx xxxx */
+	DECODE_OR	(0xfff00f00, 0xeb100f00),
+	/* CMP			1110 1011 1011 xxxx xxxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xfff00f00, 0xebb00f00, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOPC, 0, 0, 0, NOSPPC)),
+
+	/* MOV			1110 1010 010x 1111 xxxx xxxx xxxx xxxx */
+	/* MVN			1110 1010 011x 1111 xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xffcf0000, 0xea4f0000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(0, 0, NOSPPC, 0, NOSPPC)),
+
+	/* ???			1110 1010 101x xxxx xxxx xxxx xxxx xxxx */
+	/* ???			1110 1010 111x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffa00000, 0xeaa00000),
+	/* ???			1110 1011 001x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffe00000, 0xeb200000),
+	/* ???			1110 1011 100x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffe00000, 0xeb800000),
+	/* ???			1110 1011 111x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xffe00000, 0xebe00000),
+
+	/* ADD/SUB SP, SP, Rm, LSL #0..3				*/
+	/*			1110 1011 x0xx 1101 x000 1101 xx00 xxxx */
+	DECODE_EMULATEX	(0xff4f7f30, 0xeb0d0d00, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(SP, 0, SP, 0, NOSPPC)),
+
+	/* ADD/SUB SP, SP, Rm, shift					*/
+	/*			1110 1011 x0xx 1101 xxxx 1101 xxxx xxxx */
+	DECODE_REJECT	(0xff4f0f00, 0xeb0d0d00),
+
+	/* ADD/SUB Rd, SP, Rm, shift					*/
+	/*			1110 1011 x0xx 1101 xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xff4f0000, 0xeb0d0000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(SP, 0, NOPC, 0, NOSPPC)),
+
+	/* AND			1110 1010 000x xxxx xxxx xxxx xxxx xxxx */
+	/* BIC			1110 1010 001x xxxx xxxx xxxx xxxx xxxx */
+	/* ORR			1110 1010 010x xxxx xxxx xxxx xxxx xxxx */
+	/* ORN			1110 1010 011x xxxx xxxx xxxx xxxx xxxx */
+	/* EOR			1110 1010 100x xxxx xxxx xxxx xxxx xxxx */
+	/* PKH			1110 1010 110x xxxx xxxx xxxx xxxx xxxx */
+	/* ADD			1110 1011 000x xxxx xxxx xxxx xxxx xxxx */
+	/* ADC			1110 1011 010x xxxx xxxx xxxx xxxx xxxx */
+	/* SBC			1110 1011 011x xxxx xxxx xxxx xxxx xxxx */
+	/* SUB			1110 1011 101x xxxx xxxx xxxx xxxx xxxx */
+	/* RSB			1110 1011 110x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfe000000, 0xea000000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_0x0x___0[] = {
+	/* Data-processing (modified immediate)				*/
+
+	/* TST			1111 0x00 0001 xxxx 0xxx 1111 xxxx xxxx */
+	/* TEQ			1111 0x00 1001 xxxx 0xxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xfb708f00, 0xf0100f00, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOSPPC, 0, 0, 0, 0)),
+
+	/* CMN			1111 0x01 0001 xxxx 0xxx 1111 xxxx xxxx */
+	DECODE_OR	(0xfbf08f00, 0xf1100f00),
+	/* CMP			1111 0x01 1011 xxxx 0xxx 1111 xxxx xxxx */
+	DECODE_EMULATEX	(0xfbf08f00, 0xf1b00f00, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOPC, 0, 0, 0, 0)),
+
+	/* MOV			1111 0x00 010x 1111 0xxx xxxx xxxx xxxx */
+	/* MVN			1111 0x00 011x 1111 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbcf8000, 0xf04f0000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/* ???			1111 0x00 101x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf0a00000),
+	/* ???			1111 0x00 110x xxxx 0xxx xxxx xxxx xxxx */
+	/* ???			1111 0x00 111x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbc08000, 0xf0c00000),
+	/* ???			1111 0x01 001x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf1200000),
+	/* ???			1111 0x01 100x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf1800000),
+	/* ???			1111 0x01 111x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfbe08000, 0xf1e00000),
+
+	/* ADD Rd, SP, #imm	1111 0x01 000x 1101 0xxx xxxx xxxx xxxx */
+	/* SUB Rd, SP, #imm	1111 0x01 101x 1101 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb4f8000, 0xf10d0000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(SP, 0, NOPC, 0, 0)),
+
+	/* AND			1111 0x00 000x xxxx 0xxx xxxx xxxx xxxx */
+	/* BIC			1111 0x00 001x xxxx 0xxx xxxx xxxx xxxx */
+	/* ORR			1111 0x00 010x xxxx 0xxx xxxx xxxx xxxx */
+	/* ORN			1111 0x00 011x xxxx 0xxx xxxx xxxx xxxx */
+	/* EOR			1111 0x00 100x xxxx 0xxx xxxx xxxx xxxx */
+	/* ADD			1111 0x01 000x xxxx 0xxx xxxx xxxx xxxx */
+	/* ADC			1111 0x01 010x xxxx 0xxx xxxx xxxx xxxx */
+	/* SBC			1111 0x01 011x xxxx 0xxx xxxx xxxx xxxx */
+	/* SUB			1111 0x01 101x xxxx 0xxx xxxx xxxx xxxx */
+	/* RSB			1111 0x01 110x xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfa008000, 0xf0000000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_0x1x___0[] = {
+	/* Data-processing (plain binary immediate)			*/
+
+	/* ADDW Rd, PC, #imm	1111 0x10 0000 1111 0xxx xxxx xxxx xxxx */
+	DECODE_OR	(0xfbff8000, 0xf20f0000),
+	/* SUBW	Rd, PC, #imm	1111 0x10 1010 1111 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbff8000, 0xf2af0000, t32_emulate_rd8pc16_noflags,
+						 REGS(PC, 0, NOSPPC, 0, 0)),
+
+	/* ADDW SP, SP, #imm	1111 0x10 0000 1101 0xxx 1101 xxxx xxxx */
+	DECODE_OR	(0xfbff8f00, 0xf20d0d00),
+	/* SUBW	SP, SP, #imm	1111 0x10 1010 1101 0xxx 1101 xxxx xxxx */
+	DECODE_EMULATEX	(0xfbff8f00, 0xf2ad0d00, t32_emulate_rd8rn16_noflags,
+						 REGS(SP, 0, SP, 0, 0)),
+
+	/* ADDW			1111 0x10 0000 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_OR	(0xfbf08000, 0xf2000000),
+	/* SUBW			1111 0x10 1010 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbf08000, 0xf2a00000, t32_emulate_rd8rn16_noflags,
+						 REGS(NOPCX, 0, NOSPPC, 0, 0)),
+
+	/* MOVW			1111 0x10 0100 xxxx 0xxx xxxx xxxx xxxx */
+	/* MOVT			1111 0x10 1100 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb708000, 0xf2400000, t32_emulate_rd8rn16_noflags,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/* SSAT16		1111 0x11 0010 xxxx 0000 xxxx 00xx xxxx */
+	/* SSAT			1111 0x11 00x0 xxxx 0xxx xxxx xxxx xxxx */
+	/* USAT16		1111 0x11 1010 xxxx 0000 xxxx 00xx xxxx */
+	/* USAT			1111 0x11 10x0 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb508000, 0xf3000000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
+
+	/* SFBX			1111 0x11 0100 xxxx 0xxx xxxx xxxx xxxx */
+	/* UFBX			1111 0x11 1100 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfb708000, 0xf3400000, t32_emulate_rd8rn16_noflags,
+						 REGS(NOSPPC, 0, NOSPPC, 0, 0)),
+
+	/* BFC			1111 0x11 0110 1111 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbff8000, 0xf36f0000, t32_emulate_rd8rn16_noflags,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/* BFI			1111 0x11 0110 xxxx 0xxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfbf08000, 0xf3600000, t32_emulate_rd8rn16_noflags,
+						 REGS(NOSPPCX, 0, NOSPPC, 0, 0)),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_0xxx___1[] = {
+	/* Branches and miscellaneous control				*/
+
+	/* YIELD		1111 0011 1010 xxxx 10x0 x000 0000 0001 */
+	DECODE_OR	(0xfff0d7ff, 0xf3a08001),
+	/* SEV			1111 0011 1010 xxxx 10x0 x000 0000 0100 */
+	DECODE_EMULATE	(0xfff0d7ff, 0xf3a08004, kprobe_emulate_none),
+	/* NOP			1111 0011 1010 xxxx 10x0 x000 0000 0000 */
+	/* WFE			1111 0011 1010 xxxx 10x0 x000 0000 0010 */
+	/* WFI			1111 0011 1010 xxxx 10x0 x000 0000 0011 */
+	DECODE_SIMULATE	(0xfff0d7fc, 0xf3a08000, kprobe_simulate_nop),
+
+	/* MRS Rd, CPSR		1111 0011 1110 xxxx 10x0 xxxx xxxx xxxx */
+	DECODE_SIMULATEX(0xfff0d000, 0xf3e08000, t32_simulate_mrs,
+						 REGS(0, 0, NOSPPC, 0, 0)),
+
+	/*
+	 * Unsupported instructions
+	 *			1111 0x11 1xxx xxxx 10x0 xxxx xxxx xxxx
+	 *
+	 * MSR			1111 0011 100x xxxx 10x0 xxxx xxxx xxxx
+	 * DBG hint		1111 0011 1010 xxxx 10x0 x000 1111 xxxx
+	 * Unallocated hints	1111 0011 1010 xxxx 10x0 x000 xxxx xxxx
+	 * CPS			1111 0011 1010 xxxx 10x0 xxxx xxxx xxxx
+	 * CLREX/DSB/DMB/ISB	1111 0011 1011 xxxx 10x0 xxxx xxxx xxxx
+	 * BXJ			1111 0011 1100 xxxx 10x0 xxxx xxxx xxxx
+	 * SUBS PC,LR,#<imm8>	1111 0011 1101 xxxx 10x0 xxxx xxxx xxxx
+	 * MRS Rd, SPSR		1111 0011 1111 xxxx 10x0 xxxx xxxx xxxx
+	 * SMC			1111 0111 1111 xxxx 1000 xxxx xxxx xxxx
+	 * UNDEFINED		1111 0111 1111 xxxx 1010 xxxx xxxx xxxx
+	 * ???			1111 0111 1xxx xxxx 1010 xxxx xxxx xxxx
+	 */
+	DECODE_REJECT	(0xfb80d000, 0xf3808000),
+
+	/* Bcc			1111 0xxx xxxx xxxx 10x0 xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0xf800d000, 0xf0008000, t32_decode_cond_branch),
+
+	/* BLX			1111 0xxx xxxx xxxx 11x0 xxxx xxxx xxx0 */
+	DECODE_OR	(0xf800d001, 0xf000c000),
+	/* B			1111 0xxx xxxx xxxx 10x1 xxxx xxxx xxxx */
+	/* BL			1111 0xxx xxxx xxxx 11x1 xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xf8009000, 0xf0009000, t32_simulate_branch),
+
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_100x_x0x1__1111[] = {
+	/* Memory hints							*/
+
+	/* PLD (literal)	1111 1000 x001 1111 1111 xxxx xxxx xxxx */
+	/* PLI (literal)	1111 1001 x001 1111 1111 xxxx xxxx xxxx */
+	DECODE_SIMULATE	(0xfe7ff000, 0xf81ff000, kprobe_simulate_nop),
+
+	/* PLD{W} (immediate)	1111 1000 10x1 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_OR	(0xffd0f000, 0xf890f000),
+	/* PLD{W} (immediate)	1111 1000 00x1 xxxx 1111 1100 xxxx xxxx */
+	DECODE_OR	(0xffd0ff00, 0xf810fc00),
+	/* PLI (immediate)	1111 1001 1001 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_OR	(0xfff0f000, 0xf990f000),
+	/* PLI (immediate)	1111 1001 0001 xxxx 1111 1100 xxxx xxxx */
+	DECODE_SIMULATEX(0xfff0ff00, 0xf910fc00, kprobe_simulate_nop,
+						 REGS(NOPCX, 0, 0, 0, 0)),
+
+	/* PLD{W} (register)	1111 1000 00x1 xxxx 1111 0000 00xx xxxx */
+	DECODE_OR	(0xffd0ffc0, 0xf810f000),
+	/* PLI (register)	1111 1001 0001 xxxx 1111 0000 00xx xxxx */
+	DECODE_SIMULATEX(0xfff0ffc0, 0xf910f000, kprobe_simulate_nop,
+						 REGS(NOPCX, 0, 0, 0, NOSPPC)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_100x[] = {
+	/* Store/Load single data item					*/
+
+	/* ???			1111 100x x11x xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfe600000, 0xf8600000),
+
+	/* ???			1111 1001 0101 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xfff00000, 0xf9500000),
+
+	/* ???			1111 100x 0xxx xxxx xxxx 10x0 xxxx xxxx */
+	DECODE_REJECT	(0xfe800d00, 0xf8000800),
+
+	/* STRBT		1111 1000 0000 xxxx xxxx 1110 xxxx xxxx */
+	/* STRHT		1111 1000 0010 xxxx xxxx 1110 xxxx xxxx */
+	/* STRT			1111 1000 0100 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRBT		1111 1000 0001 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRSBT		1111 1001 0001 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRHT		1111 1000 0011 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRSHT		1111 1001 0011 xxxx xxxx 1110 xxxx xxxx */
+	/* LDRT			1111 1000 0101 xxxx xxxx 1110 xxxx xxxx */
+	DECODE_REJECT	(0xfe800f00, 0xf8000e00),
+
+	/* STR{,B,H} Rn,[PC...]	1111 1000 xxx0 1111 xxxx xxxx xxxx xxxx */
+	DECODE_REJECT	(0xff1f0000, 0xf80f0000),
+
+	/* STR{,B,H} PC,[Rn...]	1111 1000 xxx0 xxxx 1111 xxxx xxxx xxxx */
+	DECODE_REJECT	(0xff10f000, 0xf800f000),
+
+	/* LDR (literal)	1111 1000 x101 1111 xxxx xxxx xxxx xxxx */
+	DECODE_SIMULATEX(0xff7f0000, 0xf85f0000, t32_simulate_ldr_literal,
+						 REGS(PC, ANY, 0, 0, 0)),
+
+	/* STR (immediate)	1111 1000 0100 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDR (immediate)	1111 1000 0101 xxxx xxxx 1xxx xxxx xxxx */
+	DECODE_OR	(0xffe00800, 0xf8400800),
+	/* STR (immediate)	1111 1000 1100 xxxx xxxx xxxx xxxx xxxx */
+	/* LDR (immediate)	1111 1000 1101 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xffe00000, 0xf8c00000, t32_emulate_ldrstr,
+						 REGS(NOPCX, ANY, 0, 0, 0)),
+
+	/* STR (register)	1111 1000 0100 xxxx xxxx 0000 00xx xxxx */
+	/* LDR (register)	1111 1000 0101 xxxx xxxx 0000 00xx xxxx */
+	DECODE_EMULATEX	(0xffe00fc0, 0xf8400000, t32_emulate_ldrstr,
+						 REGS(NOPCX, ANY, 0, 0, NOSPPC)),
+
+	/* LDRB (literal)	1111 1000 x001 1111 xxxx xxxx xxxx xxxx */
+	/* LDRSB (literal)	1111 1001 x001 1111 xxxx xxxx xxxx xxxx */
+	/* LDRH (literal)	1111 1000 x011 1111 xxxx xxxx xxxx xxxx */
+	/* LDRSH (literal)	1111 1001 x011 1111 xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfe5f0000, 0xf81f0000, t32_simulate_ldr_literal,
+						 REGS(PC, NOSPPCX, 0, 0, 0)),
+
+	/* STRB (immediate)	1111 1000 0000 xxxx xxxx 1xxx xxxx xxxx */
+	/* STRH (immediate)	1111 1000 0010 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRB (immediate)	1111 1000 0001 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRSB (immediate)	1111 1001 0001 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRH (immediate)	1111 1000 0011 xxxx xxxx 1xxx xxxx xxxx */
+	/* LDRSH (immediate)	1111 1001 0011 xxxx xxxx 1xxx xxxx xxxx */
+	DECODE_OR	(0xfec00800, 0xf8000800),
+	/* STRB (immediate)	1111 1000 1000 xxxx xxxx xxxx xxxx xxxx */
+	/* STRH (immediate)	1111 1000 1010 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRB (immediate)	1111 1000 1001 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRSB (immediate)	1111 1001 1001 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRH (immediate)	1111 1000 1011 xxxx xxxx xxxx xxxx xxxx */
+	/* LDRSH (immediate)	1111 1001 1011 xxxx xxxx xxxx xxxx xxxx */
+	DECODE_EMULATEX	(0xfec00000, 0xf8800000, t32_emulate_ldrstr,
+						 REGS(NOPCX, NOSPPCX, 0, 0, 0)),
+
+	/* STRB (register)	1111 1000 0000 xxxx xxxx 0000 00xx xxxx */
+	/* STRH (register)	1111 1000 0010 xxxx xxxx 0000 00xx xxxx */
+	/* LDRB (register)	1111 1000 0001 xxxx xxxx 0000 00xx xxxx */
+	/* LDRSB (register)	1111 1001 0001 xxxx xxxx 0000 00xx xxxx */
+	/* LDRH (register)	1111 1000 0011 xxxx xxxx 0000 00xx xxxx */
+	/* LDRSH (register)	1111 1001 0011 xxxx xxxx 0000 00xx xxxx */
+	DECODE_EMULATEX	(0xfe800fc0, 0xf8000000, t32_emulate_ldrstr,
+						 REGS(NOPCX, NOSPPCX, 0, 0, NOSPPC)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_1010___1111[] = {
+	/* Data-processing (register)					*/
+
+	/* ???			1111 1010 011x xxxx 1111 xxxx 1xxx xxxx */
+	DECODE_REJECT	(0xffe0f080, 0xfa60f080),
+
+	/* SXTH			1111 1010 0000 1111 1111 xxxx 1xxx xxxx */
+	/* UXTH			1111 1010 0001 1111 1111 xxxx 1xxx xxxx */
+	/* SXTB16		1111 1010 0010 1111 1111 xxxx 1xxx xxxx */
+	/* UXTB16		1111 1010 0011 1111 1111 xxxx 1xxx xxxx */
+	/* SXTB			1111 1010 0100 1111 1111 xxxx 1xxx xxxx */
+	/* UXTB			1111 1010 0101 1111 1111 xxxx 1xxx xxxx */
+	DECODE_EMULATEX	(0xff8ff080, 0xfa0ff080, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(0, 0, NOSPPC, 0, NOSPPC)),
+
+
+	/* ???			1111 1010 1xxx xxxx 1111 xxxx 0x11 xxxx */
+	DECODE_REJECT	(0xff80f0b0, 0xfa80f030),
+	/* ???			1111 1010 1x11 xxxx 1111 xxxx 0xxx xxxx */
+	DECODE_REJECT	(0xffb0f080, 0xfab0f000),
+
+	/* SADD16		1111 1010 1001 xxxx 1111 xxxx 0000 xxxx */
+	/* SASX			1111 1010 1010 xxxx 1111 xxxx 0000 xxxx */
+	/* SSAX			1111 1010 1110 xxxx 1111 xxxx 0000 xxxx */
+	/* SSUB16		1111 1010 1101 xxxx 1111 xxxx 0000 xxxx */
+	/* SADD8		1111 1010 1000 xxxx 1111 xxxx 0000 xxxx */
+	/* SSUB8		1111 1010 1100 xxxx 1111 xxxx 0000 xxxx */
+
+	/* QADD16		1111 1010 1001 xxxx 1111 xxxx 0001 xxxx */
+	/* QASX			1111 1010 1010 xxxx 1111 xxxx 0001 xxxx */
+	/* QSAX			1111 1010 1110 xxxx 1111 xxxx 0001 xxxx */
+	/* QSUB16		1111 1010 1101 xxxx 1111 xxxx 0001 xxxx */
+	/* QADD8		1111 1010 1000 xxxx 1111 xxxx 0001 xxxx */
+	/* QSUB8		1111 1010 1100 xxxx 1111 xxxx 0001 xxxx */
+
+	/* SHADD16		1111 1010 1001 xxxx 1111 xxxx 0010 xxxx */
+	/* SHASX		1111 1010 1010 xxxx 1111 xxxx 0010 xxxx */
+	/* SHSAX		1111 1010 1110 xxxx 1111 xxxx 0010 xxxx */
+	/* SHSUB16		1111 1010 1101 xxxx 1111 xxxx 0010 xxxx */
+	/* SHADD8		1111 1010 1000 xxxx 1111 xxxx 0010 xxxx */
+	/* SHSUB8		1111 1010 1100 xxxx 1111 xxxx 0010 xxxx */
+
+	/* UADD16		1111 1010 1001 xxxx 1111 xxxx 0100 xxxx */
+	/* UASX			1111 1010 1010 xxxx 1111 xxxx 0100 xxxx */
+	/* USAX			1111 1010 1110 xxxx 1111 xxxx 0100 xxxx */
+	/* USUB16		1111 1010 1101 xxxx 1111 xxxx 0100 xxxx */
+	/* UADD8		1111 1010 1000 xxxx 1111 xxxx 0100 xxxx */
+	/* USUB8		1111 1010 1100 xxxx 1111 xxxx 0100 xxxx */
+
+	/* UQADD16		1111 1010 1001 xxxx 1111 xxxx 0101 xxxx */
+	/* UQASX		1111 1010 1010 xxxx 1111 xxxx 0101 xxxx */
+	/* UQSAX		1111 1010 1110 xxxx 1111 xxxx 0101 xxxx */
+	/* UQSUB16		1111 1010 1101 xxxx 1111 xxxx 0101 xxxx */
+	/* UQADD8		1111 1010 1000 xxxx 1111 xxxx 0101 xxxx */
+	/* UQSUB8		1111 1010 1100 xxxx 1111 xxxx 0101 xxxx */
+
+	/* UHADD16		1111 1010 1001 xxxx 1111 xxxx 0110 xxxx */
+	/* UHASX		1111 1010 1010 xxxx 1111 xxxx 0110 xxxx */
+	/* UHSAX		1111 1010 1110 xxxx 1111 xxxx 0110 xxxx */
+	/* UHSUB16		1111 1010 1101 xxxx 1111 xxxx 0110 xxxx */
+	/* UHADD8		1111 1010 1000 xxxx 1111 xxxx 0110 xxxx */
+	/* UHSUB8		1111 1010 1100 xxxx 1111 xxxx 0110 xxxx */
+	DECODE_OR	(0xff80f080, 0xfa80f000),
+
+	/* SXTAH		1111 1010 0000 xxxx 1111 xxxx 1xxx xxxx */
+	/* UXTAH		1111 1010 0001 xxxx 1111 xxxx 1xxx xxxx */
+	/* SXTAB16		1111 1010 0010 xxxx 1111 xxxx 1xxx xxxx */
+	/* UXTAB16		1111 1010 0011 xxxx 1111 xxxx 1xxx xxxx */
+	/* SXTAB		1111 1010 0100 xxxx 1111 xxxx 1xxx xxxx */
+	/* UXTAB		1111 1010 0101 xxxx 1111 xxxx 1xxx xxxx */
+	DECODE_OR	(0xff80f080, 0xfa00f080),
+
+	/* QADD			1111 1010 1000 xxxx 1111 xxxx 1000 xxxx */
+	/* QDADD		1111 1010 1000 xxxx 1111 xxxx 1001 xxxx */
+	/* QSUB			1111 1010 1000 xxxx 1111 xxxx 1010 xxxx */
+	/* QDSUB		1111 1010 1000 xxxx 1111 xxxx 1011 xxxx */
+	DECODE_OR	(0xfff0f0c0, 0xfa80f080),
+
+	/* SEL			1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */
+	DECODE_OR	(0xfff0f0f0, 0xfaa0f080),
+
+	/* LSL			1111 1010 000x xxxx 1111 xxxx 0000 xxxx */
+	/* LSR			1111 1010 001x xxxx 1111 xxxx 0000 xxxx */
+	/* ASR			1111 1010 010x xxxx 1111 xxxx 0000 xxxx */
+	/* ROR			1111 1010 011x xxxx 1111 xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff80f0f0, 0xfa00f000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
+
+	/* CLZ			1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */
+	DECODE_OR	(0xfff0f0f0, 0xfab0f080),
+
+	/* REV			1111 1010 1001 xxxx 1111 xxxx 1000 xxxx */
+	/* REV16		1111 1010 1001 xxxx 1111 xxxx 1001 xxxx */
+	/* RBIT			1111 1010 1001 xxxx 1111 xxxx 1010 xxxx */
+	/* REVSH		1111 1010 1001 xxxx 1111 xxxx 1011 xxxx */
+	DECODE_EMULATEX	(0xfff0f0c0, 0xfa90f080, t32_emulate_rd8rn16_noflags,
+						 REGS(NOSPPC, 0, NOSPPC, 0, SAMEAS16)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_1011_0[] = {
+	/* Multiply, multiply accumulate, and absolute difference	*/
+
+	/* ???			1111 1011 0000 xxxx 1111 xxxx 0001 xxxx */
+	DECODE_REJECT	(0xfff0f0f0, 0xfb00f010),
+	/* ???			1111 1011 0111 xxxx 1111 xxxx 0001 xxxx */
+	DECODE_REJECT	(0xfff0f0f0, 0xfb70f010),
+
+	/* SMULxy		1111 1011 0001 xxxx 1111 xxxx 00xx xxxx */
+	DECODE_OR	(0xfff0f0c0, 0xfb10f000),
+	/* MUL			1111 1011 0000 xxxx 1111 xxxx 0000 xxxx */
+	/* SMUAD{X}		1111 1011 0010 xxxx 1111 xxxx 000x xxxx */
+	/* SMULWy		1111 1011 0011 xxxx 1111 xxxx 000x xxxx */
+	/* SMUSD{X}		1111 1011 0100 xxxx 1111 xxxx 000x xxxx */
+	/* SMMUL{R}		1111 1011 0101 xxxx 1111 xxxx 000x xxxx */
+	/* USAD8		1111 1011 0111 xxxx 1111 xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff80f0e0, 0xfb00f000, t32_emulate_rd8rn16rm0_rwflags,
+						 REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)),
+
+	/* ???			1111 1011 0111 xxxx xxxx xxxx 0001 xxxx */
+	DECODE_REJECT	(0xfff000f0, 0xfb700010),
+
+	/* SMLAxy		1111 1011 0001 xxxx xxxx xxxx 00xx xxxx */
+	DECODE_OR	(0xfff000c0, 0xfb100000),
+	/* MLA			1111 1011 0000 xxxx xxxx xxxx 0000 xxxx */
+	/* MLS			1111 1011 0000 xxxx xxxx xxxx 0001 xxxx */
+	/* SMLAD{X}		1111 1011 0010 xxxx xxxx xxxx 000x xxxx */
+	/* SMLAWy		1111 1011 0011 xxxx xxxx xxxx 000x xxxx */
+	/* SMLSD{X}		1111 1011 0100 xxxx xxxx xxxx 000x xxxx */
+	/* SMMLA{R}		1111 1011 0101 xxxx xxxx xxxx 000x xxxx */
+	/* SMMLS{R}		1111 1011 0110 xxxx xxxx xxxx 000x xxxx */
+	/* USADA8		1111 1011 0111 xxxx xxxx xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff8000c0, 0xfb000000, t32_emulate_rd8rn16rm0ra12_noflags,
+						 REGS(NOSPPC, NOSPPCX, NOSPPC, 0, NOSPPC)),
+
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+static const union decode_item t32_table_1111_1011_1[] = {
+	/* Long multiply, long multiply accumulate, and divide		*/
+
+	/* UMAAL		1111 1011 1110 xxxx xxxx xxxx 0110 xxxx */
+	DECODE_OR	(0xfff000f0, 0xfbe00060),
+	/* SMLALxy		1111 1011 1100 xxxx xxxx xxxx 10xx xxxx */
+	DECODE_OR	(0xfff000c0, 0xfbc00080),
+	/* SMLALD{X}		1111 1011 1100 xxxx xxxx xxxx 110x xxxx */
+	/* SMLSLD{X}		1111 1011 1101 xxxx xxxx xxxx 110x xxxx */
+	DECODE_OR	(0xffe000e0, 0xfbc000c0),
+	/* SMULL		1111 1011 1000 xxxx xxxx xxxx 0000 xxxx */
+	/* UMULL		1111 1011 1010 xxxx xxxx xxxx 0000 xxxx */
+	/* SMLAL		1111 1011 1100 xxxx xxxx xxxx 0000 xxxx */
+	/* UMLAL		1111 1011 1110 xxxx xxxx xxxx 0000 xxxx */
+	DECODE_EMULATEX	(0xff9000f0, 0xfb800000, t32_emulate_rdlo12rdhi8rn16rm0_noflags,
+						 REGS(NOSPPC, NOSPPC, NOSPPC, 0, NOSPPC)),
+
+	/* SDIV			1111 1011 1001 xxxx xxxx xxxx 1111 xxxx */
+	/* UDIV			1111 1011 1011 xxxx xxxx xxxx 1111 xxxx */
+	/* Other unallocated instructions...				*/
+	DECODE_END
+};
+
+const union decode_item kprobe_decode_thumb32_table[] = {
+
+	/*
+	 * Load/store multiple instructions
+	 *			1110 100x x0xx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe400000, 0xe8000000, t32_table_1110_100x_x0xx),
+
+	/*
+	 * Load/store dual, load/store exclusive, table branch
+	 *			1110 100x x1xx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe400000, 0xe8400000, t32_table_1110_100x_x1xx),
+
+	/*
+	 * Data-processing (shifted register)
+	 *			1110 101x xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe000000, 0xea000000, t32_table_1110_101x),
+
+	/*
+	 * Coprocessor instructions
+	 *			1110 11xx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_REJECT	(0xfc000000, 0xec000000),
+
+	/*
+	 * Data-processing (modified immediate)
+	 *			1111 0x0x xxxx xxxx 0xxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfa008000, 0xf0000000, t32_table_1111_0x0x___0),
+
+	/*
+	 * Data-processing (plain binary immediate)
+	 *			1111 0x1x xxxx xxxx 0xxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfa008000, 0xf2000000, t32_table_1111_0x1x___0),
+
+	/*
+	 * Branches and miscellaneous control
+	 *			1111 0xxx xxxx xxxx 1xxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xf8008000, 0xf0008000, t32_table_1111_0xxx___1),
+
+	/*
+	 * Advanced SIMD element or structure load/store instructions
+	 *			1111 1001 xxx0 xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_REJECT	(0xff100000, 0xf9000000),
+
+	/*
+	 * Memory hints
+	 *			1111 100x x0x1 xxxx 1111 xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe50f000, 0xf810f000, t32_table_1111_100x_x0x1__1111),
+
+	/*
+	 * Store single data item
+	 *			1111 1000 xxx0 xxxx xxxx xxxx xxxx xxxx
+	 * Load single data items
+	 *			1111 100x xxx1 xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xfe000000, 0xf8000000, t32_table_1111_100x),
+
+	/*
+	 * Data-processing (register)
+	 *			1111 1010 xxxx xxxx 1111 xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xff00f000, 0xfa00f000, t32_table_1111_1010___1111),
+
+	/*
+	 * Multiply, multiply accumulate, and absolute difference
+	 *			1111 1011 0xxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xff800000, 0xfb000000, t32_table_1111_1011_0),
+
+	/*
+	 * Long multiply, long multiply accumulate, and divide
+	 *			1111 1011 1xxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xff800000, 0xfb800000, t32_table_1111_1011_1),
+
+	/*
+	 * Coprocessor instructions
+	 *			1111 11xx xxxx xxxx xxxx xxxx xxxx xxxx
+	 */
+	DECODE_END
+};
+
+static void __kprobes
+t16_simulate_bxblx(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+	int rm = (insn >> 3) & 0xf;
+	unsigned long rmv = (rm == 15) ? pc : regs->uregs[rm];
+
+	if (insn & (1 << 7)) /* BLX ? */
+		regs->ARM_lr = (unsigned long)p->addr + 2;
+
+	bx_write_pc(rmv, regs);
+}
+
+static void __kprobes
+t16_simulate_ldr_literal(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long* base = (unsigned long *)(thumb_probe_pc(p) & ~3);
+	long index = insn & 0xff;
+	int rt = (insn >> 8) & 0x7;
+	regs->uregs[rt] = base[index];
+}
+
+static void __kprobes
+t16_simulate_ldrstr_sp_relative(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long* base = (unsigned long *)regs->ARM_sp;
+	long index = insn & 0xff;
+	int rt = (insn >> 8) & 0x7;
+	if (insn & 0x800) /* LDR */
+		regs->uregs[rt] = base[index];
+	else /* STR */
+		base[index] = regs->uregs[rt];
+}
+
+static void __kprobes
+t16_simulate_reladr(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long base = (insn & 0x800) ? regs->ARM_sp
+					    : (thumb_probe_pc(p) & ~3);
+	long offset = insn & 0xff;
+	int rt = (insn >> 8) & 0x7;
+	regs->uregs[rt] = base + offset * 4;
+}
+
+static void __kprobes
+t16_simulate_add_sp_imm(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	long imm = insn & 0x7f;
+	if (insn & 0x80) /* SUB */
+		regs->ARM_sp -= imm * 4;
+	else /* ADD */
+		regs->ARM_sp += imm * 4;
+}
+
+static void __kprobes
+t16_simulate_cbz(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rn = insn & 0x7;
+	kprobe_opcode_t nonzero = regs->uregs[rn] ? insn : ~insn;
+	if (nonzero & 0x800) {
+		long i = insn & 0x200;
+		long imm5 = insn & 0xf8;
+		unsigned long pc = thumb_probe_pc(p);
+		regs->ARM_pc = pc + (i >> 3) + (imm5 >> 2);
+	}
+}
+
+static void __kprobes
+t16_simulate_it(struct kprobe *p, struct pt_regs *regs)
+{
+	/*
+	 * The 8 IT state bits are split into two parts in CPSR:
+	 *	ITSTATE<1:0> are in CPSR<26:25>
+	 *	ITSTATE<7:2> are in CPSR<15:10>
+	 * The new IT state is in the lower byte of insn.
+	 */
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long cpsr = regs->ARM_cpsr;
+	cpsr &= ~PSR_IT_MASK;
+	cpsr |= (insn & 0xfc) << 8;
+	cpsr |= (insn & 0x03) << 25;
+	regs->ARM_cpsr = cpsr;
+}
+
+static void __kprobes
+t16_singlestep_it(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->ARM_pc += 2;
+	t16_simulate_it(p, regs);
+}
+
+static enum kprobe_insn __kprobes
+t16_decode_it(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	asi->insn_singlestep = t16_singlestep_it;
+	return INSN_GOOD_NO_SLOT;
+}
+
+static void __kprobes
+t16_simulate_cond_branch(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+	long offset = insn & 0x7f;
+	offset -= insn & 0x80; /* Apply sign bit */
+	regs->ARM_pc = pc + (offset * 2);
+}
+
+static enum kprobe_insn __kprobes
+t16_decode_cond_branch(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	int cc = (insn >> 8) & 0xf;
+	asi->insn_check_cc = kprobe_condition_checks[cc];
+	asi->insn_handler = t16_simulate_cond_branch;
+	return INSN_GOOD_NO_SLOT;
+}
+
+static void __kprobes
+t16_simulate_branch(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+	long offset = insn & 0x3ff;
+	offset -= insn & 0x400; /* Apply sign bit */
+	regs->ARM_pc = pc + (offset * 2);
+}
+
+static unsigned long __kprobes
+t16_emulate_loregs(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long oldcpsr = regs->ARM_cpsr;
+	unsigned long newcpsr;
+
+	__asm__ __volatile__ (
+		"msr	cpsr_fs, %[oldcpsr]	\n\t"
+		"ldmia	%[regs], {r0-r7}	\n\t"
+		"blx	%[fn]			\n\t"
+		"stmia	%[regs], {r0-r7}	\n\t"
+		"mrs	%[newcpsr], cpsr	\n\t"
+		: [newcpsr] "=r" (newcpsr)
+		: [oldcpsr] "r" (oldcpsr), [regs] "r" (regs),
+		  [fn] "r" (p->ainsn.insn_fn)
+		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+		  "lr", "memory", "cc"
+		);
+
+	return (oldcpsr & ~APSR_MASK) | (newcpsr & APSR_MASK);
+}
+
+static void __kprobes
+t16_emulate_loregs_rwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->ARM_cpsr = t16_emulate_loregs(p, regs);
+}
+
+static void __kprobes
+t16_emulate_loregs_noitrwflags(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long cpsr = t16_emulate_loregs(p, regs);
+	if (!in_it_block(cpsr))
+		regs->ARM_cpsr = cpsr;
+}
+
+static void __kprobes
+t16_emulate_hiregs(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	unsigned long pc = thumb_probe_pc(p);
+	int rdn = (insn & 0x7) | ((insn & 0x80) >> 4);
+	int rm = (insn >> 3) & 0xf;
+
+	register unsigned long rdnv asm("r1");
+	register unsigned long rmv asm("r0");
+	unsigned long cpsr = regs->ARM_cpsr;
+
+	rdnv = (rdn == 15) ? pc : regs->uregs[rdn];
+	rmv = (rm == 15) ? pc : regs->uregs[rm];
+
+	__asm__ __volatile__ (
+		"msr	cpsr_fs, %[cpsr]	\n\t"
+		"blx    %[fn]			\n\t"
+		"mrs	%[cpsr], cpsr		\n\t"
+		: "=r" (rdnv), [cpsr] "=r" (cpsr)
+		: "0" (rdnv), "r" (rmv), "1" (cpsr), [fn] "r" (p->ainsn.insn_fn)
+		: "lr", "memory", "cc"
+	);
+
+	if (rdn == 15)
+		rdnv &= ~1;
+
+	regs->uregs[rdn] = rdnv;
+	regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK);
+}
+
+static enum kprobe_insn __kprobes
+t16_decode_hiregs(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	insn &= ~0x00ff;
+	insn |= 0x001; /* Set Rdn = R1 and Rm = R0 */
+	((u16 *)asi->insn)[0] = insn;
+	asi->insn_handler = t16_emulate_hiregs;
+	return INSN_GOOD;
+}
+
+static void __kprobes
+t16_emulate_push(struct kprobe *p, struct pt_regs *regs)
+{
+	__asm__ __volatile__ (
+		"ldr	r9, [%[regs], #13*4]	\n\t"
+		"ldr	r8, [%[regs], #14*4]	\n\t"
+		"ldmia	%[regs], {r0-r7}	\n\t"
+		"blx	%[fn]			\n\t"
+		"str	r9, [%[regs], #13*4]	\n\t"
+		:
+		: [regs] "r" (regs), [fn] "r" (p->ainsn.insn_fn)
+		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
+		  "lr", "memory", "cc"
+		);
+}
+
+static enum kprobe_insn __kprobes
+t16_decode_push(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	/*
+	 * To simulate a PUSH we use a Thumb-2 "STMDB R9!, {registers}"
+	 * and call it with R9=SP and LR in the register list represented
+	 * by R8.
+	 */
+	((u16 *)asi->insn)[0] = 0xe929;		/* 1st half STMDB R9!,{} */
+	((u16 *)asi->insn)[1] = insn & 0x1ff;	/* 2nd half (register list) */
+	asi->insn_handler = t16_emulate_push;
+	return INSN_GOOD;
+}
+
+static void __kprobes
+t16_emulate_pop_nopc(struct kprobe *p, struct pt_regs *regs)
+{
+	__asm__ __volatile__ (
+		"ldr	r9, [%[regs], #13*4]	\n\t"
+		"ldmia	%[regs], {r0-r7}	\n\t"
+		"blx	%[fn]			\n\t"
+		"stmia	%[regs], {r0-r7}	\n\t"
+		"str	r9, [%[regs], #13*4]	\n\t"
+		:
+		: [regs] "r" (regs), [fn] "r" (p->ainsn.insn_fn)
+		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9",
+		  "lr", "memory", "cc"
+		);
+}
+
+static void __kprobes
+t16_emulate_pop_pc(struct kprobe *p, struct pt_regs *regs)
+{
+	register unsigned long pc asm("r8");
+
+	__asm__ __volatile__ (
+		"ldr	r9, [%[regs], #13*4]	\n\t"
+		"ldmia	%[regs], {r0-r7}	\n\t"
+		"blx	%[fn]			\n\t"
+		"stmia	%[regs], {r0-r7}	\n\t"
+		"str	r9, [%[regs], #13*4]	\n\t"
+		: "=r" (pc)
+		: [regs] "r" (regs), [fn] "r" (p->ainsn.insn_fn)
+		: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9",
+		  "lr", "memory", "cc"
+		);
+
+	bx_write_pc(pc, regs);
+}
+
+static enum kprobe_insn __kprobes
+t16_decode_pop(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	/*
+	 * To simulate a POP we use a Thumb-2 "LDMDB R9!, {registers}"
+	 * and call it with R9=SP and PC in the register list represented
+	 * by R8.
+	 */
+	((u16 *)asi->insn)[0] = 0xe8b9;		/* 1st half LDMIA R9!,{} */
+	((u16 *)asi->insn)[1] = insn & 0x1ff;	/* 2nd half (register list) */
+	asi->insn_handler = insn & 0x100 ? t16_emulate_pop_pc
+					 : t16_emulate_pop_nopc;
+	return INSN_GOOD;
+}
+
+static const union decode_item t16_table_1011[] = {
+	/* Miscellaneous 16-bit instructions		    */
+
+	/* ADD (SP plus immediate)	1011 0000 0xxx xxxx */
+	/* SUB (SP minus immediate)	1011 0000 1xxx xxxx */
+	DECODE_SIMULATE	(0xff00, 0xb000, t16_simulate_add_sp_imm),
+
+	/* CBZ				1011 00x1 xxxx xxxx */
+	/* CBNZ				1011 10x1 xxxx xxxx */
+	DECODE_SIMULATE	(0xf500, 0xb100, t16_simulate_cbz),
+
+	/* SXTH				1011 0010 00xx xxxx */
+	/* SXTB				1011 0010 01xx xxxx */
+	/* UXTH				1011 0010 10xx xxxx */
+	/* UXTB				1011 0010 11xx xxxx */
+	/* REV				1011 1010 00xx xxxx */
+	/* REV16			1011 1010 01xx xxxx */
+	/* ???				1011 1010 10xx xxxx */
+	/* REVSH			1011 1010 11xx xxxx */
+	DECODE_REJECT	(0xffc0, 0xba80),
+	DECODE_EMULATE	(0xf500, 0xb000, t16_emulate_loregs_rwflags),
+
+	/* PUSH				1011 010x xxxx xxxx */
+	DECODE_CUSTOM	(0xfe00, 0xb400, t16_decode_push),
+	/* POP				1011 110x xxxx xxxx */
+	DECODE_CUSTOM	(0xfe00, 0xbc00, t16_decode_pop),
+
+	/*
+	 * If-Then, and hints
+	 *				1011 1111 xxxx xxxx
+	 */
+
+	/* YIELD			1011 1111 0001 0000 */
+	DECODE_OR	(0xffff, 0xbf10),
+	/* SEV				1011 1111 0100 0000 */
+	DECODE_EMULATE	(0xffff, 0xbf40, kprobe_emulate_none),
+	/* NOP				1011 1111 0000 0000 */
+	/* WFE				1011 1111 0010 0000 */
+	/* WFI				1011 1111 0011 0000 */
+	DECODE_SIMULATE	(0xffcf, 0xbf00, kprobe_simulate_nop),
+	/* Unassigned hints		1011 1111 xxxx 0000 */
+	DECODE_REJECT	(0xff0f, 0xbf00),
+	/* IT				1011 1111 xxxx xxxx */
+	DECODE_CUSTOM	(0xff00, 0xbf00, t16_decode_it),
+
+	/* SETEND			1011 0110 010x xxxx */
+	/* CPS				1011 0110 011x xxxx */
+	/* BKPT				1011 1110 xxxx xxxx */
+	/* And unallocated instructions...		    */
+	DECODE_END
+};
+
+const union decode_item kprobe_decode_thumb16_table[] = {
+
+	/*
+	 * Shift (immediate), add, subtract, move, and compare
+	 *				00xx xxxx xxxx xxxx
+	 */
+
+	/* CMP (immediate)		0010 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xf800, 0x2800, t16_emulate_loregs_rwflags),
+
+	/* ADD (register)		0001 100x xxxx xxxx */
+	/* SUB (register)		0001 101x xxxx xxxx */
+	/* LSL (immediate)		0000 0xxx xxxx xxxx */
+	/* LSR (immediate)		0000 1xxx xxxx xxxx */
+	/* ASR (immediate)		0001 0xxx xxxx xxxx */
+	/* ADD (immediate, Thumb)	0001 110x xxxx xxxx */
+	/* SUB (immediate, Thumb)	0001 111x xxxx xxxx */
+	/* MOV (immediate)		0010 0xxx xxxx xxxx */
+	/* ADD (immediate, Thumb)	0011 0xxx xxxx xxxx */
+	/* SUB (immediate, Thumb)	0011 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xc000, 0x0000, t16_emulate_loregs_noitrwflags),
+
+	/*
+	 * 16-bit Thumb data-processing instructions
+	 *				0100 00xx xxxx xxxx
+	 */
+
+	/* TST (register)		0100 0010 00xx xxxx */
+	DECODE_EMULATE	(0xffc0, 0x4200, t16_emulate_loregs_rwflags),
+	/* CMP (register)		0100 0010 10xx xxxx */
+	/* CMN (register)		0100 0010 11xx xxxx */
+	DECODE_EMULATE	(0xff80, 0x4280, t16_emulate_loregs_rwflags),
+	/* AND (register)		0100 0000 00xx xxxx */
+	/* EOR (register)		0100 0000 01xx xxxx */
+	/* LSL (register)		0100 0000 10xx xxxx */
+	/* LSR (register)		0100 0000 11xx xxxx */
+	/* ASR (register)		0100 0001 00xx xxxx */
+	/* ADC (register)		0100 0001 01xx xxxx */
+	/* SBC (register)		0100 0001 10xx xxxx */
+	/* ROR (register)		0100 0001 11xx xxxx */
+	/* RSB (immediate)		0100 0010 01xx xxxx */
+	/* ORR (register)		0100 0011 00xx xxxx */
+	/* MUL				0100 0011 00xx xxxx */
+	/* BIC (register)		0100 0011 10xx xxxx */
+	/* MVN (register)		0100 0011 10xx xxxx */
+	DECODE_EMULATE	(0xfc00, 0x4000, t16_emulate_loregs_noitrwflags),
+
+	/*
+	 * Special data instructions and branch and exchange
+	 *				0100 01xx xxxx xxxx
+	 */
+
+	/* BLX pc			0100 0111 1111 1xxx */
+	DECODE_REJECT	(0xfff8, 0x47f8),
+
+	/* BX (register)		0100 0111 0xxx xxxx */
+	/* BLX (register)		0100 0111 1xxx xxxx */
+	DECODE_SIMULATE (0xff00, 0x4700, t16_simulate_bxblx),
+
+	/* ADD pc, pc			0100 0100 1111 1111 */
+	DECODE_REJECT	(0xffff, 0x44ff),
+
+	/* ADD (register)		0100 0100 xxxx xxxx */
+	/* CMP (register)		0100 0101 xxxx xxxx */
+	/* MOV (register)		0100 0110 xxxx xxxx */
+	DECODE_CUSTOM	(0xfc00, 0x4400, t16_decode_hiregs),
+
+	/*
+	 * Load from Literal Pool
+	 * LDR (literal)		0100 1xxx xxxx xxxx
+	 */
+	DECODE_SIMULATE	(0xf800, 0x4800, t16_simulate_ldr_literal),
+
+	/*
+	 * 16-bit Thumb Load/store instructions
+	 *				0101 xxxx xxxx xxxx
+	 *				011x xxxx xxxx xxxx
+	 *				100x xxxx xxxx xxxx
+	 */
+
+	/* STR (register)		0101 000x xxxx xxxx */
+	/* STRH (register)		0101 001x xxxx xxxx */
+	/* STRB (register)		0101 010x xxxx xxxx */
+	/* LDRSB (register)		0101 011x xxxx xxxx */
+	/* LDR (register)		0101 100x xxxx xxxx */
+	/* LDRH (register)		0101 101x xxxx xxxx */
+	/* LDRB (register)		0101 110x xxxx xxxx */
+	/* LDRSH (register)		0101 111x xxxx xxxx */
+	/* STR (immediate, Thumb)	0110 0xxx xxxx xxxx */
+	/* LDR (immediate, Thumb)	0110 1xxx xxxx xxxx */
+	/* STRB (immediate, Thumb)	0111 0xxx xxxx xxxx */
+	/* LDRB (immediate, Thumb)	0111 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xc000, 0x4000, t16_emulate_loregs_rwflags),
+	/* STRH (immediate, Thumb)	1000 0xxx xxxx xxxx */
+	/* LDRH (immediate, Thumb)	1000 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xf000, 0x8000, t16_emulate_loregs_rwflags),
+	/* STR (immediate, Thumb)	1001 0xxx xxxx xxxx */
+	/* LDR (immediate, Thumb)	1001 1xxx xxxx xxxx */
+	DECODE_SIMULATE	(0xf000, 0x9000, t16_simulate_ldrstr_sp_relative),
+
+	/*
+	 * Generate PC-/SP-relative address
+	 * ADR (literal)		1010 0xxx xxxx xxxx
+	 * ADD (SP plus immediate)	1010 1xxx xxxx xxxx
+	 */
+	DECODE_SIMULATE	(0xf000, 0xa000, t16_simulate_reladr),
+
+	/*
+	 * Miscellaneous 16-bit instructions
+	 *				1011 xxxx xxxx xxxx
+	 */
+	DECODE_TABLE	(0xf000, 0xb000, t16_table_1011),
+
+	/* STM				1100 0xxx xxxx xxxx */
+	/* LDM				1100 1xxx xxxx xxxx */
+	DECODE_EMULATE	(0xf000, 0xc000, t16_emulate_loregs_rwflags),
+
+	/*
+	 * Conditional branch, and Supervisor Call
+	 */
+
+	/* Permanently UNDEFINED	1101 1110 xxxx xxxx */
+	/* SVC				1101 1111 xxxx xxxx */
+	DECODE_REJECT	(0xfe00, 0xde00),
+
+	/* Conditional branch		1101 xxxx xxxx xxxx */
+	DECODE_CUSTOM	(0xf000, 0xd000, t16_decode_cond_branch),
+
+	/*
+	 * Unconditional branch
+	 * B				1110 0xxx xxxx xxxx
+	 */
+	DECODE_SIMULATE	(0xf800, 0xe000, t16_simulate_branch),
+
+	DECODE_END
+};
+
+static unsigned long __kprobes thumb_check_cc(unsigned long cpsr)
+{
+	if (unlikely(in_it_block(cpsr)))
+		return kprobe_condition_checks[current_cond(cpsr)](cpsr);
+	return true;
+}
+
+static void __kprobes thumb16_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->ARM_pc += 2;
+	p->ainsn.insn_handler(p, regs);
+	regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+}
+
+static void __kprobes thumb32_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	regs->ARM_pc += 4;
+	p->ainsn.insn_handler(p, regs);
+	regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+}
+
+enum kprobe_insn __kprobes
+thumb16_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	asi->insn_singlestep = thumb16_singlestep;
+	asi->insn_check_cc = thumb_check_cc;
+	return kprobe_decode_insn(insn, asi, kprobe_decode_thumb16_table, true);
+}
+
+enum kprobe_insn __kprobes
+thumb32_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	asi->insn_singlestep = thumb32_singlestep;
+	asi->insn_check_cc = thumb_check_cc;
+	return kprobe_decode_insn(insn, asi, kprobe_decode_thumb32_table, true);
+}

+ 201 - 21
arch/arm/kernel/kprobes.c

@@ -28,14 +28,16 @@
 #include <asm/traps.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
 #include <asm/cacheflush.h>
 
 
+#include "kprobes.h"
+
 #define MIN_STACK_SIZE(addr) 				\
 #define MIN_STACK_SIZE(addr) 				\
 	min((unsigned long)MAX_STACK_SIZE,		\
 	min((unsigned long)MAX_STACK_SIZE,		\
 	    (unsigned long)current_thread_info() + THREAD_START_SP - (addr))
 	    (unsigned long)current_thread_info() + THREAD_START_SP - (addr))
 
 
-#define flush_insns(addr, cnt) 				\
+#define flush_insns(addr, size)				\
 	flush_icache_range((unsigned long)(addr),	\
 	flush_icache_range((unsigned long)(addr),	\
 			   (unsigned long)(addr) +	\
 			   (unsigned long)(addr) +	\
-			   sizeof(kprobe_opcode_t) * (cnt))
+			   (size))
 
 
 /* Used as a marker in ARM_pc to note when we're in a jprobe. */
 /* Used as a marker in ARM_pc to note when we're in a jprobe. */
 #define JPROBE_MAGIC_ADDR		0xffffffff
 #define JPROBE_MAGIC_ADDR		0xffffffff
@@ -49,16 +51,35 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	kprobe_opcode_t insn;
 	kprobe_opcode_t insn;
 	kprobe_opcode_t tmp_insn[MAX_INSN_SIZE];
 	kprobe_opcode_t tmp_insn[MAX_INSN_SIZE];
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long addr = (unsigned long)p->addr;
+	bool thumb;
+	kprobe_decode_insn_t *decode_insn;
 	int is;
 	int is;
 
 
-	if (addr & 0x3 || in_exception_text(addr))
+	if (in_exception_text(addr))
 		return -EINVAL;
 		return -EINVAL;
 
 
+#ifdef CONFIG_THUMB2_KERNEL
+	thumb = true;
+	addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */
+	insn = ((u16 *)addr)[0];
+	if (is_wide_instruction(insn)) {
+		insn <<= 16;
+		insn |= ((u16 *)addr)[1];
+		decode_insn = thumb32_kprobe_decode_insn;
+	} else
+		decode_insn = thumb16_kprobe_decode_insn;
+#else /* !CONFIG_THUMB2_KERNEL */
+	thumb = false;
+	if (addr & 0x3)
+		return -EINVAL;
 	insn = *p->addr;
 	insn = *p->addr;
+	decode_insn = arm_kprobe_decode_insn;
+#endif
+
 	p->opcode = insn;
 	p->opcode = insn;
 	p->ainsn.insn = tmp_insn;
 	p->ainsn.insn = tmp_insn;
 
 
-	switch (arm_kprobe_decode_insn(insn, &p->ainsn)) {
+	switch ((*decode_insn)(insn, &p->ainsn)) {
 	case INSN_REJECTED:	/* not supported */
 	case INSN_REJECTED:	/* not supported */
 		return -EINVAL;
 		return -EINVAL;
 
 
@@ -68,7 +89,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 			return -ENOMEM;
 			return -ENOMEM;
 		for (is = 0; is < MAX_INSN_SIZE; ++is)
 		for (is = 0; is < MAX_INSN_SIZE; ++is)
 			p->ainsn.insn[is] = tmp_insn[is];
 			p->ainsn.insn[is] = tmp_insn[is];
-		flush_insns(p->ainsn.insn, MAX_INSN_SIZE);
+		flush_insns(p->ainsn.insn,
+				sizeof(p->ainsn.insn[0]) * MAX_INSN_SIZE);
+		p->ainsn.insn_fn = (kprobe_insn_fn_t *)
+					((uintptr_t)p->ainsn.insn | thumb);
 		break;
 		break;
 
 
 	case INSN_GOOD_NO_SLOT:	/* instruction doesn't need insn slot */
 	case INSN_GOOD_NO_SLOT:	/* instruction doesn't need insn slot */
@@ -79,24 +103,88 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	return 0;
 	return 0;
 }
 }
 
 
+#ifdef CONFIG_THUMB2_KERNEL
+
+/*
+ * For a 32-bit Thumb breakpoint spanning two memory words we need to take
+ * special precautions to insert the breakpoint atomically, especially on SMP
+ * systems. This is achieved by calling this arming function using stop_machine.
+ */
+static int __kprobes set_t32_breakpoint(void *addr)
+{
+	((u16 *)addr)[0] = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION >> 16;
+	((u16 *)addr)[1] = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION & 0xffff;
+	flush_insns(addr, 2*sizeof(u16));
+	return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	uintptr_t addr = (uintptr_t)p->addr & ~1; /* Remove any Thumb flag */
+
+	if (!is_wide_instruction(p->opcode)) {
+		*(u16 *)addr = KPROBE_THUMB16_BREAKPOINT_INSTRUCTION;
+		flush_insns(addr, sizeof(u16));
+	} else if (addr & 2) {
+		/* A 32-bit instruction spanning two words needs special care */
+		stop_machine(set_t32_breakpoint, (void *)addr, &cpu_online_map);
+	} else {
+		/* Word aligned 32-bit instruction can be written atomically */
+		u32 bkp = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION;
+#ifndef __ARMEB__ /* Swap halfwords for little-endian */
+		bkp = (bkp >> 16) | (bkp << 16);
+#endif
+		*(u32 *)addr = bkp;
+		flush_insns(addr, sizeof(u32));
+	}
+}
+
+#else /* !CONFIG_THUMB2_KERNEL */
+
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
 {
-	*p->addr = KPROBE_BREAKPOINT_INSTRUCTION;
-	flush_insns(p->addr, 1);
+	kprobe_opcode_t insn = p->opcode;
+	kprobe_opcode_t brkp = KPROBE_ARM_BREAKPOINT_INSTRUCTION;
+	if (insn >= 0xe0000000)
+		brkp |= 0xe0000000;  /* Unconditional instruction */
+	else
+		brkp |= insn & 0xf0000000;  /* Copy condition from insn */
+	*p->addr = brkp;
+	flush_insns(p->addr, sizeof(p->addr[0]));
 }
 }
 
 
+#endif /* !CONFIG_THUMB2_KERNEL */
+
 /*
 /*
  * The actual disarming is done here on each CPU and synchronized using
  * The actual disarming is done here on each CPU and synchronized using
  * stop_machine. This synchronization is necessary on SMP to avoid removing
  * stop_machine. This synchronization is necessary on SMP to avoid removing
  * a probe between the moment the 'Undefined Instruction' exception is raised
  * a probe between the moment the 'Undefined Instruction' exception is raised
  * and the moment the exception handler reads the faulting instruction from
  * and the moment the exception handler reads the faulting instruction from
- * memory.
+ * memory. It is also needed to atomically set the two half-words of a 32-bit
+ * Thumb breakpoint.
  */
  */
 int __kprobes __arch_disarm_kprobe(void *p)
 int __kprobes __arch_disarm_kprobe(void *p)
 {
 {
 	struct kprobe *kp = p;
 	struct kprobe *kp = p;
+#ifdef CONFIG_THUMB2_KERNEL
+	u16 *addr = (u16 *)((uintptr_t)kp->addr & ~1);
+	kprobe_opcode_t insn = kp->opcode;
+	unsigned int len;
+
+	if (is_wide_instruction(insn)) {
+		((u16 *)addr)[0] = insn>>16;
+		((u16 *)addr)[1] = insn;
+		len = 2*sizeof(u16);
+	} else {
+		((u16 *)addr)[0] = insn;
+		len = sizeof(u16);
+	}
+	flush_insns(addr, len);
+
+#else /* !CONFIG_THUMB2_KERNEL */
 	*kp->addr = kp->opcode;
 	*kp->addr = kp->opcode;
-	flush_insns(kp->addr, 1);
+	flush_insns(kp->addr, sizeof(kp->addr[0]));
+#endif
 	return 0;
 	return 0;
 }
 }
 
 
@@ -130,12 +218,24 @@ static void __kprobes set_current_kprobe(struct kprobe *p)
 	__get_cpu_var(current_kprobe) = p;
 	__get_cpu_var(current_kprobe) = p;
 }
 }
 
 
-static void __kprobes singlestep(struct kprobe *p, struct pt_regs *regs,
-				 struct kprobe_ctlblk *kcb)
+static void __kprobes
+singlestep_skip(struct kprobe *p, struct pt_regs *regs)
 {
 {
+#ifdef CONFIG_THUMB2_KERNEL
+	regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+	if (is_wide_instruction(p->opcode))
+		regs->ARM_pc += 4;
+	else
+		regs->ARM_pc += 2;
+#else
 	regs->ARM_pc += 4;
 	regs->ARM_pc += 4;
-	if (p->ainsn.insn_check_cc(regs->ARM_cpsr))
-		p->ainsn.insn_handler(p, regs);
+#endif
+}
+
+static inline void __kprobes
+singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+{
+	p->ainsn.insn_singlestep(p, regs);
 }
 }
 
 
 /*
 /*
@@ -149,11 +249,23 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 {
 {
 	struct kprobe *p, *cur;
 	struct kprobe *p, *cur;
 	struct kprobe_ctlblk *kcb;
 	struct kprobe_ctlblk *kcb;
-	kprobe_opcode_t	*addr = (kprobe_opcode_t *)regs->ARM_pc;
 
 
 	kcb = get_kprobe_ctlblk();
 	kcb = get_kprobe_ctlblk();
 	cur = kprobe_running();
 	cur = kprobe_running();
-	p = get_kprobe(addr);
+
+#ifdef CONFIG_THUMB2_KERNEL
+	/*
+	 * First look for a probe which was registered using an address with
+	 * bit 0 set, this is the usual situation for pointers to Thumb code.
+	 * If not found, fallback to looking for one with bit 0 clear.
+	 */
+	p = get_kprobe((kprobe_opcode_t *)(regs->ARM_pc | 1));
+	if (!p)
+		p = get_kprobe((kprobe_opcode_t *)regs->ARM_pc);
+
+#else /* ! CONFIG_THUMB2_KERNEL */
+	p = get_kprobe((kprobe_opcode_t *)regs->ARM_pc);
+#endif
 
 
 	if (p) {
 	if (p) {
 		if (cur) {
 		if (cur) {
@@ -173,7 +285,8 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 				/* impossible cases */
 				/* impossible cases */
 				BUG();
 				BUG();
 			}
 			}
-		} else {
+		} else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) {
+			/* Probe hit and conditional execution check ok. */
 			set_current_kprobe(p);
 			set_current_kprobe(p);
 			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 
 
@@ -193,6 +306,13 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 				}
 				}
 				reset_current_kprobe();
 				reset_current_kprobe();
 			}
 			}
+		} else {
+			/*
+			 * Probe hit but conditional execution check failed,
+			 * so just skip the instruction and continue as if
+			 * nothing had happened.
+			 */
+			singlestep_skip(p, regs);
 		}
 		}
 	} else if (cur) {
 	} else if (cur) {
 		/* We probably hit a jprobe.  Call its break handler. */
 		/* We probably hit a jprobe.  Call its break handler. */
@@ -300,7 +420,11 @@ void __naked __kprobes kretprobe_trampoline(void)
 		"bl	trampoline_handler	\n\t"
 		"bl	trampoline_handler	\n\t"
 		"mov	lr, r0			\n\t"
 		"mov	lr, r0			\n\t"
 		"ldmia	sp!, {r0 - r11}		\n\t"
 		"ldmia	sp!, {r0 - r11}		\n\t"
+#ifdef CONFIG_THUMB2_KERNEL
+		"bx	lr			\n\t"
+#else
 		"mov	pc, lr			\n\t"
 		"mov	pc, lr			\n\t"
+#endif
 		: : : "memory");
 		: : : "memory");
 }
 }
 
 
@@ -378,11 +502,22 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	long sp_addr = regs->ARM_sp;
 	long sp_addr = regs->ARM_sp;
+	long cpsr;
 
 
 	kcb->jprobe_saved_regs = *regs;
 	kcb->jprobe_saved_regs = *regs;
 	memcpy(kcb->jprobes_stack, (void *)sp_addr, MIN_STACK_SIZE(sp_addr));
 	memcpy(kcb->jprobes_stack, (void *)sp_addr, MIN_STACK_SIZE(sp_addr));
 	regs->ARM_pc = (long)jp->entry;
 	regs->ARM_pc = (long)jp->entry;
-	regs->ARM_cpsr |= PSR_I_BIT;
+
+	cpsr = regs->ARM_cpsr | PSR_I_BIT;
+#ifdef CONFIG_THUMB2_KERNEL
+	/* Set correct Thumb state in cpsr */
+	if (regs->ARM_pc & 1)
+		cpsr |= PSR_T_BIT;
+	else
+		cpsr &= ~PSR_T_BIT;
+#endif
+	regs->ARM_cpsr = cpsr;
+
 	preempt_disable();
 	preempt_disable();
 	return 1;
 	return 1;
 }
 }
@@ -404,7 +539,12 @@ void __kprobes jprobe_return(void)
 		 * This is to prevent any simulated instruction from writing
 		 * This is to prevent any simulated instruction from writing
 		 * over the regs when they are accessing the stack.
 		 * over the regs when they are accessing the stack.
 		 */
 		 */
+#ifdef CONFIG_THUMB2_KERNEL
+		"sub    r0, %0, %1		\n\t"
+		"mov    sp, r0			\n\t"
+#else
 		"sub    sp, %0, %1		\n\t"
 		"sub    sp, %0, %1		\n\t"
+#endif
 		"ldr    r0, ="__stringify(JPROBE_MAGIC_ADDR)"\n\t"
 		"ldr    r0, ="__stringify(JPROBE_MAGIC_ADDR)"\n\t"
 		"str    %0, [sp, %2]		\n\t"
 		"str    %0, [sp, %2]		\n\t"
 		"str    r0, [sp, %3]		\n\t"
 		"str    r0, [sp, %3]		\n\t"
@@ -415,15 +555,28 @@ void __kprobes jprobe_return(void)
 		 * Return to the context saved by setjmp_pre_handler
 		 * Return to the context saved by setjmp_pre_handler
 		 * and restored by longjmp_break_handler.
 		 * and restored by longjmp_break_handler.
 		 */
 		 */
+#ifdef CONFIG_THUMB2_KERNEL
+		"ldr	lr, [sp, %2]		\n\t" /* lr = saved sp */
+		"ldrd	r0, r1, [sp, %5]	\n\t" /* r0,r1 = saved lr,pc */
+		"ldr	r2, [sp, %4]		\n\t" /* r2 = saved psr */
+		"stmdb	lr!, {r0, r1, r2}	\n\t" /* push saved lr and */
+						      /* rfe context */
+		"ldmia	sp, {r0 - r12}		\n\t"
+		"mov	sp, lr			\n\t"
+		"ldr	lr, [sp], #4		\n\t"
+		"rfeia	sp!			\n\t"
+#else
 		"ldr	r0, [sp, %4]		\n\t"
 		"ldr	r0, [sp, %4]		\n\t"
 		"msr	cpsr_cxsf, r0		\n\t"
 		"msr	cpsr_cxsf, r0		\n\t"
 		"ldmia	sp, {r0 - pc}		\n\t"
 		"ldmia	sp, {r0 - pc}		\n\t"
+#endif
 		:
 		:
 		: "r" (kcb->jprobe_saved_regs.ARM_sp),
 		: "r" (kcb->jprobe_saved_regs.ARM_sp),
 		  "I" (sizeof(struct pt_regs) * 2),
 		  "I" (sizeof(struct pt_regs) * 2),
 		  "J" (offsetof(struct pt_regs, ARM_sp)),
 		  "J" (offsetof(struct pt_regs, ARM_sp)),
 		  "J" (offsetof(struct pt_regs, ARM_pc)),
 		  "J" (offsetof(struct pt_regs, ARM_pc)),
-		  "J" (offsetof(struct pt_regs, ARM_cpsr))
+		  "J" (offsetof(struct pt_regs, ARM_cpsr)),
+		  "J" (offsetof(struct pt_regs, ARM_lr))
 		: "memory", "cc");
 		: "memory", "cc");
 }
 }
 
 
@@ -460,17 +613,44 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 	return 0;
 	return 0;
 }
 }
 
 
-static struct undef_hook kprobes_break_hook = {
+#ifdef CONFIG_THUMB2_KERNEL
+
+static struct undef_hook kprobes_thumb16_break_hook = {
+	.instr_mask	= 0xffff,
+	.instr_val	= KPROBE_THUMB16_BREAKPOINT_INSTRUCTION,
+	.cpsr_mask	= MODE_MASK,
+	.cpsr_val	= SVC_MODE,
+	.fn		= kprobe_trap_handler,
+};
+
+static struct undef_hook kprobes_thumb32_break_hook = {
 	.instr_mask	= 0xffffffff,
 	.instr_mask	= 0xffffffff,
-	.instr_val	= KPROBE_BREAKPOINT_INSTRUCTION,
+	.instr_val	= KPROBE_THUMB32_BREAKPOINT_INSTRUCTION,
 	.cpsr_mask	= MODE_MASK,
 	.cpsr_mask	= MODE_MASK,
 	.cpsr_val	= SVC_MODE,
 	.cpsr_val	= SVC_MODE,
 	.fn		= kprobe_trap_handler,
 	.fn		= kprobe_trap_handler,
 };
 };
 
 
+#else  /* !CONFIG_THUMB2_KERNEL */
+
+static struct undef_hook kprobes_arm_break_hook = {
+	.instr_mask	= 0x0fffffff,
+	.instr_val	= KPROBE_ARM_BREAKPOINT_INSTRUCTION,
+	.cpsr_mask	= MODE_MASK,
+	.cpsr_val	= SVC_MODE,
+	.fn		= kprobe_trap_handler,
+};
+
+#endif /* !CONFIG_THUMB2_KERNEL */
+
 int __init arch_init_kprobes()
 int __init arch_init_kprobes()
 {
 {
 	arm_kprobe_decode_init();
 	arm_kprobe_decode_init();
-	register_undef_hook(&kprobes_break_hook);
+#ifdef CONFIG_THUMB2_KERNEL
+	register_undef_hook(&kprobes_thumb16_break_hook);
+	register_undef_hook(&kprobes_thumb32_break_hook);
+#else
+	register_undef_hook(&kprobes_arm_break_hook);
+#endif
 	return 0;
 	return 0;
 }
 }

+ 420 - 0
arch/arm/kernel/kprobes.h

@@ -0,0 +1,420 @@
+/*
+ * arch/arm/kernel/kprobes.h
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * Some contents moved here from arch/arm/include/asm/kprobes.h which is
+ * Copyright (C) 2006, 2007 Motorola Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_KPROBES_H
+#define _ARM_KERNEL_KPROBES_H
+
+/*
+ * These undefined instructions must be unique and
+ * reserved solely for kprobes' use.
+ */
+#define KPROBE_ARM_BREAKPOINT_INSTRUCTION	0x07f001f8
+#define KPROBE_THUMB16_BREAKPOINT_INSTRUCTION	0xde18
+#define KPROBE_THUMB32_BREAKPOINT_INSTRUCTION	0xf7f0a018
+
+
+enum kprobe_insn {
+	INSN_REJECTED,
+	INSN_GOOD,
+	INSN_GOOD_NO_SLOT
+};
+
+typedef enum kprobe_insn (kprobe_decode_insn_t)(kprobe_opcode_t,
+						struct arch_specific_insn *);
+
+#ifdef CONFIG_THUMB2_KERNEL
+
+enum kprobe_insn thumb16_kprobe_decode_insn(kprobe_opcode_t,
+						struct arch_specific_insn *);
+enum kprobe_insn thumb32_kprobe_decode_insn(kprobe_opcode_t,
+						struct arch_specific_insn *);
+
+#else /* !CONFIG_THUMB2_KERNEL */
+
+enum kprobe_insn arm_kprobe_decode_insn(kprobe_opcode_t,
+					struct arch_specific_insn *);
+#endif
+
+void __init arm_kprobe_decode_init(void);
+
+extern kprobe_check_cc * const kprobe_condition_checks[16];
+
+
+#if __LINUX_ARM_ARCH__ >= 7
+
+/* str_pc_offset is architecturally defined from ARMv7 onwards */
+#define str_pc_offset 8
+#define find_str_pc_offset()
+
+#else /* __LINUX_ARM_ARCH__ < 7 */
+
+/* We need a run-time check to determine str_pc_offset */
+extern int str_pc_offset;
+void __init find_str_pc_offset(void);
+
+#endif
+
+
+/*
+ * Update ITSTATE after normal execution of an IT block instruction.
+ *
+ * The 8 IT state bits are split into two parts in CPSR:
+ *	ITSTATE<1:0> are in CPSR<26:25>
+ *	ITSTATE<7:2> are in CPSR<15:10>
+ */
+static inline unsigned long it_advance(unsigned long cpsr)
+	{
+	if ((cpsr & 0x06000400) == 0) {
+		/* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
+		cpsr &= ~PSR_IT_MASK;
+	} else {
+		/* We need to shift left ITSTATE<4:0> */
+		const unsigned long mask = 0x06001c00;  /* Mask ITSTATE<4:0> */
+		unsigned long it = cpsr & mask;
+		it <<= 1;
+		it |= it >> (27 - 10);  /* Carry ITSTATE<2> to correct place */
+		it &= mask;
+		cpsr &= ~mask;
+		cpsr |= it;
+	}
+	return cpsr;
+}
+
+static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs)
+{
+	long cpsr = regs->ARM_cpsr;
+	if (pcv & 0x1) {
+		cpsr |= PSR_T_BIT;
+		pcv &= ~0x1;
+	} else {
+		cpsr &= ~PSR_T_BIT;
+		pcv &= ~0x2;	/* Avoid UNPREDICTABLE address allignment */
+	}
+	regs->ARM_cpsr = cpsr;
+	regs->ARM_pc = pcv;
+}
+
+
+#if __LINUX_ARM_ARCH__ >= 6
+
+/* Kernels built for >= ARMv6 should never run on <= ARMv5 hardware, so... */
+#define load_write_pc_interworks true
+#define test_load_write_pc_interworking()
+
+#else /* __LINUX_ARM_ARCH__ < 6 */
+
+/* We need run-time testing to determine if load_write_pc() should interwork. */
+extern bool load_write_pc_interworks;
+void __init test_load_write_pc_interworking(void);
+
+#endif
+
+static inline void __kprobes load_write_pc(long pcv, struct pt_regs *regs)
+{
+	if (load_write_pc_interworks)
+		bx_write_pc(pcv, regs);
+	else
+		regs->ARM_pc = pcv;
+}
+
+
+#if __LINUX_ARM_ARCH__ >= 7
+
+#define alu_write_pc_interworks true
+#define test_alu_write_pc_interworking()
+
+#elif __LINUX_ARM_ARCH__ <= 5
+
+/* Kernels built for <= ARMv5 should never run on >= ARMv6 hardware, so... */
+#define alu_write_pc_interworks false
+#define test_alu_write_pc_interworking()
+
+#else /* __LINUX_ARM_ARCH__ == 6 */
+
+/* We could be an ARMv6 binary on ARMv7 hardware so we need a run-time check. */
+extern bool alu_write_pc_interworks;
+void __init test_alu_write_pc_interworking(void);
+
+#endif /* __LINUX_ARM_ARCH__ == 6 */
+
+static inline void __kprobes alu_write_pc(long pcv, struct pt_regs *regs)
+{
+	if (alu_write_pc_interworks)
+		bx_write_pc(pcv, regs);
+	else
+		regs->ARM_pc = pcv;
+}
+
+
+void __kprobes kprobe_simulate_nop(struct kprobe *p, struct pt_regs *regs);
+void __kprobes kprobe_emulate_none(struct kprobe *p, struct pt_regs *regs);
+
+enum kprobe_insn __kprobes
+kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi);
+
+/*
+ * Test if load/store instructions writeback the address register.
+ * if P (bit 24) == 0 or W (bit 21) == 1
+ */
+#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000)
+
+/*
+ * The following definitions and macros are used to build instruction
+ * decoding tables for use by kprobe_decode_insn.
+ *
+ * These tables are a concatenation of entries each of which consist of one of
+ * the decode_* structs. All of the fields in every type of decode structure
+ * are of the union type decode_item, therefore the entire decode table can be
+ * viewed as an array of these and declared like:
+ *
+ *	static const union decode_item table_name[] = {};
+ *
+ * In order to construct each entry in the table, macros are used to
+ * initialise a number of sequential decode_item values in a layout which
+ * matches the relevant struct. E.g. DECODE_SIMULATE initialise a struct
+ * decode_simulate by initialising four decode_item objects like this...
+ *
+ *	{.bits = _type},
+ *	{.bits = _mask},
+ *	{.bits = _value},
+ *	{.handler = _handler},
+ *
+ * Initialising a specified member of the union means that the compiler
+ * will produce a warning if the argument is of an incorrect type.
+ *
+ * Below is a list of each of the macros used to initialise entries and a
+ * description of the action performed when that entry is matched to an
+ * instruction. A match is found when (instruction & mask) == value.
+ *
+ * DECODE_TABLE(mask, value, table)
+ *	Instruction decoding jumps to parsing the new sub-table 'table'.
+ *
+ * DECODE_CUSTOM(mask, value, decoder)
+ *	The custom function 'decoder' is called to the complete decoding
+ *	of an instruction.
+ *
+ * DECODE_SIMULATE(mask, value, handler)
+ *	Set the probes instruction handler to 'handler', this will be used
+ *	to simulate the instruction when the probe is hit. Decoding returns
+ *	with INSN_GOOD_NO_SLOT.
+ *
+ * DECODE_EMULATE(mask, value, handler)
+ *	Set the probes instruction handler to 'handler', this will be used
+ *	to emulate the instruction when the probe is hit. The modified
+ *	instruction (see below) is placed in the probes instruction slot so it
+ *	may be called by the emulation code. Decoding returns with INSN_GOOD.
+ *
+ * DECODE_REJECT(mask, value)
+ *	Instruction decoding fails with INSN_REJECTED
+ *
+ * DECODE_OR(mask, value)
+ *	This allows the mask/value test of multiple table entries to be
+ *	logically ORed. Once an 'or' entry is matched the decoding action to
+ *	be performed is that of the next entry which isn't an 'or'. E.g.
+ *
+ *		DECODE_OR	(mask1, value1)
+ *		DECODE_OR	(mask2, value2)
+ *		DECODE_SIMULATE	(mask3, value3, simulation_handler)
+ *
+ *	This means that if any of the three mask/value pairs match the
+ *	instruction being decoded, then 'simulation_handler' will be used
+ *	for it.
+ *
+ * Both the SIMULATE and EMULATE macros have a second form which take an
+ * additional 'regs' argument.
+ *
+ *	DECODE_SIMULATEX(mask, value, handler, regs)
+ *	DECODE_EMULATEX	(mask, value, handler, regs)
+ *
+ * These are used to specify what kind of CPU register is encoded in each of the
+ * least significant 5 nibbles of the instruction being decoded. The regs value
+ * is specified using the REGS macro, this takes any of the REG_TYPE_* values
+ * from enum decode_reg_type as arguments; only the '*' part of the name is
+ * given. E.g.
+ *
+ *	REGS(0, ANY, NOPC, 0, ANY)
+ *
+ * This indicates an instruction is encoded like:
+ *
+ *	bits 19..16	ignore
+ *	bits 15..12	any register allowed here
+ *	bits 11.. 8	any register except PC allowed here
+ *	bits  7.. 4	ignore
+ *	bits  3.. 0	any register allowed here
+ *
+ * This register specification is checked after a decode table entry is found to
+ * match an instruction (through the mask/value test). Any invalid register then
+ * found in the instruction will cause decoding to fail with INSN_REJECTED. In
+ * the above example this would happen if bits 11..8 of the instruction were
+ * 1111, indicating R15 or PC.
+ *
+ * As well as checking for legal combinations of registers, this data is also
+ * used to modify the registers encoded in the instructions so that an
+ * emulation routines can use it. (See decode_regs() and INSN_NEW_BITS.)
+ *
+ * Here is a real example which matches ARM instructions of the form
+ * "AND <Rd>,<Rn>,<Rm>,<shift> <Rs>"
+ *
+ *	DECODE_EMULATEX	(0x0e000090, 0x00000010, emulate_rd12rn16rm0rs8_rwflags,
+ *						 REGS(ANY, ANY, NOPC, 0, ANY)),
+ *						      ^    ^    ^        ^
+ *						      Rn   Rd   Rs       Rm
+ *
+ * Decoding the instruction "AND R4, R5, R6, ASL R15" will be rejected because
+ * Rs == R15
+ *
+ * Decoding the instruction "AND R4, R5, R6, ASL R7" will be accepted and the
+ * instruction will be modified to "AND R0, R2, R3, ASL R1" and then placed into
+ * the kprobes instruction slot. This can then be called later by the handler
+ * function emulate_rd12rn16rm0rs8_rwflags in order to simulate the instruction.
+ */
+
+enum decode_type {
+	DECODE_TYPE_END,
+	DECODE_TYPE_TABLE,
+	DECODE_TYPE_CUSTOM,
+	DECODE_TYPE_SIMULATE,
+	DECODE_TYPE_EMULATE,
+	DECODE_TYPE_OR,
+	DECODE_TYPE_REJECT,
+	NUM_DECODE_TYPES /* Must be last enum */
+};
+
+#define DECODE_TYPE_BITS	4
+#define DECODE_TYPE_MASK	((1 << DECODE_TYPE_BITS) - 1)
+
+enum decode_reg_type {
+	REG_TYPE_NONE = 0, /* Not a register, ignore */
+	REG_TYPE_ANY,	   /* Any register allowed */
+	REG_TYPE_SAMEAS16, /* Register should be same as that at bits 19..16 */
+	REG_TYPE_SP,	   /* Register must be SP */
+	REG_TYPE_PC,	   /* Register must be PC */
+	REG_TYPE_NOSP,	   /* Register must not be SP */
+	REG_TYPE_NOSPPC,   /* Register must not be SP or PC */
+	REG_TYPE_NOPC,	   /* Register must not be PC */
+	REG_TYPE_NOPCWB,   /* No PC if load/store write-back flag also set */
+
+	/* The following types are used when the encoding for PC indicates
+	 * another instruction form. This distiction only matters for test
+	 * case coverage checks.
+	 */
+	REG_TYPE_NOPCX,	   /* Register must not be PC */
+	REG_TYPE_NOSPPCX,  /* Register must not be SP or PC */
+
+	/* Alias to allow '0' arg to be used in REGS macro. */
+	REG_TYPE_0 = REG_TYPE_NONE
+};
+
+#define REGS(r16, r12, r8, r4, r0)	\
+	((REG_TYPE_##r16) << 16) +	\
+	((REG_TYPE_##r12) << 12) +	\
+	((REG_TYPE_##r8) << 8) +	\
+	((REG_TYPE_##r4) << 4) +	\
+	(REG_TYPE_##r0)
+
+union decode_item {
+	u32			bits;
+	const union decode_item	*table;
+	kprobe_insn_handler_t	*handler;
+	kprobe_decode_insn_t	*decoder;
+};
+
+
+#define DECODE_END			\
+	{.bits = DECODE_TYPE_END}
+
+
+struct decode_header {
+	union decode_item	type_regs;
+	union decode_item	mask;
+	union decode_item	value;
+};
+
+#define DECODE_HEADER(_type, _mask, _value, _regs)		\
+	{.bits = (_type) | ((_regs) << DECODE_TYPE_BITS)},	\
+	{.bits = (_mask)},					\
+	{.bits = (_value)}
+
+
+struct decode_table {
+	struct decode_header	header;
+	union decode_item	table;
+};
+
+#define DECODE_TABLE(_mask, _value, _table)			\
+	DECODE_HEADER(DECODE_TYPE_TABLE, _mask, _value, 0),	\
+	{.table = (_table)}
+
+
+struct decode_custom {
+	struct decode_header	header;
+	union decode_item	decoder;
+};
+
+#define DECODE_CUSTOM(_mask, _value, _decoder)			\
+	DECODE_HEADER(DECODE_TYPE_CUSTOM, _mask, _value, 0),	\
+	{.decoder = (_decoder)}
+
+
+struct decode_simulate {
+	struct decode_header	header;
+	union decode_item	handler;
+};
+
+#define DECODE_SIMULATEX(_mask, _value, _handler, _regs)		\
+	DECODE_HEADER(DECODE_TYPE_SIMULATE, _mask, _value, _regs),	\
+	{.handler = (_handler)}
+
+#define DECODE_SIMULATE(_mask, _value, _handler)	\
+	DECODE_SIMULATEX(_mask, _value, _handler, 0)
+
+
+struct decode_emulate {
+	struct decode_header	header;
+	union decode_item	handler;
+};
+
+#define DECODE_EMULATEX(_mask, _value, _handler, _regs)			\
+	DECODE_HEADER(DECODE_TYPE_EMULATE, _mask, _value, _regs),	\
+	{.handler = (_handler)}
+
+#define DECODE_EMULATE(_mask, _value, _handler)		\
+	DECODE_EMULATEX(_mask, _value, _handler, 0)
+
+
+struct decode_or {
+	struct decode_header	header;
+};
+
+#define DECODE_OR(_mask, _value)				\
+	DECODE_HEADER(DECODE_TYPE_OR, _mask, _value, 0)
+
+
+struct decode_reject {
+	struct decode_header	header;
+};
+
+#define DECODE_REJECT(_mask, _value)				\
+	DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0)
+
+
+int kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
+			const union decode_item *table, bool thumb16);
+
+
+#endif /* _ARM_KERNEL_KPROBES_H */

+ 8 - 2
arch/arm/kernel/perf_event.c

@@ -435,7 +435,7 @@ armpmu_reserve_hardware(void)
 			if (irq >= 0)
 			if (irq >= 0)
 				free_irq(irq, NULL);
 				free_irq(irq, NULL);
 		}
 		}
-		release_pmu(pmu_device);
+		release_pmu(ARM_PMU_DEVICE_CPU);
 		pmu_device = NULL;
 		pmu_device = NULL;
 	}
 	}
 
 
@@ -454,7 +454,7 @@ armpmu_release_hardware(void)
 	}
 	}
 	armpmu->stop();
 	armpmu->stop();
 
 
-	release_pmu(pmu_device);
+	release_pmu(ARM_PMU_DEVICE_CPU);
 	pmu_device = NULL;
 	pmu_device = NULL;
 }
 }
 
 
@@ -662,6 +662,12 @@ init_hw_perf_events(void)
 		case 0xC090:	/* Cortex-A9 */
 		case 0xC090:	/* Cortex-A9 */
 			armpmu = armv7_a9_pmu_init();
 			armpmu = armv7_a9_pmu_init();
 			break;
 			break;
+		case 0xC050:	/* Cortex-A5 */
+			armpmu = armv7_a5_pmu_init();
+			break;
+		case 0xC0F0:	/* Cortex-A15 */
+			armpmu = armv7_a15_pmu_init();
+			break;
 		}
 		}
 	/* Intel CPUs [xscale]. */
 	/* Intel CPUs [xscale]. */
 	} else if (0x69 == implementor) {
 	} else if (0x69 == implementor) {

+ 324 - 20
arch/arm/kernel/perf_event_v7.c

@@ -17,17 +17,23 @@
  */
  */
 
 
 #ifdef CONFIG_CPU_V7
 #ifdef CONFIG_CPU_V7
-/* Common ARMv7 event types */
+/*
+ * Common ARMv7 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ */
 enum armv7_perf_types {
 enum armv7_perf_types {
 	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
 	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
 	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
 	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
 	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
 	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
-	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
-	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
+	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,	/* L1 */
+	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,	/* L1 */
 	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
 	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
 	ARMV7_PERFCTR_DREAD			= 0x06,
 	ARMV7_PERFCTR_DREAD			= 0x06,
 	ARMV7_PERFCTR_DWRITE			= 0x07,
 	ARMV7_PERFCTR_DWRITE			= 0x07,
-
+	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,
 	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
 	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
 	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
 	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
 	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
 	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
@@ -39,21 +45,30 @@ enum armv7_perf_types {
 	 */
 	 */
 	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
 	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
 	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
 	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
+	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,
 	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
 	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
+
+	/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */
 	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
 	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
 	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,
 	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,
-
-	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,
+	ARMV7_PERFCTR_PC_BRANCH_PRED		= 0x12,
+	ARMV7_PERFCTR_MEM_ACCESS		= 0x13,
+	ARMV7_PERFCTR_L1_ICACHE_ACCESS		= 0x14,
+	ARMV7_PERFCTR_L1_DCACHE_WB		= 0x15,
+	ARMV7_PERFCTR_L2_DCACHE_ACCESS		= 0x16,
+	ARMV7_PERFCTR_L2_DCACHE_REFILL		= 0x17,
+	ARMV7_PERFCTR_L2_DCACHE_WB		= 0x18,
+	ARMV7_PERFCTR_BUS_ACCESS		= 0x19,
+	ARMV7_PERFCTR_MEMORY_ERROR		= 0x1A,
+	ARMV7_PERFCTR_INSTR_SPEC		= 0x1B,
+	ARMV7_PERFCTR_TTBR_WRITE		= 0x1C,
+	ARMV7_PERFCTR_BUS_CYCLES		= 0x1D,
 
 
 	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
 	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
 };
 };
 
 
 /* ARMv7 Cortex-A8 specific event types */
 /* ARMv7 Cortex-A8 specific event types */
 enum armv7_a8_perf_types {
 enum armv7_a8_perf_types {
-	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,
-
-	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,
-
 	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
 	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
 	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
 	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
 	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
 	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
@@ -138,6 +153,39 @@ enum armv7_a9_perf_types {
 	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
 	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
 };
 };
 
 
+/* ARMv7 Cortex-A5 specific event types */
+enum armv7_a5_perf_types {
+	ARMV7_PERFCTR_IRQ_TAKEN			= 0x86,
+	ARMV7_PERFCTR_FIQ_TAKEN			= 0x87,
+
+	ARMV7_PERFCTR_EXT_MEM_RQST		= 0xc0,
+	ARMV7_PERFCTR_NC_EXT_MEM_RQST		= 0xc1,
+	ARMV7_PERFCTR_PREFETCH_LINEFILL		= 0xc2,
+	ARMV7_PERFCTR_PREFETCH_LINEFILL_DROP	= 0xc3,
+	ARMV7_PERFCTR_ENTER_READ_ALLOC		= 0xc4,
+	ARMV7_PERFCTR_READ_ALLOC		= 0xc5,
+
+	ARMV7_PERFCTR_STALL_SB_FULL		= 0xc9,
+};
+
+/* ARMv7 Cortex-A15 specific event types */
+enum armv7_a15_perf_types {
+	ARMV7_PERFCTR_L1_DCACHE_READ_ACCESS	= 0x40,
+	ARMV7_PERFCTR_L1_DCACHE_WRITE_ACCESS	= 0x41,
+	ARMV7_PERFCTR_L1_DCACHE_READ_REFILL	= 0x42,
+	ARMV7_PERFCTR_L1_DCACHE_WRITE_REFILL	= 0x43,
+
+	ARMV7_PERFCTR_L1_DTLB_READ_REFILL	= 0x4C,
+	ARMV7_PERFCTR_L1_DTLB_WRITE_REFILL	= 0x4D,
+
+	ARMV7_PERFCTR_L2_DCACHE_READ_ACCESS	= 0x50,
+	ARMV7_PERFCTR_L2_DCACHE_WRITE_ACCESS	= 0x51,
+	ARMV7_PERFCTR_L2_DCACHE_READ_REFILL	= 0x52,
+	ARMV7_PERFCTR_L2_DCACHE_WRITE_REFILL	= 0x53,
+
+	ARMV7_PERFCTR_SPEC_PC_WRITE		= 0x76,
+};
+
 /*
 /*
  * Cortex-A8 HW events mapping
  * Cortex-A8 HW events mapping
  *
  *
@@ -207,11 +255,6 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 		},
 		},
 	},
 	},
 	[C(DTLB)] = {
 	[C(DTLB)] = {
-		/*
-		 * Only ITLB misses and DTLB refills are supported.
-		 * If users want the DTLB refills misses a raw counter
-		 * must be used.
-		 */
 		[C(OP_READ)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
 			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
@@ -337,11 +380,6 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 		},
 		},
 	},
 	},
 	[C(DTLB)] = {
 	[C(DTLB)] = {
-		/*
-		 * Only ITLB misses and DTLB refills are supported.
-		 * If users want the DTLB refills misses a raw counter
-		 * must be used.
-		 */
 		[C(OP_READ)] = {
 		[C(OP_READ)] = {
 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
 			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
@@ -401,6 +439,242 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	},
 	},
 };
 };
 
 
+/*
+ * Cortex-A5 HW events mapping
+ */
+static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_PREFETCH_LINEFILL,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PREFETCH_LINEFILL_DROP,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		/*
+		 * The prefetch counters don't differentiate between the I
+		 * side and the D side.
+		 */
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_PREFETCH_LINEFILL,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PREFETCH_LINEFILL_DROP,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
+ * Cortex-A15 HW events mapping
+ */
+static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_SPEC_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_L1_DCACHE_READ_ACCESS,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_L1_DCACHE_READ_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_L1_DCACHE_WRITE_ACCESS,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_L1_DCACHE_WRITE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		/*
+		 * Not all performance counters differentiate between read
+		 * and write accesses/misses so we're not always strictly
+		 * correct, but it's the best we can do. Writes and reads get
+		 * combined in these cases.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_L2_DCACHE_READ_ACCESS,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_L2_DCACHE_READ_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]
+					= ARMV7_PERFCTR_L2_DCACHE_WRITE_ACCESS,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_L2_DCACHE_WRITE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_L1_DTLB_READ_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_L1_DTLB_WRITE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_BRANCH_PRED,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
 /*
 /*
  * Perf Events counters
  * Perf Events counters
  */
  */
@@ -933,6 +1207,26 @@ static const struct arm_pmu *__init armv7_a9_pmu_init(void)
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 	return &armv7pmu;
 }
 }
+
+static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+{
+	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
+	armv7pmu.name		= "ARMv7 Cortex-A5";
+	armv7pmu.cache_map	= &armv7_a5_perf_cache_map;
+	armv7pmu.event_map	= &armv7_a5_perf_map;
+	armv7pmu.num_events	= armv7_read_num_pmnc_events();
+	return &armv7pmu;
+}
+
+static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+{
+	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
+	armv7pmu.name		= "ARMv7 Cortex-A15";
+	armv7pmu.cache_map	= &armv7_a15_perf_cache_map;
+	armv7pmu.event_map	= &armv7_a15_perf_map;
+	armv7pmu.num_events	= armv7_read_num_pmnc_events();
+	return &armv7pmu;
+}
 #else
 #else
 static const struct arm_pmu *__init armv7_a8_pmu_init(void)
 static const struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 {
@@ -943,4 +1237,14 @@ static const struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 {
 	return NULL;
 	return NULL;
 }
 }
+
+static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+{
+	return NULL;
+}
+
+static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+{
+	return NULL;
+}
 #endif	/* CONFIG_CPU_V7 */
 #endif	/* CONFIG_CPU_V7 */

+ 70 - 17
arch/arm/kernel/pmu.c

@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/platform_device.h>
 
 
 #include <asm/pmu.h>
 #include <asm/pmu.h>
@@ -25,36 +26,88 @@ static volatile long pmu_lock;
 
 
 static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES];
 static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES];
 
 
-static int __devinit pmu_device_probe(struct platform_device *pdev)
+static int __devinit pmu_register(struct platform_device *pdev,
+					enum arm_pmu_type type)
 {
 {
-
-	if (pdev->id < 0 || pdev->id >= ARM_NUM_PMU_DEVICES) {
+	if (type < 0 || type >= ARM_NUM_PMU_DEVICES) {
 		pr_warning("received registration request for unknown "
 		pr_warning("received registration request for unknown "
-				"device %d\n", pdev->id);
+				"device %d\n", type);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	if (pmu_devices[pdev->id])
-		pr_warning("registering new PMU device type %d overwrites "
-				"previous registration!\n", pdev->id);
-	else
-		pr_info("registered new PMU device of type %d\n",
-				pdev->id);
+	if (pmu_devices[type]) {
+		pr_warning("rejecting duplicate registration of PMU device "
+			"type %d.", type);
+		return -ENOSPC;
+	}
 
 
-	pmu_devices[pdev->id] = pdev;
+	pr_info("registered new PMU device of type %d\n", type);
+	pmu_devices[type] = pdev;
 	return 0;
 	return 0;
 }
 }
 
 
-static struct platform_driver pmu_driver = {
+#define OF_MATCH_PMU(_name, _type) {	\
+	.compatible = _name,		\
+	.data = (void *)_type,		\
+}
+
+#define OF_MATCH_CPU(name)	OF_MATCH_PMU(name, ARM_PMU_DEVICE_CPU)
+
+static struct of_device_id armpmu_of_device_ids[] = {
+	OF_MATCH_CPU("arm,cortex-a9-pmu"),
+	OF_MATCH_CPU("arm,cortex-a8-pmu"),
+	OF_MATCH_CPU("arm,arm1136-pmu"),
+	OF_MATCH_CPU("arm,arm1176-pmu"),
+	{},
+};
+
+#define PLAT_MATCH_PMU(_name, _type) {	\
+	.name		= _name,	\
+	.driver_data	= _type,	\
+}
+
+#define PLAT_MATCH_CPU(_name)	PLAT_MATCH_PMU(_name, ARM_PMU_DEVICE_CPU)
+
+static struct platform_device_id armpmu_plat_device_ids[] = {
+	PLAT_MATCH_CPU("arm-pmu"),
+	{},
+};
+
+enum arm_pmu_type armpmu_device_type(struct platform_device *pdev)
+{
+	const struct of_device_id	*of_id;
+	const struct platform_device_id *pdev_id;
+
+	/* provided by of_device_id table */
+	if (pdev->dev.of_node) {
+		of_id = of_match_device(armpmu_of_device_ids, &pdev->dev);
+		BUG_ON(!of_id);
+		return (enum arm_pmu_type)of_id->data;
+	}
+
+	/* Provided by platform_device_id table */
+	pdev_id = platform_get_device_id(pdev);
+	BUG_ON(!pdev_id);
+	return pdev_id->driver_data;
+}
+
+static int __devinit armpmu_device_probe(struct platform_device *pdev)
+{
+	return pmu_register(pdev, armpmu_device_type(pdev));
+}
+
+static struct platform_driver armpmu_driver = {
 	.driver		= {
 	.driver		= {
 		.name	= "arm-pmu",
 		.name	= "arm-pmu",
+		.of_match_table = armpmu_of_device_ids,
 	},
 	},
-	.probe		= pmu_device_probe,
+	.probe		= armpmu_device_probe,
+	.id_table	= armpmu_plat_device_ids,
 };
 };
 
 
 static int __init register_pmu_driver(void)
 static int __init register_pmu_driver(void)
 {
 {
-	return platform_driver_register(&pmu_driver);
+	return platform_driver_register(&armpmu_driver);
 }
 }
 device_initcall(register_pmu_driver);
 device_initcall(register_pmu_driver);
 
 
@@ -77,11 +130,11 @@ reserve_pmu(enum arm_pmu_type device)
 EXPORT_SYMBOL_GPL(reserve_pmu);
 EXPORT_SYMBOL_GPL(reserve_pmu);
 
 
 int
 int
-release_pmu(struct platform_device *pdev)
+release_pmu(enum arm_pmu_type device)
 {
 {
-	if (WARN_ON(pdev != pmu_devices[pdev->id]))
+	if (WARN_ON(!pmu_devices[device]))
 		return -EINVAL;
 		return -EINVAL;
-	clear_bit_unlock(pdev->id, &pmu_lock);
+	clear_bit_unlock(device, &pmu_lock);
 	return 0;
 	return 0;
 }
 }
 EXPORT_SYMBOL_GPL(release_pmu);
 EXPORT_SYMBOL_GPL(release_pmu);

+ 3 - 25
arch/arm/kernel/ptrace.c

@@ -228,34 +228,12 @@ static struct undef_hook thumb_break_hook = {
 	.fn		= break_trap,
 	.fn		= break_trap,
 };
 };
 
 
-static int thumb2_break_trap(struct pt_regs *regs, unsigned int instr)
-{
-	unsigned int instr2;
-	void __user *pc;
-
-	/* Check the second half of the instruction.  */
-	pc = (void __user *)(instruction_pointer(regs) + 2);
-
-	if (processor_mode(regs) == SVC_MODE) {
-		instr2 = *(u16 *) pc;
-	} else {
-		get_user(instr2, (u16 __user *)pc);
-	}
-
-	if (instr2 == 0xa000) {
-		ptrace_break(current, regs);
-		return 0;
-	} else {
-		return 1;
-	}
-}
-
 static struct undef_hook thumb2_break_hook = {
 static struct undef_hook thumb2_break_hook = {
-	.instr_mask	= 0xffff,
-	.instr_val	= 0xf7f0,
+	.instr_mask	= 0xffffffff,
+	.instr_val	= 0xf7f0a000,
 	.cpsr_mask	= PSR_T_BIT,
 	.cpsr_mask	= PSR_T_BIT,
 	.cpsr_val	= PSR_T_BIT,
 	.cpsr_val	= PSR_T_BIT,
-	.fn		= thumb2_break_trap,
+	.fn		= break_trap,
 };
 };
 
 
 static int __init ptrace_break_init(void)
 static int __init ptrace_break_init(void)

+ 60 - 49
arch/arm/kernel/setup.c

@@ -343,54 +343,6 @@ static void __init feat_v6_fixup(void)
 		elf_hwcap &= ~HWCAP_TLS;
 		elf_hwcap &= ~HWCAP_TLS;
 }
 }
 
 
-static void __init setup_processor(void)
-{
-	struct proc_info_list *list;
-
-	/*
-	 * locate processor in the list of supported processor
-	 * types.  The linker builds this table for us from the
-	 * entries in arch/arm/mm/proc-*.S
-	 */
-	list = lookup_processor_type(read_cpuid_id());
-	if (!list) {
-		printk("CPU configuration botched (ID %08x), unable "
-		       "to continue.\n", read_cpuid_id());
-		while (1);
-	}
-
-	cpu_name = list->cpu_name;
-
-#ifdef MULTI_CPU
-	processor = *list->proc;
-#endif
-#ifdef MULTI_TLB
-	cpu_tlb = *list->tlb;
-#endif
-#ifdef MULTI_USER
-	cpu_user = *list->user;
-#endif
-#ifdef MULTI_CACHE
-	cpu_cache = *list->cache;
-#endif
-
-	printk("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
-	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15,
-	       proc_arch[cpu_architecture()], cr_alignment);
-
-	sprintf(init_utsname()->machine, "%s%c", list->arch_name, ENDIANNESS);
-	sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS);
-	elf_hwcap = list->elf_hwcap;
-#ifndef CONFIG_ARM_THUMB
-	elf_hwcap &= ~HWCAP_THUMB;
-#endif
-
-	feat_v6_fixup();
-
-	cacheid_init();
-	cpu_proc_init();
-}
-
 /*
 /*
  * cpu_init - initialise one CPU.
  * cpu_init - initialise one CPU.
  *
  *
@@ -406,6 +358,8 @@ void cpu_init(void)
 		BUG();
 		BUG();
 	}
 	}
 
 
+	cpu_proc_init();
+
 	/*
 	/*
 	 * Define the placement constraint for the inline asm directive below.
 	 * Define the placement constraint for the inline asm directive below.
 	 * In Thumb-2, msr with an immediate value is not allowed.
 	 * In Thumb-2, msr with an immediate value is not allowed.
@@ -442,6 +396,54 @@ void cpu_init(void)
 	    : "r14");
 	    : "r14");
 }
 }
 
 
+static void __init setup_processor(void)
+{
+	struct proc_info_list *list;
+
+	/*
+	 * locate processor in the list of supported processor
+	 * types.  The linker builds this table for us from the
+	 * entries in arch/arm/mm/proc-*.S
+	 */
+	list = lookup_processor_type(read_cpuid_id());
+	if (!list) {
+		printk("CPU configuration botched (ID %08x), unable "
+		       "to continue.\n", read_cpuid_id());
+		while (1);
+	}
+
+	cpu_name = list->cpu_name;
+
+#ifdef MULTI_CPU
+	processor = *list->proc;
+#endif
+#ifdef MULTI_TLB
+	cpu_tlb = *list->tlb;
+#endif
+#ifdef MULTI_USER
+	cpu_user = *list->user;
+#endif
+#ifdef MULTI_CACHE
+	cpu_cache = *list->cache;
+#endif
+
+	printk("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
+	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15,
+	       proc_arch[cpu_architecture()], cr_alignment);
+
+	sprintf(init_utsname()->machine, "%s%c", list->arch_name, ENDIANNESS);
+	sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS);
+	elf_hwcap = list->elf_hwcap;
+#ifndef CONFIG_ARM_THUMB
+	elf_hwcap &= ~HWCAP_THUMB;
+#endif
+
+	feat_v6_fixup();
+
+	cacheid_init();
+	cpu_init();
+}
+
 void __init dump_machine_table(void)
 void __init dump_machine_table(void)
 {
 {
 	struct machine_desc *p;
 	struct machine_desc *p;
@@ -915,9 +917,14 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 	reserve_crashkernel();
 	reserve_crashkernel();
 
 
-	cpu_init();
 	tcm_init();
 	tcm_init();
 
 
+#ifdef CONFIG_ZONE_DMA
+	if (mdesc->dma_zone_size) {
+		extern unsigned long arm_dma_zone_size;
+		arm_dma_zone_size = mdesc->dma_zone_size;
+	}
+#endif
 #ifdef CONFIG_MULTI_IRQ_HANDLER
 #ifdef CONFIG_MULTI_IRQ_HANDLER
 	handle_arch_irq = mdesc->handle_irq;
 	handle_arch_irq = mdesc->handle_irq;
 #endif
 #endif
@@ -979,6 +986,10 @@ static const char *hwcap_str[] = {
 	"neon",
 	"neon",
 	"vfpv3",
 	"vfpv3",
 	"vfpv3d16",
 	"vfpv3d16",
+	"tls",
+	"vfpv4",
+	"idiva",
+	"idivt",
 	NULL
 	NULL
 };
 };
 
 

+ 37 - 47
arch/arm/kernel/sleep.S

@@ -10,64 +10,61 @@
 /*
 /*
  * Save CPU state for a suspend
  * Save CPU state for a suspend
  *  r1 = v:p offset
  *  r1 = v:p offset
- *  r3 = virtual return function
- * Note: sp is decremented to allocate space for CPU state on stack
- * r0-r3,r9,r10,lr corrupted
+ *  r2 = suspend function arg0
+ *  r3 = suspend function
  */
  */
-ENTRY(cpu_suspend)
-	mov	r9, lr
+ENTRY(__cpu_suspend)
+	stmfd	sp!, {r4 - r11, lr}
 #ifdef MULTI_CPU
 #ifdef MULTI_CPU
 	ldr	r10, =processor
 	ldr	r10, =processor
-	mov	r2, sp			@ current virtual SP
-	ldr	r0, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
+	ldr	r5, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
 	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
 	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
-	sub	sp, sp, r0		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer
+#else
+	ldr	r5, =cpu_suspend_size
+	ldr	ip, =cpu_do_resume
+#endif
+	mov	r6, sp			@ current virtual SP
+	sub	sp, sp, r5		@ allocate CPU state on stack
+	mov	r0, sp			@ save pointer to CPU save block
 	add	ip, ip, r1		@ convert resume fn to phys
 	add	ip, ip, r1		@ convert resume fn to phys
-	stmfd	sp!, {r1, r2, r3, ip}	@ save v:p, virt SP, retfn, phys resume fn
-	ldr	r3, =sleep_save_sp
-	add	r2, sp, r1		@ convert SP to phys
+	stmfd	sp!, {r1, r6, ip}	@ save v:p, virt SP, phys resume fn
+	ldr	r5, =sleep_save_sp
+	add	r6, sp, r1		@ convert SP to phys
+	stmfd	sp!, {r2, r3}		@ save suspend func arg and pointer
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
 	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
 	ALT_UP(mov lr, #0)
 	ALT_UP(mov lr, #0)
 	and	lr, lr, #15
 	and	lr, lr, #15
-	str	r2, [r3, lr, lsl #2]	@ save phys SP
+	str	r6, [r5, lr, lsl #2]	@ save phys SP
 #else
 #else
-	str	r2, [r3]		@ save phys SP
+	str	r6, [r5]		@ save phys SP
 #endif
 #endif
+#ifdef MULTI_CPU
 	mov	lr, pc
 	mov	lr, pc
 	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
 	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
 #else
 #else
-	mov	r2, sp			@ current virtual SP
-	ldr	r0, =cpu_suspend_size
-	sub	sp, sp, r0		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer
-	stmfd	sp!, {r1, r2, r3}	@ save v:p, virt SP, return fn
-	ldr	r3, =sleep_save_sp
-	add	r2, sp, r1		@ convert SP to phys
-#ifdef CONFIG_SMP
-	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
-	ALT_UP(mov lr, #0)
-	and	lr, lr, #15
-	str	r2, [r3, lr, lsl #2]	@ save phys SP
-#else
-	str	r2, [r3]		@ save phys SP
-#endif
 	bl	cpu_do_suspend
 	bl	cpu_do_suspend
 #endif
 #endif
 
 
 	@ flush data cache
 	@ flush data cache
 #ifdef MULTI_CACHE
 #ifdef MULTI_CACHE
 	ldr	r10, =cpu_cache
 	ldr	r10, =cpu_cache
-	mov	lr, r9
+	mov	lr, pc
 	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
 	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
 #else
 #else
-	mov	lr, r9
-	b	__cpuc_flush_kern_all
+	bl	__cpuc_flush_kern_all
 #endif
 #endif
-ENDPROC(cpu_suspend)
+	adr	lr, BSYM(cpu_suspend_abort)
+	ldmfd	sp!, {r0, pc}		@ call suspend fn
+ENDPROC(__cpu_suspend)
 	.ltorg
 	.ltorg
 
 
+cpu_suspend_abort:
+	ldmia	sp!, {r1 - r3}		@ pop v:p, virt SP, phys resume fn
+	mov	sp, r2
+	ldmfd	sp!, {r4 - r11, pc}
+ENDPROC(cpu_suspend_abort)
+
 /*
 /*
  * r0 = control register value
  * r0 = control register value
  * r1 = v:p offset (preserved by cpu_do_resume)
  * r1 = v:p offset (preserved by cpu_do_resume)
@@ -97,7 +94,9 @@ ENDPROC(cpu_resume_turn_mmu_on)
 cpu_resume_after_mmu:
 cpu_resume_after_mmu:
 	str	r5, [r2, r4, lsl #2]	@ restore old mapping
 	str	r5, [r2, r4, lsl #2]	@ restore old mapping
 	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
 	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
-	mov	pc, lr
+	bl	cpu_init		@ restore the und/abt/irq banked regs
+	mov	r0, #0			@ return zero on success
+	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_resume_after_mmu)
 ENDPROC(cpu_resume_after_mmu)
 
 
 /*
 /*
@@ -120,20 +119,11 @@ ENTRY(cpu_resume)
 	ldr	r0, sleep_save_sp	@ stack phys addr
 	ldr	r0, sleep_save_sp	@ stack phys addr
 #endif
 #endif
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
-#ifdef MULTI_CPU
-	@ load v:p, stack, return fn, resume fn
-  ARM(	ldmia	r0!, {r1, sp, lr, pc}	)
-THUMB(	ldmia	r0!, {r1, r2, r3, r4}	)
+	@ load v:p, stack, resume fn
+  ARM(	ldmia	r0!, {r1, sp, pc}	)
+THUMB(	ldmia	r0!, {r1, r2, r3}	)
 THUMB(	mov	sp, r2			)
 THUMB(	mov	sp, r2			)
-THUMB(	mov	lr, r3			)
-THUMB(	bx	r4			)
-#else
-	@ load v:p, stack, return fn
-  ARM(	ldmia	r0!, {r1, sp, lr}	)
-THUMB(	ldmia	r0!, {r1, r2, lr}	)
-THUMB(	mov	sp, r2			)
-	b	cpu_do_resume
-#endif
+THUMB(	bx	r3			)
 ENDPROC(cpu_resume)
 ENDPROC(cpu_resume)
 
 
 sleep_save_sp:
 sleep_save_sp:

+ 9 - 2
arch/arm/kernel/smp.c

@@ -365,14 +365,21 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 */
 	 */
 	if (max_cpus > ncores)
 	if (max_cpus > ncores)
 		max_cpus = ncores;
 		max_cpus = ncores;
-
-	if (max_cpus > 1) {
+	if (ncores > 1 && max_cpus) {
 		/*
 		/*
 		 * Enable the local timer or broadcast device for the
 		 * Enable the local timer or broadcast device for the
 		 * boot CPU, but only if we have more than one CPU.
 		 * boot CPU, but only if we have more than one CPU.
 		 */
 		 */
 		percpu_timer_setup();
 		percpu_timer_setup();
 
 
+		/*
+		 * Initialise the present map, which describes the set of CPUs
+		 * actually populated at the present time. A platform should
+		 * re-initialize the map in platform_smp_prepare_cpus() if
+		 * present != possible (e.g. physical hotplug).
+		 */
+		init_cpu_present(&cpu_possible_map);
+
 		/*
 		/*
 		 * Initialise the SCU if there are more than one CPU
 		 * Initialise the SCU if there are more than one CPU
 		 * and let them know where to start.
 		 * and let them know where to start.

+ 2 - 0
arch/arm/kernel/smp_scu.c

@@ -20,6 +20,7 @@
 #define SCU_INVALIDATE		0x0c
 #define SCU_INVALIDATE		0x0c
 #define SCU_FPGA_REVISION	0x10
 #define SCU_FPGA_REVISION	0x10
 
 
+#ifdef CONFIG_SMP
 /*
 /*
  * Get the number of CPU cores from the SCU configuration
  * Get the number of CPU cores from the SCU configuration
  */
  */
@@ -50,6 +51,7 @@ void __init scu_enable(void __iomem *scu_base)
 	 */
 	 */
 	flush_cache_all();
 	flush_cache_all();
 }
 }
+#endif
 
 
 /*
 /*
  * Set the executing CPUs power mode as defined.  This will be in
  * Set the executing CPUs power mode as defined.  This will be in

+ 57 - 11
arch/arm/kernel/tcm.c

@@ -19,6 +19,8 @@
 #include "tcm.h"
 #include "tcm.h"
 
 
 static struct gen_pool *tcm_pool;
 static struct gen_pool *tcm_pool;
+static bool dtcm_present;
+static bool itcm_present;
 
 
 /* TCM section definitions from the linker */
 /* TCM section definitions from the linker */
 extern char __itcm_start, __sitcm_text, __eitcm_text;
 extern char __itcm_start, __sitcm_text, __eitcm_text;
@@ -90,6 +92,18 @@ void tcm_free(void *addr, size_t len)
 }
 }
 EXPORT_SYMBOL(tcm_free);
 EXPORT_SYMBOL(tcm_free);
 
 
+bool tcm_dtcm_present(void)
+{
+	return dtcm_present;
+}
+EXPORT_SYMBOL(tcm_dtcm_present);
+
+bool tcm_itcm_present(void)
+{
+	return itcm_present;
+}
+EXPORT_SYMBOL(tcm_itcm_present);
+
 static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 				  u32 *offset)
 				  u32 *offset)
 {
 {
@@ -134,6 +148,10 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks,
 			(tcm_region & 1) ? "" : "not ");
 			(tcm_region & 1) ? "" : "not ");
 	}
 	}
 
 
+	/* Not much fun you can do with a size 0 bank */
+	if (tcm_size == 0)
+		return 0;
+
 	/* Force move the TCM bank to where we want it, enable */
 	/* Force move the TCM bank to where we want it, enable */
 	tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1;
 	tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1;
 
 
@@ -165,12 +183,20 @@ void __init tcm_init(void)
 	u32 tcm_status = read_cpuid_tcmstatus();
 	u32 tcm_status = read_cpuid_tcmstatus();
 	u8 dtcm_banks = (tcm_status >> 16) & 0x03;
 	u8 dtcm_banks = (tcm_status >> 16) & 0x03;
 	u8 itcm_banks = (tcm_status & 0x03);
 	u8 itcm_banks = (tcm_status & 0x03);
+	size_t dtcm_code_sz = &__edtcm_data - &__sdtcm_data;
+	size_t itcm_code_sz = &__eitcm_text - &__sitcm_text;
 	char *start;
 	char *start;
 	char *end;
 	char *end;
 	char *ram;
 	char *ram;
 	int ret;
 	int ret;
 	int i;
 	int i;
 
 
+	/* Values greater than 2 for D/ITCM banks are "reserved" */
+	if (dtcm_banks > 2)
+		dtcm_banks = 0;
+	if (itcm_banks > 2)
+		itcm_banks = 0;
+
 	/* Setup DTCM if present */
 	/* Setup DTCM if present */
 	if (dtcm_banks > 0) {
 	if (dtcm_banks > 0) {
 		for (i = 0; i < dtcm_banks; i++) {
 		for (i = 0; i < dtcm_banks; i++) {
@@ -178,6 +204,13 @@ void __init tcm_init(void)
 			if (ret)
 			if (ret)
 				return;
 				return;
 		}
 		}
+		/* This means you compiled more code than fits into DTCM */
+		if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) {
+			pr_info("CPU DTCM: %u bytes of code compiled to "
+				"DTCM but only %lu bytes of DTCM present\n",
+				dtcm_code_sz, (dtcm_end - DTCM_OFFSET));
+			goto no_dtcm;
+		}
 		dtcm_res.end = dtcm_end - 1;
 		dtcm_res.end = dtcm_end - 1;
 		request_resource(&iomem_resource, &dtcm_res);
 		request_resource(&iomem_resource, &dtcm_res);
 		dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
 		dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET;
@@ -186,12 +219,16 @@ void __init tcm_init(void)
 		start = &__sdtcm_data;
 		start = &__sdtcm_data;
 		end   = &__edtcm_data;
 		end   = &__edtcm_data;
 		ram   = &__dtcm_start;
 		ram   = &__dtcm_start;
-		/* This means you compiled more code than fits into DTCM */
-		BUG_ON((end - start) > (dtcm_end - DTCM_OFFSET));
-		memcpy(start, ram, (end-start));
-		pr_debug("CPU DTCM: copied data from %p - %p\n", start, end);
+		memcpy(start, ram, dtcm_code_sz);
+		pr_debug("CPU DTCM: copied data from %p - %p\n",
+			 start, end);
+		dtcm_present = true;
+	} else if (dtcm_code_sz) {
+		pr_info("CPU DTCM: %u bytes of code compiled to DTCM but no "
+			"DTCM banks present in CPU\n", dtcm_code_sz);
 	}
 	}
 
 
+no_dtcm:
 	/* Setup ITCM if present */
 	/* Setup ITCM if present */
 	if (itcm_banks > 0) {
 	if (itcm_banks > 0) {
 		for (i = 0; i < itcm_banks; i++) {
 		for (i = 0; i < itcm_banks; i++) {
@@ -199,6 +236,13 @@ void __init tcm_init(void)
 			if (ret)
 			if (ret)
 				return;
 				return;
 		}
 		}
+		/* This means you compiled more code than fits into ITCM */
+		if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) {
+			pr_info("CPU ITCM: %u bytes of code compiled to "
+				"ITCM but only %lu bytes of ITCM present\n",
+				itcm_code_sz, (itcm_end - ITCM_OFFSET));
+			return;
+		}
 		itcm_res.end = itcm_end - 1;
 		itcm_res.end = itcm_end - 1;
 		request_resource(&iomem_resource, &itcm_res);
 		request_resource(&iomem_resource, &itcm_res);
 		itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
 		itcm_iomap[0].length = itcm_end - ITCM_OFFSET;
@@ -207,10 +251,13 @@ void __init tcm_init(void)
 		start = &__sitcm_text;
 		start = &__sitcm_text;
 		end   = &__eitcm_text;
 		end   = &__eitcm_text;
 		ram   = &__itcm_start;
 		ram   = &__itcm_start;
-		/* This means you compiled more code than fits into ITCM */
-		BUG_ON((end - start) > (itcm_end - ITCM_OFFSET));
-		memcpy(start, ram, (end-start));
-		pr_debug("CPU ITCM: copied code from %p - %p\n", start, end);
+		memcpy(start, ram, itcm_code_sz);
+		pr_debug("CPU ITCM: copied code from %p - %p\n",
+			 start, end);
+		itcm_present = true;
+	} else if (itcm_code_sz) {
+		pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no "
+			"ITCM banks present in CPU\n", itcm_code_sz);
 	}
 	}
 }
 }
 
 
@@ -221,7 +268,6 @@ void __init tcm_init(void)
  */
  */
 static int __init setup_tcm_pool(void)
 static int __init setup_tcm_pool(void)
 {
 {
-	u32 tcm_status = read_cpuid_tcmstatus();
 	u32 dtcm_pool_start = (u32) &__edtcm_data;
 	u32 dtcm_pool_start = (u32) &__edtcm_data;
 	u32 itcm_pool_start = (u32) &__eitcm_text;
 	u32 itcm_pool_start = (u32) &__eitcm_text;
 	int ret;
 	int ret;
@@ -236,7 +282,7 @@ static int __init setup_tcm_pool(void)
 	pr_debug("Setting up TCM memory pool\n");
 	pr_debug("Setting up TCM memory pool\n");
 
 
 	/* Add the rest of DTCM to the TCM pool */
 	/* Add the rest of DTCM to the TCM pool */
-	if (tcm_status & (0x03 << 16)) {
+	if (dtcm_present) {
 		if (dtcm_pool_start < dtcm_end) {
 		if (dtcm_pool_start < dtcm_end) {
 			ret = gen_pool_add(tcm_pool, dtcm_pool_start,
 			ret = gen_pool_add(tcm_pool, dtcm_pool_start,
 					   dtcm_end - dtcm_pool_start, -1);
 					   dtcm_end - dtcm_pool_start, -1);
@@ -253,7 +299,7 @@ static int __init setup_tcm_pool(void)
 	}
 	}
 
 
 	/* Add the rest of ITCM to the TCM pool */
 	/* Add the rest of ITCM to the TCM pool */
-	if (tcm_status & 0x03) {
+	if (itcm_present) {
 		if (itcm_pool_start < itcm_end) {
 		if (itcm_pool_start < itcm_end) {
 			ret = gen_pool_add(tcm_pool, itcm_pool_start,
 			ret = gen_pool_add(tcm_pool, itcm_pool_start,
 					   itcm_end - itcm_pool_start, -1);
 					   itcm_end - itcm_pool_start, -1);

+ 16 - 1
arch/arm/kernel/traps.c

@@ -355,9 +355,24 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	pc = (void __user *)instruction_pointer(regs);
 	pc = (void __user *)instruction_pointer(regs);
 
 
 	if (processor_mode(regs) == SVC_MODE) {
 	if (processor_mode(regs) == SVC_MODE) {
-		instr = *(u32 *) pc;
+#ifdef CONFIG_THUMB2_KERNEL
+		if (thumb_mode(regs)) {
+			instr = ((u16 *)pc)[0];
+			if (is_wide_instruction(instr)) {
+				instr <<= 16;
+				instr |= ((u16 *)pc)[1];
+			}
+		} else
+#endif
+			instr = *(u32 *) pc;
 	} else if (thumb_mode(regs)) {
 	} else if (thumb_mode(regs)) {
 		get_user(instr, (u16 __user *)pc);
 		get_user(instr, (u16 __user *)pc);
+		if (is_wide_instruction(instr)) {
+			unsigned int instr2;
+			get_user(instr2, (u16 __user *)pc+1);
+			instr <<= 16;
+			instr |= instr2;
+		}
 	} else {
 	} else {
 		get_user(instr, (u32 __user *)pc);
 		get_user(instr, (u32 __user *)pc);
 	}
 	}

+ 66 - 60
arch/arm/kernel/vmlinux.lds.S

@@ -38,57 +38,6 @@ jiffies = jiffies_64 + 4;
 
 
 SECTIONS
 SECTIONS
 {
 {
-#ifdef CONFIG_XIP_KERNEL
-	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
-#else
-	. = PAGE_OFFSET + TEXT_OFFSET;
-#endif
-
-	.init : {			/* Init code and data		*/
-		_stext = .;
-		_sinittext = .;
-			HEAD_TEXT
-			INIT_TEXT
-			ARM_EXIT_KEEP(EXIT_TEXT)
-		_einittext = .;
-		ARM_CPU_DISCARD(PROC_INFO)
-		__arch_info_begin = .;
-			*(.arch.info.init)
-		__arch_info_end = .;
-		__tagtable_begin = .;
-			*(.taglist.init)
-		__tagtable_end = .;
-#ifdef CONFIG_SMP_ON_UP
-		__smpalt_begin = .;
-			*(.alt.smp.init)
-		__smpalt_end = .;
-#endif
-
-		__pv_table_begin = .;
-			*(.pv_table)
-		__pv_table_end = .;
-
-		INIT_SETUP(16)
-
-		INIT_CALLS
-		CON_INITCALL
-		SECURITY_INITCALL
-		INIT_RAM_FS
-
-#ifndef CONFIG_XIP_KERNEL
-		__init_begin = _stext;
-		INIT_DATA
-		ARM_EXIT_KEEP(EXIT_DATA)
-#endif
-	}
-
-	PERCPU_SECTION(32)
-
-#ifndef CONFIG_XIP_KERNEL
-	. = ALIGN(PAGE_SIZE);
-	__init_end = .;
-#endif
-
 	/*
 	/*
 	 * unwind exit sections must be discarded before the rest of the
 	 * unwind exit sections must be discarded before the rest of the
 	 * unwind sections get included.
 	 * unwind sections get included.
@@ -105,11 +54,23 @@ SECTIONS
 #ifndef CONFIG_MMU
 #ifndef CONFIG_MMU
 		*(.fixup)
 		*(.fixup)
 		*(__ex_table)
 		*(__ex_table)
+#endif
+#ifndef CONFIG_SMP_ON_UP
+		*(.alt.smp.init)
 #endif
 #endif
 	}
 	}
 
 
+#ifdef CONFIG_XIP_KERNEL
+	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
+#else
+	. = PAGE_OFFSET + TEXT_OFFSET;
+#endif
+	.head.text : {
+		_text = .;
+		HEAD_TEXT
+	}
 	.text : {			/* Real text segment		*/
 	.text : {			/* Real text segment		*/
-		_text = .;		/* Text and read-only data	*/
+		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
 			__exception_text_start = .;
 			*(.exception.text)
 			*(.exception.text)
 			__exception_text_end = .;
 			__exception_text_end = .;
@@ -122,8 +83,6 @@ SECTIONS
 			*(.fixup)
 			*(.fixup)
 #endif
 #endif
 			*(.gnu.warning)
 			*(.gnu.warning)
-			*(.rodata)
-			*(.rodata.*)
 			*(.glue_7)
 			*(.glue_7)
 			*(.glue_7t)
 			*(.glue_7t)
 		. = ALIGN(4);
 		. = ALIGN(4);
@@ -152,10 +111,63 @@ SECTIONS
 
 
 	_etext = .;			/* End of text and rodata section */
 	_etext = .;			/* End of text and rodata section */
 
 
+#ifndef CONFIG_XIP_KERNEL
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+#endif
+
+	INIT_TEXT_SECTION(8)
+	.exit.text : {
+		ARM_EXIT_KEEP(EXIT_TEXT)
+	}
+	.init.proc.info : {
+		ARM_CPU_DISCARD(PROC_INFO)
+	}
+	.init.arch.info : {
+		__arch_info_begin = .;
+		*(.arch.info.init)
+		__arch_info_end = .;
+	}
+	.init.tagtable : {
+		__tagtable_begin = .;
+		*(.taglist.init)
+		__tagtable_end = .;
+	}
+#ifdef CONFIG_SMP_ON_UP
+	.init.smpalt : {
+		__smpalt_begin = .;
+		*(.alt.smp.init)
+		__smpalt_end = .;
+	}
+#endif
+	.init.pv_table : {
+		__pv_table_begin = .;
+		*(.pv_table)
+		__pv_table_end = .;
+	}
+	.init.data : {
+#ifndef CONFIG_XIP_KERNEL
+		INIT_DATA
+#endif
+		INIT_SETUP(16)
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+		INIT_RAM_FS
+	}
+#ifndef CONFIG_XIP_KERNEL
+	.exit.data : {
+		ARM_EXIT_KEEP(EXIT_DATA)
+	}
+#endif
+
+	PERCPU_SECTION(32)
+
 #ifdef CONFIG_XIP_KERNEL
 #ifdef CONFIG_XIP_KERNEL
 	__data_loc = ALIGN(4);		/* location in binary */
 	__data_loc = ALIGN(4);		/* location in binary */
 	. = PAGE_OFFSET + TEXT_OFFSET;
 	. = PAGE_OFFSET + TEXT_OFFSET;
 #else
 #else
+	__init_end = .;
 	. = ALIGN(THREAD_SIZE);
 	. = ALIGN(THREAD_SIZE);
 	__data_loc = .;
 	__data_loc = .;
 #endif
 #endif
@@ -270,12 +282,6 @@ SECTIONS
 
 
 	/* Default discards */
 	/* Default discards */
 	DISCARDS
 	DISCARDS
-
-#ifndef CONFIG_SMP_ON_UP
-	/DISCARD/ : {
-		*(.alt.smp.init)
-	}
-#endif
 }
 }
 
 
 /*
 /*

+ 0 - 4
arch/arm/mach-bcmring/include/mach/entry-macro.S

@@ -80,7 +80,3 @@
 
 
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.endm
 		.endm
-
-		.macro	irq_prio_table
-		.endm
-

+ 1 - 0
arch/arm/mach-davinci/board-da830-evm.c

@@ -681,4 +681,5 @@ MACHINE_START(DAVINCI_DA830_EVM, "DaVinci DA830/OMAP-L137/AM17x EVM")
 	.init_irq	= cp_intc_init,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
 	.timer		= &davinci_timer,
 	.init_machine	= da830_evm_init,
 	.init_machine	= da830_evm_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-da850-evm.c

@@ -1261,4 +1261,5 @@ MACHINE_START(DAVINCI_DA850_EVM, "DaVinci DA850/OMAP-L138/AM18x EVM")
 	.init_irq	= cp_intc_init,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
 	.timer		= &davinci_timer,
 	.init_machine	= da850_evm_init,
 	.init_machine	= da850_evm_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-dm355-evm.c

@@ -356,4 +356,5 @@ MACHINE_START(DAVINCI_DM355_EVM, "DaVinci DM355 EVM")
 	.init_irq     = davinci_irq_init,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
 	.timer	      = &davinci_timer,
 	.init_machine = dm355_evm_init,
 	.init_machine = dm355_evm_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-dm355-leopard.c

@@ -275,4 +275,5 @@ MACHINE_START(DM355_LEOPARD, "DaVinci DM355 leopard")
 	.init_irq     = davinci_irq_init,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
 	.timer	      = &davinci_timer,
 	.init_machine = dm355_leopard_init,
 	.init_machine = dm355_leopard_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-dm365-evm.c

@@ -617,5 +617,6 @@ MACHINE_START(DAVINCI_DM365_EVM, "DaVinci DM365 EVM")
 	.init_irq	= davinci_irq_init,
 	.init_irq	= davinci_irq_init,
 	.timer		= &davinci_timer,
 	.timer		= &davinci_timer,
 	.init_machine	= dm365_evm_init,
 	.init_machine	= dm365_evm_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END
 
 

+ 1 - 0
arch/arm/mach-davinci/board-dm644x-evm.c

@@ -717,4 +717,5 @@ MACHINE_START(DAVINCI_EVM, "DaVinci DM644x EVM")
 	.init_irq     = davinci_irq_init,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
 	.timer	      = &davinci_timer,
 	.init_machine = davinci_evm_init,
 	.init_machine = davinci_evm_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 2 - 0
arch/arm/mach-davinci/board-dm646x-evm.c

@@ -802,6 +802,7 @@ MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM")
 	.init_irq     = davinci_irq_init,
 	.init_irq     = davinci_irq_init,
 	.timer        = &davinci_timer,
 	.timer        = &davinci_timer,
 	.init_machine = evm_init,
 	.init_machine = evm_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END
 
 
 MACHINE_START(DAVINCI_DM6467TEVM, "DaVinci DM6467T EVM")
 MACHINE_START(DAVINCI_DM6467TEVM, "DaVinci DM6467T EVM")
@@ -810,5 +811,6 @@ MACHINE_START(DAVINCI_DM6467TEVM, "DaVinci DM6467T EVM")
 	.init_irq     = davinci_irq_init,
 	.init_irq     = davinci_irq_init,
 	.timer        = &davinci_timer,
 	.timer        = &davinci_timer,
 	.init_machine = evm_init,
 	.init_machine = evm_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END
 
 

+ 1 - 0
arch/arm/mach-davinci/board-mityomapl138.c

@@ -571,4 +571,5 @@ MACHINE_START(MITYOMAPL138, "MityDSP-L138/MityARM-1808")
 	.init_irq	= cp_intc_init,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
 	.timer		= &davinci_timer,
 	.init_machine	= mityomapl138_init,
 	.init_machine	= mityomapl138_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-neuros-osd2.c

@@ -277,4 +277,5 @@ MACHINE_START(NEUROS_OSD2, "Neuros OSD2")
 	.init_irq	= davinci_irq_init,
 	.init_irq	= davinci_irq_init,
 	.timer		= &davinci_timer,
 	.timer		= &davinci_timer,
 	.init_machine = davinci_ntosd2_init,
 	.init_machine = davinci_ntosd2_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-omapl138-hawk.c

@@ -343,4 +343,5 @@ MACHINE_START(OMAPL138_HAWKBOARD, "AM18x/OMAP-L138 Hawkboard")
 	.init_irq	= cp_intc_init,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
 	.timer		= &davinci_timer,
 	.init_machine	= omapl138_hawk_init,
 	.init_machine	= omapl138_hawk_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-sffsdr.c

@@ -156,4 +156,5 @@ MACHINE_START(SFFSDR, "Lyrtech SFFSDR")
 	.init_irq     = davinci_irq_init,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
 	.timer	      = &davinci_timer,
 	.init_machine = davinci_sffsdr_init,
 	.init_machine = davinci_sffsdr_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-davinci/board-tnetv107x-evm.c

@@ -282,4 +282,5 @@ MACHINE_START(TNETV107X, "TNETV107X EVM")
 	.init_irq	= cp_intc_init,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
 	.timer		= &davinci_timer,
 	.init_machine	= tnetv107x_evm_board_init,
 	.init_machine	= tnetv107x_evm_board_init,
+	.dma_zone_size	= SZ_128M,
 MACHINE_END
 MACHINE_END

+ 0 - 3
arch/arm/mach-davinci/include/mach/entry-macro.S

@@ -46,6 +46,3 @@
 #endif
 #endif
 1002:
 1002:
 		.endm
 		.endm
-
-		.macro	irq_prio_table
-		.endm

+ 0 - 7
arch/arm/mach-davinci/include/mach/memory.h

@@ -41,11 +41,4 @@
  */
  */
 #define CONSISTENT_DMA_SIZE (14<<20)
 #define CONSISTENT_DMA_SIZE (14<<20)
 
 
-/*
- * Restrict DMA-able region to workaround silicon bug.  The bug
- * restricts buffers available for DMA to video hardware to be
- * below 128M
- */
-#define ARM_DMA_ZONE_SIZE	SZ_128M
-
 #endif /* __ASM_ARCH_MEMORY_H */
 #endif /* __ASM_ARCH_MEMORY_H */

+ 0 - 8
arch/arm/mach-exynos4/platsmp.c

@@ -154,14 +154,6 @@ void __init smp_init_cpus(void)
 
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 
 
 	scu_enable(scu_base_addr());
 	scu_enable(scu_base_addr());
 
 

+ 1 - 1
arch/arm/mach-exynos4/pm.c

@@ -280,7 +280,7 @@ static struct sleep_save exynos4_l2cc_save[] = {
 	SAVE_ITEM(S5P_VA_L2CC + L2X0_AUX_CTRL),
 	SAVE_ITEM(S5P_VA_L2CC + L2X0_AUX_CTRL),
 };
 };
 
 
-void exynos4_cpu_suspend(void)
+static int exynos4_cpu_suspend(unsigned long arg)
 {
 {
 	unsigned long tmp;
 	unsigned long tmp;
 	unsigned long mask = 0xFFFFFFFF;
 	unsigned long mask = 0xFFFFFFFF;

+ 0 - 22
arch/arm/mach-exynos4/sleep.S

@@ -32,28 +32,6 @@
 
 
 	.text
 	.text
 
 
-	/*
-	 * s3c_cpu_save
-	 *
-	 * entry:
-	 *	r1 = v:p offset
-	 */
-
-ENTRY(s3c_cpu_save)
-
-	stmfd	sp!, { r3 - r12, lr }
-	ldr	r3, =resume_with_mmu
-	bl	cpu_suspend
-
-	ldr	r0, =pm_cpu_sleep
-	ldr	r0, [ r0 ]
-	mov	pc, r0
-
-resume_with_mmu:
-	ldmfd	sp!, { r3 - r12, pc }
-
-	.ltorg
-
 	/*
 	/*
 	 * sleep magic, to allow the bootloader to check for an valid
 	 * sleep magic, to allow the bootloader to check for an valid
 	 * image to resume to. Must be the first word before the
 	 * image to resume to. Must be the first word before the

+ 1 - 0
arch/arm/mach-h720x/h7201-eval.c

@@ -33,4 +33,5 @@ MACHINE_START(H7201, "Hynix GMS30C7201")
 	.map_io		= h720x_map_io,
 	.map_io		= h720x_map_io,
 	.init_irq	= h720x_init_irq,
 	.init_irq	= h720x_init_irq,
 	.timer		= &h7201_timer,
 	.timer		= &h7201_timer,
+	.dma_zone_size	= SZ_256M,
 MACHINE_END
 MACHINE_END

+ 1 - 0
arch/arm/mach-h720x/h7202-eval.c

@@ -76,4 +76,5 @@ MACHINE_START(H7202, "Hynix HMS30C7202")
 	.init_irq	= h7202_init_irq,
 	.init_irq	= h7202_init_irq,
 	.timer		= &h7202_timer,
 	.timer		= &h7202_timer,
 	.init_machine	= init_eval_h7202,
 	.init_machine	= init_eval_h7202,
+	.dma_zone_size	= SZ_256M,
 MACHINE_END
 MACHINE_END

+ 0 - 3
arch/arm/mach-h720x/include/mach/entry-macro.S

@@ -57,9 +57,6 @@
 		tst     \irqstat, #1		       @ bit 0 should be set
 		tst     \irqstat, #1		       @ bit 0 should be set
 		.endm
 		.endm
 
 
-		.macro  irq_prio_table
-		.endm
-
 #else
 #else
 #error hynix processor selection missmatch
 #error hynix processor selection missmatch
 #endif
 #endif

+ 0 - 7
arch/arm/mach-h720x/include/mach/memory.h

@@ -8,11 +8,4 @@
 #define __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
 
 #define PLAT_PHYS_OFFSET	UL(0x40000000)
 #define PLAT_PHYS_OFFSET	UL(0x40000000)
-/*
- * This is the maximum DMA address that can be DMAd to.
- * There should not be more than (0xd0000000 - 0xc0000000)
- * bytes of RAM.
- */
-#define ARM_DMA_ZONE_SIZE	SZ_256M
-
 #endif
 #endif

+ 6 - 0
arch/arm/mach-ixp4xx/avila-setup.c

@@ -169,6 +169,9 @@ MACHINE_START(AVILA, "Gateworks Avila Network Platform")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= avila_init,
 	.init_machine	= avila_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 
 
  /*
  /*
@@ -184,6 +187,9 @@ MACHINE_START(LOFT, "Giant Shoulder Inc Loft board")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= avila_init,
 	.init_machine	= avila_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif
 
 

+ 6 - 6
arch/arm/mach-ixp4xx/common-pci.c

@@ -316,6 +316,11 @@ static int abort_handler(unsigned long addr, unsigned int fsr, struct pt_regs *r
 }
 }
 
 
 
 
+static int ixp4xx_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
+{
+	return (dma_addr + size) >= SZ_64M;
+}
+
 /*
 /*
  * Setup DMA mask to 64MB on PCI devices. Ignore all other devices.
  * Setup DMA mask to 64MB on PCI devices. Ignore all other devices.
  */
  */
@@ -324,7 +329,7 @@ static int ixp4xx_pci_platform_notify(struct device *dev)
 	if(dev->bus == &pci_bus_type) {
 	if(dev->bus == &pci_bus_type) {
 		*dev->dma_mask =  SZ_64M - 1;
 		*dev->dma_mask =  SZ_64M - 1;
 		dev->coherent_dma_mask = SZ_64M - 1;
 		dev->coherent_dma_mask = SZ_64M - 1;
-		dmabounce_register_dev(dev, 2048, 4096);
+		dmabounce_register_dev(dev, 2048, 4096, ixp4xx_needs_bounce);
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -337,11 +342,6 @@ static int ixp4xx_pci_platform_notify_remove(struct device *dev)
 	return 0;
 	return 0;
 }
 }
 
 
-int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
-{
-	return (dev->bus == &pci_bus_type ) && ((dma_addr + size) >= SZ_64M);
-}
-
 void __init ixp4xx_pci_preinit(void)
 void __init ixp4xx_pci_preinit(void)
 {
 {
 	unsigned long cpuid = read_cpuid_id();
 	unsigned long cpuid = read_cpuid_id();

+ 3 - 0
arch/arm/mach-ixp4xx/coyote-setup.c

@@ -114,6 +114,9 @@ MACHINE_START(ADI_COYOTE, "ADI Engineering Coyote")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= coyote_init,
 	.init_machine	= coyote_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif
 
 

+ 3 - 0
arch/arm/mach-ixp4xx/dsmg600-setup.c

@@ -284,4 +284,7 @@ MACHINE_START(DSMG600, "D-Link DSM-G600 RevA")
 	.init_irq	= ixp4xx_init_irq,
 	.init_irq	= ixp4xx_init_irq,
 	.timer          = &dsmg600_timer,
 	.timer          = &dsmg600_timer,
 	.init_machine	= dsmg600_init,
 	.init_machine	= dsmg600_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END

+ 3 - 0
arch/arm/mach-ixp4xx/fsg-setup.c

@@ -275,5 +275,8 @@ MACHINE_START(FSG, "Freecom FSG-3")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= fsg_init,
 	.init_machine	= fsg_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 
 

+ 3 - 0
arch/arm/mach-ixp4xx/gateway7001-setup.c

@@ -101,5 +101,8 @@ MACHINE_START(GATEWAY7001, "Gateway 7001 AP")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= gateway7001_init,
 	.init_machine	= gateway7001_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif

+ 3 - 0
arch/arm/mach-ixp4xx/goramo_mlr.c

@@ -501,4 +501,7 @@ MACHINE_START(GORAMO_MLR, "MultiLink")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= gmlr_init,
 	.init_machine	= gmlr_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END

+ 3 - 0
arch/arm/mach-ixp4xx/gtwx5715-setup.c

@@ -169,6 +169,9 @@ MACHINE_START(GTWX5715, "Gemtek GTWX5715 (Linksys WRV54G)")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= gtwx5715_init,
 	.init_machine	= gtwx5715_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 
 
 
 

+ 0 - 4
arch/arm/mach-ixp4xx/include/mach/memory.h

@@ -14,8 +14,4 @@
  */
  */
 #define PLAT_PHYS_OFFSET	UL(0x00000000)
 #define PLAT_PHYS_OFFSET	UL(0x00000000)
 
 
-#ifdef CONFIG_PCI
-#define ARM_DMA_ZONE_SIZE	SZ_64M
-#endif
-
 #endif
 #endif

+ 12 - 0
arch/arm/mach-ixp4xx/ixdp425-setup.c

@@ -258,6 +258,9 @@ MACHINE_START(IXDP425, "Intel IXDP425 Development Platform")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= ixdp425_init,
 	.init_machine	= ixdp425_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif
 
 
@@ -269,6 +272,9 @@ MACHINE_START(IXDP465, "Intel IXDP465 Development Platform")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= ixdp425_init,
 	.init_machine	= ixdp425_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif
 
 
@@ -280,6 +286,9 @@ MACHINE_START(IXCDP1100, "Intel IXCDP1100 Development Platform")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= ixdp425_init,
 	.init_machine	= ixdp425_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif
 
 
@@ -291,5 +300,8 @@ MACHINE_START(KIXRP435, "Intel KIXRP435 Reference Platform")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= ixdp425_init,
 	.init_machine	= ixdp425_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif

+ 3 - 0
arch/arm/mach-ixp4xx/nas100d-setup.c

@@ -319,4 +319,7 @@ MACHINE_START(NAS100D, "Iomega NAS 100d")
 	.init_irq	= ixp4xx_init_irq,
 	.init_irq	= ixp4xx_init_irq,
 	.timer          = &ixp4xx_timer,
 	.timer          = &ixp4xx_timer,
 	.init_machine	= nas100d_init,
 	.init_machine	= nas100d_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END

+ 3 - 0
arch/arm/mach-ixp4xx/nslu2-setup.c

@@ -305,4 +305,7 @@ MACHINE_START(NSLU2, "Linksys NSLU2")
 	.init_irq	= ixp4xx_init_irq,
 	.init_irq	= ixp4xx_init_irq,
 	.timer          = &nslu2_timer,
 	.timer          = &nslu2_timer,
 	.init_machine	= nslu2_init,
 	.init_machine	= nslu2_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END

+ 3 - 0
arch/arm/mach-ixp4xx/vulcan-setup.c

@@ -241,4 +241,7 @@ MACHINE_START(ARCOM_VULCAN, "Arcom/Eurotech Vulcan")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= vulcan_init,
 	.init_machine	= vulcan_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END

+ 3 - 0
arch/arm/mach-ixp4xx/wg302v2-setup.c

@@ -102,5 +102,8 @@ MACHINE_START(WG302V2, "Netgear WG302 v2 / WAG302 v2")
 	.timer		= &ixp4xx_timer,
 	.timer		= &ixp4xx_timer,
 	.boot_params	= 0x0100,
 	.boot_params	= 0x0100,
 	.init_machine	= wg302v2_init,
 	.init_machine	= wg302v2_init,
+#if defined(CONFIG_PCI)
+	.dma_zone_size	= SZ_64M,
+#endif
 MACHINE_END
 MACHINE_END
 #endif
 #endif

+ 0 - 4
arch/arm/mach-lpc32xx/include/mach/entry-macro.S

@@ -41,7 +41,3 @@
 	rsb	\irqnr, \irqnr, #31
 	rsb	\irqnr, \irqnr, #31
 	teq	\irqstat, #0
 	teq	\irqstat, #0
 	.endm
 	.endm
-
-	.macro	irq_prio_table
-	.endm
-

+ 0 - 8
arch/arm/mach-msm/platsmp.c

@@ -157,12 +157,4 @@ void __init smp_init_cpus(void)
 
 
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 void __init platform_smp_prepare_cpus(unsigned int max_cpus)
 {
 {
-	int i;
-
-	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
-	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
 }
 }

+ 4 - 3
arch/arm/mach-omap2/control.c

@@ -286,14 +286,15 @@ void omap3_save_scratchpad_contents(void)
 	scratchpad_contents.boot_config_ptr = 0x0;
 	scratchpad_contents.boot_config_ptr = 0x0;
 	if (cpu_is_omap3630())
 	if (cpu_is_omap3630())
 		scratchpad_contents.public_restore_ptr =
 		scratchpad_contents.public_restore_ptr =
-			virt_to_phys(get_omap3630_restore_pointer());
+			virt_to_phys(omap3_restore_3630);
 	else if (omap_rev() != OMAP3430_REV_ES3_0 &&
 	else if (omap_rev() != OMAP3430_REV_ES3_0 &&
 					omap_rev() != OMAP3430_REV_ES3_1)
 					omap_rev() != OMAP3430_REV_ES3_1)
 		scratchpad_contents.public_restore_ptr =
 		scratchpad_contents.public_restore_ptr =
-			virt_to_phys(get_restore_pointer());
+			virt_to_phys(omap3_restore);
 	else
 	else
 		scratchpad_contents.public_restore_ptr =
 		scratchpad_contents.public_restore_ptr =
-			virt_to_phys(get_es3_restore_pointer());
+			virt_to_phys(omap3_restore_es3);
+
 	if (omap_type() == OMAP2_DEVICE_TYPE_GP)
 	if (omap_type() == OMAP2_DEVICE_TYPE_GP)
 		scratchpad_contents.secure_ram_restore_ptr = 0x0;
 		scratchpad_contents.secure_ram_restore_ptr = 0x0;
 	else
 	else

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно