
Merge branch 'clksrc' into devel

Conflicts:
	arch/arm/mach-vexpress/v2m.c
	arch/arm/plat-omap/counter_32k.c
	arch/arm/plat-versatile/Makefile
Russell King, 14 years ago
parent
commit
58daf18cdc
55 changed files with 3166 additions and 2812 deletions
  1. arch/arm/Kconfig  (+22 -4)
  2. arch/arm/Kconfig.debug  (+1 -1)
  3. arch/arm/common/timer-sp.c  (+1 -3)
  4. arch/arm/include/asm/sched_clock.h  (+118 -0)
  5. arch/arm/include/asm/system.h  (+5 -0)
  6. arch/arm/include/asm/traps.h  (+21 -2)
  7. arch/arm/kernel/Makefile  (+3 -1)
  8. arch/arm/kernel/entry-common.S  (+137 -65)
  9. arch/arm/kernel/ftrace.c  (+98 -5)
  10. arch/arm/kernel/irq.c  (+3 -1)
  11. arch/arm/kernel/perf_event.c  (+27 -2382)
  12. arch/arm/kernel/perf_event_v6.c  (+672 -0)
  13. arch/arm/kernel/perf_event_v7.c  (+906 -0)
  14. arch/arm/kernel/perf_event_xscale.c  (+807 -0)
  15. arch/arm/kernel/sched_clock.c  (+69 -0)
  16. arch/arm/kernel/smp.c  (+3 -2)
  17. arch/arm/kernel/vmlinux.lds.S  (+1 -0)
  18. arch/arm/mach-at91/at91rm9200_time.c  (+1 -3)
  19. arch/arm/mach-at91/at91sam926x_time.c  (+1 -3)
  20. arch/arm/mach-bcmring/core.c  (+4 -10)
  21. arch/arm/mach-davinci/time.c  (+2 -5)
  22. arch/arm/mach-integrator/integrator_ap.c  (+1 -3)
  23. arch/arm/mach-ixp4xx/common.c  (+20 -15)
  24. arch/arm/mach-lpc32xx/timer.c  (+1 -4)
  25. arch/arm/mach-mmp/time.c  (+13 -26)
  26. arch/arm/mach-msm/Kconfig  (+2 -0)
  27. arch/arm/mach-msm/timer.c  (+1 -4)
  28. arch/arm/mach-netx/time.c  (+1 -4)
  29. arch/arm/mach-ns9xxx/time-ns9360.c  (+1 -5)
  30. arch/arm/mach-omap1/time.c  (+1 -5)
  31. arch/arm/mach-omap2/timer-gp.c  (+1 -4)
  32. arch/arm/mach-pxa/time.c  (+10 -25)
  33. arch/arm/mach-realview/core.c  (+10 -0)
  34. arch/arm/mach-s5pv310/time.c  (+1 -5)
  35. arch/arm/mach-sa1100/generic.c  (+0 -23)
  36. arch/arm/mach-sa1100/time.c  (+31 -5)
  37. arch/arm/mach-tcc8k/time.c  (+1 -4)
  38. arch/arm/mach-tegra/timer.c  (+25 -6)
  39. arch/arm/mach-u300/timer.c  (+15 -7)
  40. arch/arm/mach-versatile/core.c  (+10 -0)
  41. arch/arm/mach-vexpress/v2m.c  (+4 -1)
  42. arch/arm/mach-w90x900/time.c  (+1 -4)
  43. arch/arm/plat-iop/time.c  (+17 -10)
  44. arch/arm/plat-mxc/epit.c  (+1 -4)
  45. arch/arm/plat-mxc/time.c  (+1 -4)
  46. arch/arm/plat-nomadik/Kconfig  (+1 -0)
  47. arch/arm/plat-nomadik/timer.c  (+11 -69)
  48. arch/arm/plat-omap/counter_32k.c  (+32 -15)
  49. arch/arm/plat-orion/time.c  (+11 -39)
  50. arch/arm/plat-spear/time.c  (+1 -5)
  51. arch/arm/plat-stmp3xxx/timer.c  (+1 -4)
  52. arch/arm/plat-versatile/Makefile  (+3 -2)
  53. arch/arm/plat-versatile/include/plat/sched_clock.h  (+6 -0)
  54. arch/arm/plat-versatile/sched-clock.c  (+28 -23)
  55. kernel/time/clocksource.c  (+1 -0)

+ 22 - 4
arch/arm/Kconfig

@@ -14,6 +14,7 @@ config ARM
 	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
 	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
+	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
@@ -38,6 +39,9 @@ config HAVE_PWM
 config SYS_SUPPORTS_APM_EMULATION
 	bool
 
+config HAVE_SCHED_CLOCK
+	bool
+
 config GENERIC_GPIO
 	bool
 
@@ -233,6 +237,7 @@ config ARCH_REALVIEW
 	bool "ARM Ltd. RealView family"
 	select ARM_AMBA
 	select COMMON_CLKDEV
+	select HAVE_SCHED_CLOCK
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -247,6 +252,7 @@ config ARCH_VERSATILE
 	select ARM_AMBA
 	select ARM_VIC
 	select COMMON_CLKDEV
+	select HAVE_SCHED_CLOCK
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -263,6 +269,7 @@ config ARCH_VEXPRESS
 	select COMMON_CLKDEV
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
+	select HAVE_SCHED_CLOCK
 	select ICST
 	select PLAT_VERSATILE
 	help
@@ -434,6 +441,7 @@ config ARCH_IXP4XX
 	select CPU_XSCALE
 	select GENERIC_GPIO
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 	select DMABOUNCE if PCI
 	help
 	  Support for Intel's IXP4XX (XScale) family of processors.
@@ -509,6 +517,7 @@ config ARCH_MMP
 	select ARCH_REQUIRE_GPIOLIB
 	select COMMON_CLKDEV
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select PLAT_PXA
 	select SPARSE_IRQ
@@ -565,6 +574,7 @@ config ARCH_TEGRA
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_GPIO
 	select HAVE_CLK
+	select HAVE_SCHED_CLOCK
 	select COMMON_CLKDEV
 	select ARCH_HAS_BARRIERS if CACHE_L2X0
 	select ARCH_HAS_CPUFREQ
@@ -588,6 +598,7 @@ config ARCH_PXA
 	select COMMON_CLKDEV
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select PLAT_PXA
 	select SPARSE_IRQ
@@ -636,6 +647,7 @@ config ARCH_SA1100
 	select CPU_FREQ
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
+	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select ARCH_REQUIRE_GPIOLIB
 	help
@@ -782,6 +794,7 @@ config ARCH_U300
 	bool "ST-Ericsson U300 Series"
 	depends on MMU
 	select CPU_ARM926T
+	select HAVE_SCHED_CLOCK
 	select HAVE_TCM
 	select ARM_AMBA
 	select ARM_VIC
@@ -830,6 +843,7 @@ config ARCH_OMAP
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_HAS_CPUFREQ
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	help
 	  Support for TI's OMAP platform (OMAP1/2/3/4).
@@ -983,9 +997,11 @@ config ARCH_ACORN
 config PLAT_IOP
 	bool
 	select GENERIC_CLOCKEVENTS
+	select HAVE_SCHED_CLOCK
 
 config PLAT_ORION
 	bool
+	select HAVE_SCHED_CLOCK
 
 config PLAT_PXA
 	bool
@@ -1212,10 +1228,11 @@ config SMP
 	depends on EXPERIMENTAL
 	depends on GENERIC_CLOCKEVENTS
 	depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \
-		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
-		 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4
+		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
+		 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \
+		 ARCH_MSM_SCORPIONMP
 	select USE_GENERIC_SMP_HELPERS
-	select HAVE_ARM_SCU
+	select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -1290,6 +1307,7 @@ config NR_CPUS
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
 	depends on SMP && HOTPLUG && EXPERIMENTAL
+	depends on !ARCH_MSM
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
@@ -1298,7 +1316,7 @@ config LOCAL_TIMERS
 	bool "Use local timer interrupts"
 	depends on SMP
 	default y
-	select HAVE_ARM_TWD
+	select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP
 	help
 	  Enable support for local timers on SMP platforms, rather then the
 	  legacy IPI broadcast method.  Local timers allows the system

+ 1 - 1
arch/arm/Kconfig.debug

@@ -23,7 +23,7 @@ config STRICT_DEVMEM
 config FRAME_POINTER
 	bool
 	depends on !THUMB2_KERNEL
-	default y if !ARM_UNWIND
+	default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER
 	help
 	  If you say N here, the resulting kernel will be slightly smaller and
 	  faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled,

+ 1 - 3
arch/arm/common/timer-sp.c

@@ -44,7 +44,6 @@ static struct clocksource clocksource_sp804 = {
 	.rating		= 200,
 	.read		= sp804_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -61,8 +60,7 @@ void __init sp804_clocksource_init(void __iomem *base)
 	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
 		clksrc_base + TIMER_CTRL);
 
-	cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift);
-	clocksource_register(cs);
+	clocksource_register_khz(cs, TIMER_FREQ_KHZ);
 }
 
 

+ 118 - 0
arch/arm/include/asm/sched_clock.h

@@ -0,0 +1,118 @@
+/*
+ * sched_clock.h: support for extending counters to full 64-bit ns counter
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef ASM_SCHED_CLOCK
+#define ASM_SCHED_CLOCK
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct clock_data {
+	u64 epoch_ns;
+	u32 epoch_cyc;
+	u32 epoch_cyc_copy;
+	u32 mult;
+	u32 shift;
+};
+
+#define DEFINE_CLOCK_DATA(name)	struct clock_data name
+
+static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
+{
+	return (cyc * mult) >> shift;
+}
+
+/*
+ * Atomically update the sched_clock epoch.  Your update callback will
+ * be called from a timer before the counter wraps - read the current
+ * counter value, and call this function to safely move the epochs
+ * forward.  Only use this from the update callback.
+ */
+static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask)
+{
+	unsigned long flags;
+	u64 ns = cd->epoch_ns +
+		cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift);
+
+	/*
+	 * Write epoch_cyc and epoch_ns in a way that the update is
+	 * detectable in cyc_to_fixed_sched_clock().
+	 */
+	raw_local_irq_save(flags);
+	cd->epoch_cyc = cyc;
+	smp_wmb();
+	cd->epoch_ns = ns;
+	smp_wmb();
+	cd->epoch_cyc_copy = cyc;
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * If your clock rate is known at compile time, using this will allow
+ * you to optimize the mult/shift loads away.  This is paired with
+ * init_fixed_sched_clock() to ensure that your mult/shift are correct.
+ */
+static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd,
+	u32 cyc, u32 mask, u32 mult, u32 shift)
+{
+	u64 epoch_ns;
+	u32 epoch_cyc;
+
+	/*
+	 * Load the epoch_cyc and epoch_ns atomically.  We do this by
+	 * ensuring that we always write epoch_cyc, epoch_ns and
+	 * epoch_cyc_copy in strict order, and read them in strict order.
+	 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
+	 * the middle of an update, and we should repeat the load.
+	 */
+	do {
+		epoch_cyc = cd->epoch_cyc;
+		smp_rmb();
+		epoch_ns = cd->epoch_ns;
+		smp_rmb();
+	} while (epoch_cyc != cd->epoch_cyc_copy);
+
+	return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift);
+}
+
+/*
+ * Otherwise, you need to use this, which will obtain the mult/shift
+ * from the clock_data structure.  Use init_sched_clock() with this.
+ */
+static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd,
+	u32 cyc, u32 mask)
+{
+	return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift);
+}
+
+/*
+ * Initialize the clock data - calculate the appropriate multiplier
+ * and shift.  Also setup a timer to ensure that the epoch is refreshed
+ * at the appropriate time interval, which will call your update
+ * handler.
+ */
+void init_sched_clock(struct clock_data *, void (*)(void),
+	unsigned int, unsigned long);
+
+/*
+ * Use this initialization function rather than init_sched_clock() if
+ * you're using cyc_to_fixed_sched_clock, which will warn if your
+ * constants are incorrect.
+ */
+static inline void init_fixed_sched_clock(struct clock_data *cd,
+	void (*update)(void), unsigned int bits, unsigned long rate,
+	u32 mult, u32 shift)
+{
+	init_sched_clock(cd, update, bits, rate);
+	if (cd->mult != mult || cd->shift != shift) {
+		pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n"
+			"sched_clock: fix multiply/shift to avoid scheduler hiccups\n",
+			mult, shift, cd->mult, cd->shift);
+	}
+}
+
+#endif
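
Editor's note: for context, the pattern a platform is expected to follow with this API. The conversion is fixed-point, ns = (cyc * mult) >> shift, with mult and shift chosen by init_sched_clock() from the counter width and rate. This is a minimal sketch, assuming a hypothetical free-running 32-bit counter read by counter_read(); the real users are the mach-*/plat-* timer files updated later in this merge.

static DEFINE_CLOCK_DATA(cd);

unsigned long long notrace sched_clock(void)
{
	u32 cyc = counter_read();	/* hypothetical counter read */
	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}

static void notrace example_update_sched_clock(void)
{
	u32 cyc = counter_read();	/* hypothetical counter read */
	update_sched_clock(&cd, cyc, (u32)~0);
}

static void __init example_clocksource_init(unsigned long rate_hz)
{
	/* 32-bit counter; this also arms the wrap-avoidance timer */
	init_sched_clock(&cd, example_update_sched_clock, 32, rate_hz);
}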

+ 5 - 0
arch/arm/include/asm/system.h

@@ -63,6 +63,11 @@
 #include <asm/outercache.h>
 
 #define __exception	__attribute__((section(".exception.text")))
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#define __exception_irq_entry	__irq_entry
+#else
+#define __exception_irq_entry	__exception
+#endif
 
 struct thread_info;
 struct task_struct;

+ 21 - 2
arch/arm/include/asm/traps.h

@@ -15,13 +15,32 @@ struct undef_hook {
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static inline int __in_irqentry_text(unsigned long ptr)
+{
+	extern char __irqentry_text_start[];
+	extern char __irqentry_text_end[];
+
+	return ptr >= (unsigned long)&__irqentry_text_start &&
+	       ptr < (unsigned long)&__irqentry_text_end;
+}
+#else
+static inline int __in_irqentry_text(unsigned long ptr)
+{
+	return 0;
+}
+#endif
+
 static inline int in_exception_text(unsigned long ptr)
 {
 	extern char __exception_text_start[];
 	extern char __exception_text_end[];
+	int in;
+
+	in = ptr >= (unsigned long)&__exception_text_start &&
+	     ptr < (unsigned long)&__exception_text_end;
 
-	return ptr >= (unsigned long)&__exception_text_start &&
-	       ptr < (unsigned long)&__exception_text_end;
+	return in ? : __in_irqentry_text(ptr);
 }
 
 extern void __init early_trap_init(void);
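
Editor's note: the point of folding the irqentry range into in_exception_text() is unwinding. When a backtrace entry lands in exception (or, with this change, irqentry) text, the register frame saved on the stack is worth dumping as well. A rough sketch of that consumer, modelled on the ARM backtrace code; dump_exception_stack() is a hypothetical stand-in for the actual dump helper:

static void dump_backtrace_entry(unsigned long where, unsigned long from,
				 unsigned long frame)
{
	printk("[<%08lx>] from [<%08lx>]\n", where, from);

	/* the saved pt_regs sit just above the frame pointer here */
	if (in_exception_text(where))
		dump_exception_stack(frame + 4,
				     frame + 4 + sizeof(struct pt_regs));
}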

+ 3 - 1
arch/arm/kernel/Makefile

@@ -5,7 +5,7 @@
 CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o        := -DTEXT_OFFSET=$(TEXT_OFFSET)
 
-ifdef CONFIG_DYNAMIC_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
@@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES)		+= armksyms.o module.o
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
+obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-decode.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o

+ 137 - 65
arch/arm/kernel/entry-common.S

@@ -147,98 +147,170 @@ ENDPROC(ret_from_fork)
 #endif
 #endif
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(__gnu_mcount_nc)
-	mov	ip, lr
-	ldmia	sp!, {lr}
-	mov	pc, ip
-ENDPROC(__gnu_mcount_nc)
+.macro __mcount suffix
+	mcount_enter
+	ldr	r0, =ftrace_trace_function
+	ldr	r2, [r0]
+	adr	r0, .Lftrace_stub
+	cmp	r0, r2
+	bne	1f
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ldr     r1, =ftrace_graph_return
+	ldr     r2, [r1]
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+
+	ldr     r1, =ftrace_graph_entry
+	ldr     r2, [r1]
+	ldr     r0, =ftrace_graph_entry_stub
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+#endif
 
-ENTRY(ftrace_caller)
-	stmdb	sp!, {r0-r3, lr}
-	mov	r0, lr
+	mcount_exit
+
+1: 	mcount_get_lr	r1			@ lr of instrumented func
+	mov	r0, lr				@ instrumented function
+	sub	r0, r0, #MCOUNT_INSN_SIZE
+	adr	lr, BSYM(2f)
+	mov	pc, r2
+2:	mcount_exit
+.endm
+
+.macro __ftrace_caller suffix
+	mcount_enter
+
+	mcount_get_lr	r1			@ lr of instrumented func
+	mov	r0, lr				@ instrumented function
 	sub	r0, r0, #MCOUNT_INSN_SIZE
-	ldr	r1, [sp, #20]
 
-	.global	ftrace_call
-ftrace_call:
+	.globl ftrace_call\suffix
+ftrace_call\suffix:
 	bl	ftrace_stub
-	ldmia	sp!, {r0-r3, ip, lr}
-	mov	pc, ip
-ENDPROC(ftrace_caller)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl ftrace_graph_call\suffix
+ftrace_graph_call\suffix:
+	mov	r0, r0
+#endif
+
+	mcount_exit
+.endm
+
+.macro __ftrace_graph_caller
+	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	@ called from __ftrace_caller, saved in mcount_enter
+	ldr	r1, [sp, #16]		@ instrumented routine (func)
+#else
+	@ called from __mcount, untouched in lr
+	mov	r1, lr			@ instrumented routine (func)
+#endif
+	sub	r1, r1, #MCOUNT_INSN_SIZE
+	mov	r2, fp			@ frame pointer
+	bl	prepare_ftrace_return
+	mcount_exit
+.endm
 
 #ifdef CONFIG_OLD_MCOUNT
+/*
+ * mcount
+ */
+
+.macro mcount_enter
+	stmdb	sp!, {r0-r3, lr}
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [fp, #-4]
+.endm
+
+.macro mcount_exit
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {r0-r3, pc}
+.endm
+
 ENTRY(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
 	stmdb	sp!, {lr}
 	ldr	lr, [fp, #-4]
 	ldmia	sp!, {pc}
+#else
+	__mcount _old
+#endif
 ENDPROC(mcount)
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(ftrace_caller_old)
-	stmdb	sp!, {r0-r3, lr}
-	ldr	r1, [fp, #-4]
-	mov	r0, lr
-	sub	r0, r0, #MCOUNT_INSN_SIZE
-
-	.globl ftrace_call_old
-ftrace_call_old:
-	bl	ftrace_stub
-	ldr	lr, [fp, #-4]			@ restore lr
-	ldmia	sp!, {r0-r3, pc}
+	__ftrace_caller _old
 ENDPROC(ftrace_caller_old)
 #endif
 
-#else
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller_old)
+	__ftrace_graph_caller
+ENDPROC(ftrace_graph_caller_old)
+#endif
 
-ENTRY(__gnu_mcount_nc)
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+#endif
+
+/*
+ * __gnu_mcount_nc
+ */
+
+.macro mcount_enter
 	stmdb	sp!, {r0-r3, lr}
-	ldr	r0, =ftrace_trace_function
-	ldr	r2, [r0]
-	adr	r0, .Lftrace_stub
-	cmp	r0, r2
-	bne	gnu_trace
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [sp, #20]
+.endm
+
+.macro mcount_exit
 	ldmia	sp!, {r0-r3, ip, lr}
 	mov	pc, ip
+.endm
 
-gnu_trace:
-	ldr	r1, [sp, #20]			@ lr of instrumented routine
-	mov	r0, lr
-	sub	r0, r0, #MCOUNT_INSN_SIZE
-	adr	lr, BSYM(1f)
-	mov	pc, r2
-1:
-	ldmia	sp!, {r0-r3, ip, lr}
+ENTRY(__gnu_mcount_nc)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	mov	ip, lr
+	ldmia	sp!, {lr}
 	mov	pc, ip
+#else
+	__mcount
+#endif
 ENDPROC(__gnu_mcount_nc)
 
-#ifdef CONFIG_OLD_MCOUNT
-/*
- * This is under an ifdef in order to force link-time errors for people trying
- * to build with !FRAME_POINTER with a GCC which doesn't use the new-style
- * mcount.
- */
-ENTRY(mcount)
-	stmdb	sp!, {r0-r3, lr}
-	ldr	r0, =ftrace_trace_function
-	ldr	r2, [r0]
-	adr	r0, ftrace_stub
-	cmp	r0, r2
-	bne	trace
-	ldr	lr, [fp, #-4]			@ restore lr
-	ldmia	sp!, {r0-r3, pc}
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller)
+	__ftrace_caller
+ENDPROC(ftrace_caller)
+#endif
 
-trace:
-	ldr	r1, [fp, #-4]			@ lr of instrumented routine
-	mov	r0, lr
-	sub	r0, r0, #MCOUNT_INSN_SIZE
-	mov	lr, pc
-	mov	pc, r2
-	ldr	lr, [fp, #-4]			@ restore lr
-	ldmia	sp!, {r0-r3, pc}
-ENDPROC(mcount)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+	__ftrace_graph_caller
+ENDPROC(ftrace_graph_caller)
 #endif
 
-#endif /* CONFIG_DYNAMIC_FTRACE */
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl return_to_handler
+return_to_handler:
+	stmdb	sp!, {r0-r3}
+	mov	r0, fp			@ frame pointer
+	bl	ftrace_return_to_handler
+	mov	lr, r0			@ r0 has real ret addr
+	ldmia	sp!, {r0-r3}
+	mov	pc, lr
+#endif
 
 ENTRY(ftrace_stub)
 .Lftrace_stub:

+ 98 - 5
arch/arm/kernel/ftrace.c

@@ -24,6 +24,7 @@
 #define	NOP		0xe8bd4000	/* pop {lr} */
 #endif
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 #ifdef CONFIG_OLD_MCOUNT
 #define OLD_MCOUNT_ADDR	((unsigned long) mcount)
 #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
@@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 }
 #endif
 
-/* construct a branch (BL) instruction to addr */
 #ifdef CONFIG_THUMB2_KERNEL
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+				       bool link)
 {
 	unsigned long s, j1, j2, i1, i2, imm10, imm11;
 	unsigned long first, second;
@@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 	j2 = (!i2) ^ s;
 
 	first = 0xf000 | (s << 10) | imm10;
-	second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11;
+	second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
+	if (link)
+		second |= 1 << 14;
 
 	return (second << 16) | first;
 }
 #else
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+				       bool link)
 {
+	unsigned long opcode = 0xea000000;
 	long offset;
 
+	if (link)
+		opcode |= 1 << 24;
+
 	offset = (long)addr - (long)(pc + 8);
 	if (unlikely(offset < -33554432 || offset > 33554428)) {
 		/* Can't generate branches that far (from ARM ARM). Ftrace
@@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 
 	offset = (offset >> 2) & 0x00ffffff;
 
-	return 0xeb000000 | offset;
+	return opcode | offset;
 }
 #endif
 
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+	return ftrace_gen_branch(pc, addr, true);
+}
+
 static int ftrace_modify_code(unsigned long pc, unsigned long old,
 			      unsigned long new)
 {
@@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data)
 
 	return 0;
 }
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long) &return_to_handler;
+	struct ftrace_graph_ent trace;
+	unsigned long old;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+	*parent = return_hooker;
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer);
+	if (err == -EBUSY) {
+		*parent = old;
+		return;
+	}
+
+	trace.func = self_addr;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		current->curr_ret_stack--;
+		*parent = old;
+	}
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+extern unsigned long ftrace_graph_call_old;
+extern void ftrace_graph_caller_old(void);
+
+static int __ftrace_modify_caller(unsigned long *callsite,
+				  void (*func) (void), bool enable)
+{
+	unsigned long caller_fn = (unsigned long) func;
+	unsigned long pc = (unsigned long) callsite;
+	unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
+	unsigned long nop = 0xe1a00000;	/* mov r0, r0 */
+	unsigned long old = enable ? nop : branch;
+	unsigned long new = enable ? branch : nop;
+
+	return ftrace_modify_code(pc, old, new);
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+	int ret;
+
+	ret = __ftrace_modify_caller(&ftrace_graph_call,
+				     ftrace_graph_caller,
+				     enable);
+
+#ifdef CONFIG_OLD_MCOUNT
+	if (!ret)
+		ret = __ftrace_modify_caller(&ftrace_graph_call_old,
+					     ftrace_graph_caller_old,
+					     enable);
+#endif
+
+	return ret;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
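
Editor's note: as a stand-alone illustration of the ARM (non-Thumb2) encoding that ftrace_gen_branch() produces above: B is 0xea000000, setting bit 24 turns it into BL, and the low 24 bits carry the signed word offset relative to pc + 8. A user-space sketch of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

static uint32_t gen_branch(uint32_t pc, uint32_t addr, int link)
{
	uint32_t opcode = link ? 0xeb000000 : 0xea000000;	/* BL : B */
	int32_t offset = (int32_t)(addr - (pc + 8)) >> 2;	/* word offset */

	return opcode | (offset & 0x00ffffff);
}

int main(void)
{
	/* BL from 0xc0008000 to 0xc0010000: (0x7ff8 >> 2) = 0x1ffe */
	printf("%08x\n", gen_branch(0xc0008000, 0xc0010000, 1)); /* eb001ffe */
	return 0;
}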

+ 3 - 1
arch/arm/kernel/irq.c

@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
+#include <linux/ftrace.h>
 
 #include <asm/system.h>
 #include <asm/mach/irq.h>
@@ -105,7 +106,8 @@ unlock:
  * come via this function.  Instead, they should provide their
  * own 'handler'
  */
-asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+asmlinkage void __exception_irq_entry
+asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 

Diff view of this file has been limited because it is too large
+ 27 - 2382
arch/arm/kernel/perf_event.c


+ 672 - 0
arch/arm/kernel/perf_event_v6.c

@@ -0,0 +1,672 @@
+/*
+ * ARMv6 Performance counter handling code.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ *
+ * ARMv6 has 2 configurable performance counters and a single cycle counter.
+ * They all share a single reset bit but can be written to zero so we can use
+ * that for a reset.
+ *
+ * The counters can't be individually enabled or disabled so when we remove
+ * one event and replace it with another we could get spurious counts from the
+ * wrong event. However, we can take advantage of the fact that the
+ * performance counters can export events to the event bus, and the event bus
+ * itself can be monitored. This requires that we *don't* export the events to
+ * the event bus. The procedure for disabling a configurable counter is:
+ *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
+ *	  effectively stops the counter from counting.
+ *	- disable the counter's interrupt generation (each counter has it's
+ *	  own interrupt enable bit).
+ * Once stopped, the counter value can be written as 0 to reset.
+ *
+ * To enable a counter:
+ *	- enable the counter's interrupt generation.
+ *	- set the new event type.
+ *
+ * Note: the dedicated cycle counter only counts cycles and can't be
+ * enabled/disabled independently of the others. When we want to disable the
+ * cycle counter, we have to just disable the interrupt reporting and start
+ * ignoring that counter. When re-enabling, we have to reset the value and
+ * enable the interrupt.
+ */
+
+#ifdef CONFIG_CPU_V6
+enum armv6_perf_types {
+	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
+	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
+	ARMV6_PERFCTR_DDEP_STALL	    = 0x2,
+	ARMV6_PERFCTR_ITLB_MISS		    = 0x3,
+	ARMV6_PERFCTR_DTLB_MISS		    = 0x4,
+	ARMV6_PERFCTR_BR_EXEC		    = 0x5,
+	ARMV6_PERFCTR_BR_MISPREDICT	    = 0x6,
+	ARMV6_PERFCTR_INSTR_EXEC	    = 0x7,
+	ARMV6_PERFCTR_DCACHE_HIT	    = 0x9,
+	ARMV6_PERFCTR_DCACHE_ACCESS	    = 0xA,
+	ARMV6_PERFCTR_DCACHE_MISS	    = 0xB,
+	ARMV6_PERFCTR_DCACHE_WBACK	    = 0xC,
+	ARMV6_PERFCTR_SW_PC_CHANGE	    = 0xD,
+	ARMV6_PERFCTR_MAIN_TLB_MISS	    = 0xF,
+	ARMV6_PERFCTR_EXPL_D_ACCESS	    = 0x10,
+	ARMV6_PERFCTR_LSU_FULL_STALL	    = 0x11,
+	ARMV6_PERFCTR_WBUF_DRAINED	    = 0x12,
+	ARMV6_PERFCTR_CPU_CYCLES	    = 0xFF,
+	ARMV6_PERFCTR_NOP		    = 0x20,
+};
+
+enum armv6_counters {
+	ARMV6_CYCLE_COUNTER = 1,
+	ARMV6_COUNTER0,
+	ARMV6_COUNTER1,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6_PERFCTR_INSTR_EXEC,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6_PERFCTR_BR_MISPREDICT,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		/*
+		 * The performance counters don't differentiate between read
+		 * and write accesses/misses so this isn't strictly correct,
+		 * but it's the best we can do. Writes and reads get
+		 * combined.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * The ARM performance counters can count micro DTLB misses,
+		 * micro ITLB misses and main TLB misses. There isn't an event
+		 * for TLB misses, so use the micro misses here and if users
+		 * want the main TLB misses they can use a raw counter.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+enum armv6mpcore_perf_types {
+	ARMV6MPCORE_PERFCTR_ICACHE_MISS	    = 0x0,
+	ARMV6MPCORE_PERFCTR_IBUF_STALL	    = 0x1,
+	ARMV6MPCORE_PERFCTR_DDEP_STALL	    = 0x2,
+	ARMV6MPCORE_PERFCTR_ITLB_MISS	    = 0x3,
+	ARMV6MPCORE_PERFCTR_DTLB_MISS	    = 0x4,
+	ARMV6MPCORE_PERFCTR_BR_EXEC	    = 0x5,
+	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
+	ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
+	ARMV6MPCORE_PERFCTR_INSTR_EXEC	    = 0x8,
+	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
+	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
+	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
+	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
+	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
+	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
+	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
+	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
+	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
+	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
+	ARMV6MPCORE_PERFCTR_CPU_CYCLES	    = 0xFF,
+};
+
+/*
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  =
+				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
+			[C(RESULT_MISS)]    =
+				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  =
+				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
+			[C(RESULT_MISS)]    =
+				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * The ARM performance counters can count micro DTLB misses,
+		 * micro ITLB misses and main TLB misses. There isn't an event
+		 * for TLB misses, so use the micro misses here and if users
+		 * want the main TLB misses they can use a raw counter.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+static inline unsigned long
+armv6_pmcr_read(void)
+{
+	u32 val;
+	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
+	return val;
+}
+
+static inline void
+armv6_pmcr_write(unsigned long val)
+{
+	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
+}
+
+#define ARMV6_PMCR_ENABLE		(1 << 0)
+#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
+#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
+#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
+#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
+#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
+#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
+#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
+#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
+#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
+#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
+#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
+#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
+#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
+
+#define ARMV6_PMCR_OVERFLOWED_MASK \
+	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
+	 ARMV6_PMCR_CCOUNT_OVERFLOW)
+
+static inline int
+armv6_pmcr_has_overflowed(unsigned long pmcr)
+{
+	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
+}
+
+static inline int
+armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
+				  enum armv6_counters counter)
+{
+	int ret = 0;
+
+	if (ARMV6_CYCLE_COUNTER == counter)
+		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
+	else if (ARMV6_COUNTER0 == counter)
+		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
+	else if (ARMV6_COUNTER1 == counter)
+		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+	return ret;
+}
+
+static inline u32
+armv6pmu_read_counter(int counter)
+{
+	unsigned long value = 0;
+
+	if (ARMV6_CYCLE_COUNTER == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
+	else if (ARMV6_COUNTER0 == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
+	else if (ARMV6_COUNTER1 == counter)
+		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+
+	return value;
+}
+
+static inline void
+armv6pmu_write_counter(int counter,
+		       u32 value)
+{
+	if (ARMV6_CYCLE_COUNTER == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
+	else if (ARMV6_COUNTER0 == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
+	else if (ARMV6_COUNTER1 == counter)
+		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
+	else
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+}
+
+void
+armv6pmu_enable_event(struct hw_perf_event *hwc,
+		      int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= 0;
+		evt	= ARMV6_PMCR_CCOUNT_IEN;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
+		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
+			  ARMV6_PMCR_COUNT0_IEN;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
+		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
+			  ARMV6_PMCR_COUNT1_IEN;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Mask out the current event and set the counter to count the event
+	 * that we're interested in.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static irqreturn_t
+armv6pmu_handle_irq(int irq_num,
+		    void *dev)
+{
+	unsigned long pmcr = armv6_pmcr_read();
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	if (!armv6_pmcr_has_overflowed(pmcr))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	/*
+	 * The interrupts are cleared by writing the overflow flags back to
+	 * the control register. All of the other bits don't have any effect
+	 * if they are rewritten, so write the whole value back.
+	 */
+	armv6_pmcr_write(pmcr);
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static void
+armv6pmu_start(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val |= ARMV6_PMCR_ENABLE;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+armv6pmu_stop(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~ARMV6_PMCR_ENABLE;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int
+armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
+		       struct hw_perf_event *event)
+{
+	/* Always place a cycle counter into the cycle counter. */
+	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
+		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return ARMV6_CYCLE_COUNTER;
+	} else {
+		/*
+		 * For anything other than a cycle counter, try and use
+		 * counter0 and counter1.
+		 */
+		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
+			return ARMV6_COUNTER1;
+
+		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
+			return ARMV6_COUNTER0;
+
+		/* The counters are all in use. */
+		return -EAGAIN;
+	}
+}
+
+static void
+armv6pmu_disable_event(struct hw_perf_event *hwc,
+		       int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= ARMV6_PMCR_CCOUNT_IEN;
+		evt	= 0;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
+		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
+		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Mask out the current event and set the counter to count the number
+	 * of ETM bus signal assertion cycles. The external reporting should
+	 * be disabled and so this should never increment.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
+			      int idx)
+{
+	unsigned long val, mask, flags, evt = 0;
+
+	if (ARMV6_CYCLE_COUNTER == idx) {
+		mask	= ARMV6_PMCR_CCOUNT_IEN;
+	} else if (ARMV6_COUNTER0 == idx) {
+		mask	= ARMV6_PMCR_COUNT0_IEN;
+	} else if (ARMV6_COUNTER1 == idx) {
+		mask	= ARMV6_PMCR_COUNT1_IEN;
+	} else {
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	/*
+	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
+	 * simply disable the interrupt reporting.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = armv6_pmcr_read();
+	val &= ~mask;
+	val |= evt;
+	armv6_pmcr_write(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static const struct arm_pmu armv6pmu = {
+	.id			= ARM_PERF_PMU_ID_V6,
+	.name			= "v6",
+	.handle_irq		= armv6pmu_handle_irq,
+	.enable			= armv6pmu_enable_event,
+	.disable		= armv6pmu_disable_event,
+	.read_counter		= armv6pmu_read_counter,
+	.write_counter		= armv6pmu_write_counter,
+	.get_event_idx		= armv6pmu_get_event_idx,
+	.start			= armv6pmu_start,
+	.stop			= armv6pmu_stop,
+	.cache_map		= &armv6_perf_cache_map,
+	.event_map		= &armv6_perf_map,
+	.raw_event_mask		= 0xFF,
+	.num_events		= 3,
+	.max_period		= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init armv6pmu_init(void)
+{
+	return &armv6pmu;
+}
+
+/*
+ * ARMv6mpcore is almost identical to single core ARMv6 with the exception
+ * that some of the events have different enumerations and that there is no
+ * *hack* to stop the programmable counters. To stop the counters we simply
+ * disable the interrupt reporting and update the event. When unthrottling we
+ * reset the period and enable the interrupt reporting.
+ */
+static const struct arm_pmu armv6mpcore_pmu = {
+	.id			= ARM_PERF_PMU_ID_V6MP,
+	.name			= "v6mpcore",
+	.handle_irq		= armv6pmu_handle_irq,
+	.enable			= armv6pmu_enable_event,
+	.disable		= armv6mpcore_pmu_disable_event,
+	.read_counter		= armv6pmu_read_counter,
+	.write_counter		= armv6pmu_write_counter,
+	.get_event_idx		= armv6pmu_get_event_idx,
+	.start			= armv6pmu_start,
+	.stop			= armv6pmu_stop,
+	.cache_map		= &armv6mpcore_perf_cache_map,
+	.event_map		= &armv6mpcore_perf_map,
+	.raw_event_mask		= 0xFF,
+	.num_events		= 3,
+	.max_period		= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+{
+	return &armv6mpcore_pmu;
+}
+#else
+const struct arm_pmu *__init armv6pmu_init(void)
+{
+	return NULL;
+}
+
+const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+{
+	return NULL;
+}
+#endif	/* CONFIG_CPU_V6 */
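
Editor's note: the counters this file wires up are consumed through the regular perf syscall; nothing is ARM-specific from user space. A minimal sketch counting cycles over a workload (PERF_COUNT_HW_CPU_CYCLES maps to ARMV6_PERFCTR_CPU_CYCLES via armv6_perf_map above):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;

	/* this process, any CPU, no event group */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under test ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	read(fd, &count, sizeof(count));
	printf("cycles: %llu\n", (unsigned long long)count);
	return 0;
}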

+ 906 - 0
arch/arm/kernel/perf_event_v7.c

@@ -0,0 +1,906 @@
+/*
+ * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
+ *
+ * ARMv7 support: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ *
+ * Copied from ARMv6 code, with the low level code inspired
+ *  by the ARMv7 Oprofile code.
+ *
+ * Cortex-A8 has up to 4 configurable performance counters and
+ *  a single cycle counter.
+ * Cortex-A9 has up to 31 configurable performance counters and
+ *  a single cycle counter.
+ *
+ * All counters can be enabled/disabled and IRQ masked separately. The cycle
+ *  counter and all 4 performance counters together can be reset separately.
+ */
+
+#ifdef CONFIG_CPU_V7
+/* Common ARMv7 event types */
+enum armv7_perf_types {
+	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
+	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
+	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
+	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
+	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
+	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
+	ARMV7_PERFCTR_DREAD			= 0x06,
+	ARMV7_PERFCTR_DWRITE			= 0x07,
+
+	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
+	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
+	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
+	/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
+	 * It counts:
+	 *  - all branch instructions,
+	 *  - instructions that explicitly write the PC,
+	 *  - exception generating instructions.
+	 */
+	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
+	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
+	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
+	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
+	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,
+
+	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,
+
+	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
+};
+
+/* ARMv7 Cortex-A8 specific event types */
+enum armv7_a8_perf_types {
+	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,
+
+	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,
+
+	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
+	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
+	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
+	ARMV7_PERFCTR_L2_ACCESS			= 0x43,
+	ARMV7_PERFCTR_L2_CACH_MISS		= 0x44,
+	ARMV7_PERFCTR_AXI_READ_CYCLES		= 0x45,
+	ARMV7_PERFCTR_AXI_WRITE_CYCLES		= 0x46,
+	ARMV7_PERFCTR_MEMORY_REPLAY		= 0x47,
+	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY	= 0x48,
+	ARMV7_PERFCTR_L1_DATA_MISS		= 0x49,
+	ARMV7_PERFCTR_L1_INST_MISS		= 0x4A,
+	ARMV7_PERFCTR_L1_DATA_COLORING		= 0x4B,
+	ARMV7_PERFCTR_L1_NEON_DATA		= 0x4C,
+	ARMV7_PERFCTR_L1_NEON_CACH_DATA		= 0x4D,
+	ARMV7_PERFCTR_L2_NEON			= 0x4E,
+	ARMV7_PERFCTR_L2_NEON_HIT		= 0x4F,
+	ARMV7_PERFCTR_L1_INST			= 0x50,
+	ARMV7_PERFCTR_PC_RETURN_MIS_PRED	= 0x51,
+	ARMV7_PERFCTR_PC_BRANCH_FAILED		= 0x52,
+	ARMV7_PERFCTR_PC_BRANCH_TAKEN		= 0x53,
+	ARMV7_PERFCTR_PC_BRANCH_EXECUTED	= 0x54,
+	ARMV7_PERFCTR_OP_EXECUTED		= 0x55,
+	ARMV7_PERFCTR_CYCLES_INST_STALL		= 0x56,
+	ARMV7_PERFCTR_CYCLES_INST		= 0x57,
+	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL	= 0x58,
+	ARMV7_PERFCTR_CYCLES_NEON_INST_STALL	= 0x59,
+	ARMV7_PERFCTR_NEON_CYCLES		= 0x5A,
+
+	ARMV7_PERFCTR_PMU0_EVENTS		= 0x70,
+	ARMV7_PERFCTR_PMU1_EVENTS		= 0x71,
+	ARMV7_PERFCTR_PMU_EVENTS		= 0x72,
+};
+
+/* ARMv7 Cortex-A9 specific event types */
+enum armv7_a9_perf_types {
+	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC	= 0x40,
+	ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC	= 0x41,
+	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC	= 0x42,
+
+	ARMV7_PERFCTR_COHERENT_LINE_MISS	= 0x50,
+	ARMV7_PERFCTR_COHERENT_LINE_HIT		= 0x51,
+
+	ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES	= 0x60,
+	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES	= 0x61,
+	ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES	= 0x62,
+	ARMV7_PERFCTR_STREX_EXECUTED_PASSED	= 0x63,
+	ARMV7_PERFCTR_STREX_EXECUTED_FAILED	= 0x64,
+	ARMV7_PERFCTR_DATA_EVICTION		= 0x65,
+	ARMV7_PERFCTR_ISSUE_STAGE_NO_INST	= 0x66,
+	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY		= 0x67,
+	ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE	= 0x68,
+
+	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS	= 0x6E,
+
+	ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST	= 0x70,
+	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST	= 0x71,
+	ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST	= 0x72,
+	ARMV7_PERFCTR_FP_EXECUTED_INST		= 0x73,
+	ARMV7_PERFCTR_NEON_EXECUTED_INST	= 0x74,
+
+	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES	= 0x80,
+	ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES	= 0x81,
+	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES	= 0x82,
+	ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES	= 0x83,
+	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
+	ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES	= 0x85,
+	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES	= 0x86,
+
+	ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES	= 0x8A,
+	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES	= 0x8B,
+
+	ARMV7_PERFCTR_ISB_INST			= 0x90,
+	ARMV7_PERFCTR_DSB_INST			= 0x91,
+	ARMV7_PERFCTR_DMB_INST			= 0x92,
+	ARMV7_PERFCTR_EXT_INTERRUPTS		= 0x93,
+
+	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED	= 0xA0,
+	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED	= 0xA1,
+	ARMV7_PERFCTR_PLE_FIFO_FLUSH		= 0xA2,
+	ARMV7_PERFCTR_PLE_RQST_COMPLETED	= 0xA3,
+	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW		= 0xA4,
+	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
+};
+
+/*
+ * Cortex-A8 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		/*
+		 * The performance counters don't differentiate between read
+		 * and write accesses/misses so this isn't strictly correct,
+		 * but it's the best we can do. Writes and reads get
+		 * combined.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * Only ITLB misses and DTLB refills are supported.
+		 * If users want the DTLB refills misses a raw counter
+		 * must be used.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
+ * Cortex-A9 HW events mapping
+ */
+static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    =
+					ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
+};
+
+static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					  [PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		/*
+		 * The performance counters don't differentiate between read
+		 * and write accesses/misses so this isn't strictly correct,
+		 * but it's the best we can do. Writes and reads get
+		 * combined.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		/*
+		 * Only ITLB misses and DTLB refills are supported.
+		 * If users want the DTLB refills misses a raw counter
+		 * must be used.
+		 */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
+			[C(RESULT_MISS)]
+					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+/*
+ * Perf Events counters
+ */
+enum armv7_counters {
+	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
+	ARMV7_COUNTER0			= 2,	/* First event counter */
+};
+
+/*
+ * The cycle counter is ARMV7_CYCLE_COUNTER.
+ * The first event counter is ARMV7_COUNTER0.
+ * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
+ */
+#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
+
+/*
+ * ARMv7 low level PMNC access
+ */
+
+/*
+ * Per-CPU PMNC: config reg
+ */
+#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
+#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
+#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
+#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
+#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
+#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
+#define	ARMV7_PMNC_N_MASK	0x1f
+#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
+
+/*
+ * Available counters
+ */
+#define ARMV7_CNT0		0	/* First event counter */
+#define ARMV7_CCNT		31	/* Cycle counter */
+
+/* Perf Event to low level counters mapping */
+#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
+
+/*
+ * CNTENS: counters enable reg
+ */
+#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
+
+/*
+ * CNTENC: counters disable reg
+ */
+#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
+
+/*
+ * INTENS: counters overflow interrupt enable reg
+ */
+#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
+
+/*
+ * INTENC: counters overflow interrupt disable reg
+ */
+#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
+
+/*
+ * EVTSEL: Event selection reg
+ */
+#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
+
+/*
+ * SELECT: Counter selection reg
+ */
+#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
+
+/*
+ * FLAG: counters overflow flag status reg
+ */
+#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
+#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
+#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
+
+static inline unsigned long armv7_pmnc_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
+	return val;
+}
+
+static inline void armv7_pmnc_write(unsigned long val)
+{
+	val &= ARMV7_PMNC_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+}
+
+static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
+{
+	return pmnc & ARMV7_OVERFLOWED_MASK;
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
+					enum armv7_counters counter)
+{
+	int ret = 0;
+
+	if (counter == ARMV7_CYCLE_COUNTER)
+		ret = pmnc & ARMV7_FLAG_C;
+	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
+		ret = pmnc & ARMV7_FLAG_P(counter);
+	else
+		pr_err("CPU%u checking wrong counter %d overflow status\n",
+			smp_processor_id(), counter);
+
+	return ret;
+}
+
+static inline int armv7_pmnc_select_counter(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
+		pr_err("CPU%u selecting wrong PMNC counter"
+			" %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7pmu_read_counter(int idx)
+{
+	unsigned long value = 0;
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
+		if (armv7_pmnc_select_counter(idx) == idx)
+			asm volatile("mrc p15, 0, %0, c9, c13, 2"
+				     : "=r" (value));
+	} else
+		pr_err("CPU%u reading wrong counter %d\n",
+			smp_processor_id(), idx);
+
+	return value;
+}
+
+static inline void armv7pmu_write_counter(int idx, u32 value)
+{
+	if (idx == ARMV7_CYCLE_COUNTER)
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
+		if (armv7_pmnc_select_counter(idx) == idx)
+			asm volatile("mcr p15, 0, %0, c9, c13, 2"
+				     : : "r" (value));
+	} else
+		pr_err("CPU%u writing wrong counter %d\n",
+			smp_processor_id(), idx);
+}
+
+static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
+{
+	if (armv7_pmnc_select_counter(idx) == idx) {
+		val &= ARMV7_EVTSEL_MASK;
+		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+	}
+}
+
+static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u enabling wrong PMNC counter"
+			" %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_CNTENS_C;
+	else
+		val = ARMV7_CNTENS_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u disabling wrong PMNC counter"
+			" %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_CNTENC_C;
+	else
+		val = ARMV7_CNTENC_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u enabling wrong PMNC counter"
+			" interrupt enable %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_INTENS_C;
+	else
+		val = ARMV7_INTENS_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
+{
+	u32 val;
+
+	if ((idx != ARMV7_CYCLE_COUNTER) &&
+	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
+		pr_err("CPU%u disabling wrong PMNC counter"
+			" interrupt enable %d\n", smp_processor_id(), idx);
+		return -1;
+	}
+
+	if (idx == ARMV7_CYCLE_COUNTER)
+		val = ARMV7_INTENC_C;
+	else
+		val = ARMV7_INTENC_P(idx);
+
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+
+	return idx;
+}
+
+static inline u32 armv7_pmnc_getreset_flags(void)
+{
+	u32 val;
+
+	/* Read */
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+
+	/* Write to clear flags */
+	val &= ARMV7_FLAG_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+
+	return val;
+}
+
+#ifdef DEBUG
+static void armv7_pmnc_dump_regs(void)
+{
+	u32 val;
+	unsigned int cnt;
+
+	printk(KERN_INFO "PMNC registers dump:\n");
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+	printk(KERN_INFO "PMNC  =0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
+	printk(KERN_INFO "CNTENS=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
+	printk(KERN_INFO "INTENS=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+	printk(KERN_INFO "FLAGS =0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
+	printk(KERN_INFO "SELECT=0x%08x\n", val);
+
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+	printk(KERN_INFO "CCNT  =0x%08x\n", val);
+
+	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
+		armv7_pmnc_select_counter(cnt);
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
+			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
+			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+	}
+}
+#endif
+
+static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags;
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Set event (if destined for PMNx counters)
+	 * We don't need to set the event if it's a cycle count
+	 */
+	if (idx != ARMV7_CYCLE_COUNTER)
+		armv7_pmnc_write_evtsel(idx, hwc->config_base);
+
+	/*
+	 * Enable interrupt for this counter
+	 */
+	armv7_pmnc_enable_intens(idx);
+
+	/*
+	 * Enable counter
+	 */
+	armv7_pmnc_enable_counter(idx);
+
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags;
+
+	/*
+	 * Disable counter and interrupt
+	 */
+	spin_lock_irqsave(&pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	armv7_pmnc_disable_counter(idx);
+
+	/*
+	 * Disable interrupt for this counter
+	 */
+	armv7_pmnc_disable_intens(idx);
+
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * Get and reset the IRQ flags
+	 */
+	pmnc = armv7_pmnc_getreset_flags();
+
+	/*
+	 * Did an overflow occur?
+	 */
+	if (!armv7_pmnc_has_overflowed(pmnc))
+		return IRQ_NONE;
+
+	/*
+	 * Handle the counter(s) overflow(s)
+	 */
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static void armv7pmu_start(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	/* Enable all counters */
+	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void armv7pmu_stop(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	/* Disable all counters */
+	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
+				  struct hw_perf_event *event)
+{
+	int idx;
+
+	/* Always map the cycle counter event onto the cycle counter. */
+	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
+		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return ARMV7_CYCLE_COUNTER;
+	} else {
+		/*
+		 * For anything other than the cycle counter, try to use
+		 * the event counters
+		 */
+		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
+			if (!test_and_set_bit(idx, cpuc->used_mask))
+				return idx;
+		}
+
+		/* The counters are all in use. */
+		return -EAGAIN;
+	}
+}
+
+static struct arm_pmu armv7pmu = {
+	.handle_irq		= armv7pmu_handle_irq,
+	.enable			= armv7pmu_enable_event,
+	.disable		= armv7pmu_disable_event,
+	.read_counter		= armv7pmu_read_counter,
+	.write_counter		= armv7pmu_write_counter,
+	.get_event_idx		= armv7pmu_get_event_idx,
+	.start			= armv7pmu_start,
+	.stop			= armv7pmu_stop,
+	.raw_event_mask		= 0xFF,
+	.max_period		= (1LLU << 32) - 1,
+};
+
+static u32 __init armv7_reset_read_pmnc(void)
+{
+	u32 nb_cnt;
+
+	/* Initialize & Reset PMNC: C and P bits */
+	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+
+	/* Read the number of CNTx counters supported from PMNC */
+	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+
+	/* Add the CPU cycles counter and return */
+	return nb_cnt + 1;
+}
+
+const struct arm_pmu *__init armv7_a8_pmu_init(void)
+{
+	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
+	armv7pmu.name		= "ARMv7 Cortex-A8";
+	armv7pmu.cache_map	= &armv7_a8_perf_cache_map;
+	armv7pmu.event_map	= &armv7_a8_perf_map;
+	armv7pmu.num_events	= armv7_reset_read_pmnc();
+	return &armv7pmu;
+}
+
+const struct arm_pmu *__init armv7_a9_pmu_init(void)
+{
+	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
+	armv7pmu.name		= "ARMv7 Cortex-A9";
+	armv7pmu.cache_map	= &armv7_a9_perf_cache_map;
+	armv7pmu.event_map	= &armv7_a9_perf_map;
+	armv7pmu.num_events	= armv7_reset_read_pmnc();
+	return &armv7pmu;
+}
+#else
+const struct arm_pmu *__init armv7_a8_pmu_init(void)
+{
+	return NULL;
+}
+
+const struct arm_pmu *__init armv7_a9_pmu_init(void)
+{
+	return NULL;
+}
+#endif	/* CONFIG_CPU_V7 */

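A note on the indexing scheme above: perf hands the ARMv7 code 1-based counter
indices (ARMV7_CYCLE_COUNTER = 1, first event counter = 2), while the hardware
numbers its event counters from bit 0 and keeps the cycle counter at bit 31;
the ARMV7_CNTENS_P()/ARMV7_FLAG_P() family subtracts ARMV7_EVENT_CNT_TO_CNTx
to bridge the two. A minimal standalone sketch of that mapping (illustrative
user-space code, not part of the patch):

	#include <stdio.h>

	#define ARMV7_CYCLE_COUNTER	1	/* perf index of the cycle counter */
	#define ARMV7_COUNTER0		2	/* perf index of the first event counter */
	#define ARMV7_CCNT		31	/* hardware bit of the cycle counter */

	/* perf index -> bit in CNTENS/CNTENC/INTENS/INTENC/FLAG */
	static unsigned int counter_bit(int idx)
	{
		if (idx == ARMV7_CYCLE_COUNTER)
			return 1u << ARMV7_CCNT;
		return 1u << (idx - ARMV7_COUNTER0);	/* i.e. ARMV7_CNTENS_P(idx) */
	}

	int main(void)
	{
		printf("idx 2 -> 0x%08x\n", counter_bit(ARMV7_COUNTER0));	/* bit 0 */
		printf("ccnt  -> 0x%08x\n", counter_bit(ARMV7_CYCLE_COUNTER));	/* bit 31 */
		return 0;
	}
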
+ 807 - 0
arch/arm/kernel/perf_event_xscale.c

@@ -0,0 +1,807 @@
+/*
+ * ARMv5 [xscale] Performance counter handling code.
+ *
+ * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * Based on the previous xscale OProfile code.
+ *
+ * There are two variants of the xscale PMU that we support:
+ * 	- xscale1pmu: 2 event counters and a cycle counter
+ * 	- xscale2pmu: 4 event counters and a cycle counter
+ * The two variants share event definitions, but have different
+ * PMU structures.
+ */
+
+#ifdef CONFIG_CPU_XSCALE
+enum xscale_perf_types {
+	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
+	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
+	XSCALE_PERFCTR_DATA_STALL		= 0x02,
+	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
+	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
+	XSCALE_PERFCTR_BRANCH			= 0x05,
+	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
+	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
+	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
+	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0A,
+	XSCALE_PERFCTR_DCACHE_MISS		= 0x0B,
+	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0C,
+	XSCALE_PERFCTR_PC_CHANGED		= 0x0D,
+	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
+	XSCALE_PERFCTR_BCU_FULL			= 0x11,
+	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
+	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
+	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
+	XSCALE_PERFCTR_RMW			= 0x16,
+	/* XSCALE_PERFCTR_CCNT is not hardware defined */
+	XSCALE_PERFCTR_CCNT			= 0xFE,
+	XSCALE_PERFCTR_UNUSED			= 0xFF,
+};
+
+enum xscale_counters {
+	XSCALE_CYCLE_COUNTER	= 1,
+	XSCALE_COUNTER0,
+	XSCALE_COUNTER1,
+	XSCALE_COUNTER2,
+	XSCALE_COUNTER3,
+};
+
+static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = XSCALE_PERFCTR_CCNT,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = XSCALE_PERFCTR_INSTRUCTION,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = XSCALE_PERFCTR_BRANCH_MISS,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
+};
+
+static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					   [PERF_COUNT_HW_CACHE_OP_MAX]
+					   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+#define	XSCALE_PMU_ENABLE	0x001
+#define XSCALE_PMN_RESET	0x002
+#define	XSCALE_CCNT_RESET	0x004
+#define	XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
+#define XSCALE_PMU_CNT64	0x008
+
+#define XSCALE1_OVERFLOWED_MASK	0x700
+#define XSCALE1_CCOUNT_OVERFLOW	0x400
+#define XSCALE1_COUNT0_OVERFLOW	0x100
+#define XSCALE1_COUNT1_OVERFLOW	0x200
+#define XSCALE1_CCOUNT_INT_EN	0x040
+#define XSCALE1_COUNT0_INT_EN	0x010
+#define XSCALE1_COUNT1_INT_EN	0x020
+#define XSCALE1_COUNT0_EVT_SHFT	12
+#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT	20
+#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
+
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+	/* upper 4bits and 7, 11 are write-as-0 */
+	val &= 0xffff77f;
+	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+xscale1pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * NOTE: there's an A stepping erratum stating that if an overflow
+	 *       bit is already set and another overflow occurs, the
+	 *       previously set bit gets cleared. There's no workaround;
+	 *       this is fixed in B stepping or later.
+	 */
+	pmnc = xscale1pmu_read_pmnc();
+
+	/*
+	 * Write the value back to clear the overflow flags. Overflow
+	 * flags remain in pmnc for use below. We also disable the PMU
+	 * while we process the interrupt.
+	 */
+	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
+static void
+xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = 0;
+		evt = XSCALE1_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
+			XSCALE1_COUNT0_INT_EN;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_EVT_MASK;
+		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
+			XSCALE1_COUNT1_INT_EN;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long val, mask, evt, flags;
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		mask = XSCALE1_CCOUNT_INT_EN;
+		evt = 0;
+		break;
+	case XSCALE_COUNTER0:
+		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
+		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~mask;
+	val |= evt;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int
+xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
+			struct hw_perf_event *event)
+{
+	if (XSCALE_PERFCTR_CCNT == event->config_base) {
+		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
+			return -EAGAIN;
+
+		return XSCALE_CYCLE_COUNTER;
+	} else {
+		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
+			return XSCALE_COUNTER1;
+
+		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
+			return XSCALE_COUNTER0;
+
+		return -EAGAIN;
+	}
+}
+
+static void
+xscale1pmu_start(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val |= XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale1pmu_stop(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale1pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale1pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static inline u32
+xscale1pmu_read_counter(int counter)
+{
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
+static inline void
+xscale1pmu_write_counter(int counter, u32 val)
+{
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
+		break;
+	}
+}
+
+static const struct arm_pmu xscale1pmu = {
+	.id		= ARM_PERF_PMU_ID_XSCALE1,
+	.name		= "xscale1",
+	.handle_irq	= xscale1pmu_handle_irq,
+	.enable		= xscale1pmu_enable_event,
+	.disable	= xscale1pmu_disable_event,
+	.read_counter	= xscale1pmu_read_counter,
+	.write_counter	= xscale1pmu_write_counter,
+	.get_event_idx	= xscale1pmu_get_event_idx,
+	.start		= xscale1pmu_start,
+	.stop		= xscale1pmu_stop,
+	.cache_map	= &xscale_perf_cache_map,
+	.event_map	= &xscale_perf_map,
+	.raw_event_mask	= 0xFF,
+	.num_events	= 3,
+	.max_period	= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init xscale1pmu_init(void)
+{
+	return &xscale1pmu;
+}
+
+#define XSCALE2_OVERFLOWED_MASK	0x01f
+#define XSCALE2_CCOUNT_OVERFLOW	0x001
+#define XSCALE2_COUNT0_OVERFLOW	0x002
+#define XSCALE2_COUNT1_OVERFLOW	0x004
+#define XSCALE2_COUNT2_OVERFLOW	0x008
+#define XSCALE2_COUNT3_OVERFLOW	0x010
+#define XSCALE2_CCOUNT_INT_EN	0x001
+#define XSCALE2_COUNT0_INT_EN	0x002
+#define XSCALE2_COUNT1_INT_EN	0x004
+#define XSCALE2_COUNT2_INT_EN	0x008
+#define XSCALE2_COUNT3_INT_EN	0x010
+#define XSCALE2_COUNT0_EVT_SHFT	0
+#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
+#define XSCALE2_COUNT1_EVT_SHFT	8
+#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
+#define XSCALE2_COUNT2_EVT_SHFT	16
+#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
+#define XSCALE2_COUNT3_EVT_SHFT	24
+#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)
+
+static inline u32
+xscale2pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
+	/* bits 1-2 and 4-23 are read-unpredictable */
+	return val & 0xff000009;
+}
+
+static inline void
+xscale2pmu_write_pmnc(u32 val)
+{
+	/* bits 4-23 are write-as-0, 24-31 are write ignored */
+	val &= 0xf;
+	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_overflow_flags(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale2pmu_write_overflow_flags(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
+}
+
+static inline u32
+xscale2pmu_read_event_select(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale2pmu_write_event_select(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
+}
+
+static inline u32
+xscale2pmu_read_int_enable(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale2pmu_write_int_enable(u32 val)
+{
+	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
+}
+
+static inline int
+xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
+		break;
+	case XSCALE_COUNTER2:
+		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
+		break;
+	case XSCALE_COUNTER3:
+		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+xscale2pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc, of_flags;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/* Disable the PMU. */
+	pmnc = xscale2pmu_read_pmnc();
+	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	/* Check the overflow flag register. */
+	of_flags = xscale2pmu_read_overflow_flags();
+	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	/* Clear the overflow bits. */
+	xscale2pmu_write_overflow_flags(of_flags);
+
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
+static void
+xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags, ien, evtsel;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien |= XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien |= XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien |= XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien |= XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien |= XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags, ien, evtsel;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien &= ~XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien &= ~XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien &= ~XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien &= ~XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien &= ~XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int
+xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
+			struct hw_perf_event *event)
+{
+	int idx = xscale1pmu_get_event_idx(cpuc, event);
+	if (idx >= 0)
+		goto out;
+
+	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
+		idx = XSCALE_COUNTER3;
+	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
+		idx = XSCALE_COUNTER2;
+out:
+	return idx;
+}
+
+static void
+xscale2pmu_start(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
+	val |= XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale2pmu_stop(void)
+{
+	unsigned long flags, val;
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	val = xscale2pmu_read_pmnc();
+	val &= ~XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(val);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static inline u32
+xscale2pmu_read_counter(int counter)
+{
+	u32 val = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
+		break;
+	}
+
+	return val;
+}
+
+static inline void
+xscale2pmu_write_counter(int counter, u32 val)
+{
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER0:
+		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER1:
+		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER2:
+		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
+		break;
+	case XSCALE_COUNTER3:
+		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
+		break;
+	}
+}
+
+static const struct arm_pmu xscale2pmu = {
+	.id		= ARM_PERF_PMU_ID_XSCALE2,
+	.name		= "xscale2",
+	.handle_irq	= xscale2pmu_handle_irq,
+	.enable		= xscale2pmu_enable_event,
+	.disable	= xscale2pmu_disable_event,
+	.read_counter	= xscale2pmu_read_counter,
+	.write_counter	= xscale2pmu_write_counter,
+	.get_event_idx	= xscale2pmu_get_event_idx,
+	.start		= xscale2pmu_start,
+	.stop		= xscale2pmu_stop,
+	.cache_map	= &xscale_perf_cache_map,
+	.event_map	= &xscale_perf_map,
+	.raw_event_mask	= 0xFF,
+	.num_events	= 5,
+	.max_period	= (1LLU << 32) - 1,
+};
+
+const struct arm_pmu *__init xscale2pmu_init(void)
+{
+	return &xscale2pmu;
+}
+#else
+const struct arm_pmu *__init xscale1pmu_init(void)
+{
+	return NULL;
+}
+
+const struct arm_pmu *__init xscale2pmu_init(void)
+{
+	return NULL;
+}
+#endif	/* CONFIG_CPU_XSCALE */

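One detail worth calling out: on xscale1 the event selectors and the interrupt
enables all live in the single PMNC register, which is why enable/disable are
read-modify-write sequences under pmu_lock. A standalone sketch of the field
packing (illustrative only; the constants mirror the XSCALE1_* defines above):

	#include <stdint.h>
	#include <stdio.h>

	#define COUNT0_EVT_SHFT	12	/* XSCALE1_COUNT0_EVT_SHFT */
	#define COUNT0_INT_EN	0x010	/* XSCALE1_COUNT0_INT_EN */

	/* Select event 'evt' on counter 0 and unmask its overflow interrupt,
	 * leaving the rest of the PMNC image untouched. */
	static uint32_t program_count0(uint32_t pmnc, uint8_t evt)
	{
		pmnc &= ~(0xffu << COUNT0_EVT_SHFT);
		pmnc |= (uint32_t)evt << COUNT0_EVT_SHFT;
		pmnc |= COUNT0_INT_EN;
		return pmnc;
	}

	int main(void)
	{
		/* 0x0B is XSCALE_PERFCTR_DCACHE_MISS */
		printf("pmnc = 0x%08x\n", (unsigned int)program_count0(0, 0x0B));
		return 0;
	}
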
+ 69 - 0
arch/arm/kernel/sched_clock.c

@@ -0,0 +1,69 @@
+/*
+ * sched_clock.c: support for extending a counter to a full 64-bit ns counter
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+#include <asm/sched_clock.h>
+
+static void sched_clock_poll(unsigned long wrap_ticks);
+static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
+static void (*sched_clock_update_fn)(void);
+
+static void sched_clock_poll(unsigned long wrap_ticks)
+{
+	mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
+	sched_clock_update_fn();
+}
+
+void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
+	unsigned int clock_bits, unsigned long rate)
+{
+	unsigned long r, w;
+	u64 res, wrap;
+	char r_unit;
+
+	sched_clock_update_fn = update;
+
+	/* calculate the mult/shift to convert counter ticks to ns. */
+	clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60);
+
+	r = rate;
+	if (r >= 4000000) {
+		r /= 1000000;
+		r_unit = 'M';
+	} else {
+		r /= 1000;
+		r_unit = 'k';
+	}
+
+	/* calculate how many ns until we wrap */
+	wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift);
+	do_div(wrap, NSEC_PER_MSEC);
+	w = wrap;
+
+	/* calculate the ns resolution of this counter */
+	res = cyc_to_ns(1ULL, cd->mult, cd->shift);
+	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
+		clock_bits, r, r_unit, res, w);
+
+	/*
+	 * Start the timer to keep sched_clock() properly updated and
+	 * to set the initial epoch.
+	 */
+	sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
+	sched_clock_poll(sched_clock_timer.data);
+
+	/*
+	 * Ensure that sched_clock() starts off at 0ns
+	 */
+	cd->epoch_ns = 0;
+}

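The numbers printed by init_sched_clock() fall straight out of the conversion
ns = (cyc * mult) >> shift, and the poll timer is armed at roughly 90% of the
wrap interval (w - w/10 above) so the epoch is refreshed before the counter
can wrap. A standalone sketch of the arithmetic (the 24 MHz rate and the
derived mult are illustrative; the kernel obtains mult/shift from
clocks_calc_mult_shift()):

	#include <stdint.h>
	#include <stdio.h>

	/* The conversion cyc_to_ns() performs. */
	static uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
	{
		return (cyc * mult) >> shift;
	}

	int main(void)
	{
		uint32_t shift = 26;
		uint32_t mult = (uint32_t)((1000000000ull << shift) / 24000000);

		/* ~41 ns per tick; a 32-bit counter wraps after ~179 s */
		printf("resolution %llu ns, wraps every %llu ms\n",
		       (unsigned long long)cyc_to_ns(1, mult, shift),
		       (unsigned long long)(cyc_to_ns(0xffffffffull, mult,
						      shift) / 1000000));
		return 0;
	}
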
+ 3 - 2
arch/arm/kernel/smp.c

@@ -16,6 +16,7 @@
 #include <linux/cache.h>
 #include <linux/profile.h>
 #include <linux/errno.h>
+#include <linux/ftrace.h>
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/cpu.h>
@@ -456,7 +457,7 @@ static void ipi_timer(void)
 }
 
 #ifdef CONFIG_LOCAL_TIMERS
-asmlinkage void __exception do_local_timer(struct pt_regs *regs)
+asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	int cpu = smp_processor_id();
@@ -543,7 +544,7 @@ static void ipi_cpu_stop(unsigned int cpu)
  *
  *  Bit 0 - Inter-processor function call
  */
-asmlinkage void __exception do_IPI(struct pt_regs *regs)
+asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs)
 {
 	unsigned int cpu = smp_processor_id();
 	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);

+ 1 - 0
arch/arm/kernel/vmlinux.lds.S

@@ -101,6 +101,7 @@ SECTIONS
 			__exception_text_start = .;
 			*(.exception.text)
 			__exception_text_end = .;
+			IRQENTRY_TEXT
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT

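The IRQENTRY_TEXT addition pairs with the smp.c hunk above: functions marked
__exception_irq_entry are collected into the .irqentry.text section, which the
function-graph tracer consults to tell interrupt entry code apart from the
normal call flow it is graphing.
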
+ 1 - 3
arch/arm/mach-at91/at91rm9200_time.c

@@ -101,7 +101,6 @@ static struct clocksource clk32k = {
 	.rating		= 150,
 	.read		= read_clk32k,
 	.mask		= CLOCKSOURCE_MASK(20),
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -201,8 +200,7 @@ void __init at91rm9200_timer_init(void)
 	clockevents_register_device(&clkevt);
 
 	/* register clocksource */
-	clk32k.mult = clocksource_hz2mult(AT91_SLOW_CLOCK, clk32k.shift);
-	clocksource_register(&clk32k);
+	clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK);
 }
 
 struct sys_timer at91rm9200_timer = {

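This hunk sets the pattern every timer conversion below repeats: drop the
hand-picked .shift, drop the clocksource_hz2mult()/clocksource_khz2mult()
call, and let clocksource_register_hz()/_khz() derive mult and shift from the
rate itself. For reference, a standalone sketch of what the removed helper
computed (my reconstruction of the clocksource_hz2mult() arithmetic, hedged
accordingly):

	#include <stdint.h>
	#include <stdio.h>

	#define NSEC_PER_SEC	1000000000ull

	/* mult such that ns = (cyc * mult) >> shift, rounded to nearest */
	static uint32_t hz2mult(uint32_t hz, uint32_t shift)
	{
		uint64_t tmp = NSEC_PER_SEC << shift;

		tmp += hz / 2;
		return (uint32_t)(tmp / hz);
	}

	int main(void)
	{
		/* AT91_SLOW_CLOCK is 32768 Hz; with the old .shift = 10
		 * this reproduces the mult the deleted line computed. */
		printf("mult = %u\n", hz2mult(32768, 10));	/* 31250000 */
		return 0;
	}
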
+ 1 - 3
arch/arm/mach-at91/at91sam926x_time.c

@@ -51,7 +51,6 @@ static struct clocksource pit_clk = {
 	.name		= "pit",
 	.rating		= 175,
 	.read		= read_pit_clk,
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -163,10 +162,9 @@ static void __init at91sam926x_pit_init(void)
 	 * Register clocksource.  The high order bits of PIV are unused,
 	 * so this isn't a 32-bit counter unless we get clockevent irqs.
 	 */
-	pit_clk.mult = clocksource_hz2mult(pit_rate, pit_clk.shift);
 	bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */;
 	pit_clk.mask = CLOCKSOURCE_MASK(bits);
-	clocksource_register(&pit_clk);
+	clocksource_register_hz(&pit_clk, pit_rate);
 
 	/* Set up irq handler */
 	setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);

+ 4 - 10
arch/arm/mach-bcmring/core.c

@@ -294,7 +294,6 @@ static struct clocksource clocksource_bcmring_timer1 = {
 	.rating = 200,
 	.read = bcmring_get_cycles_timer1,
 	.mask = CLOCKSOURCE_MASK(32),
-	.shift = 20,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -303,7 +302,6 @@ static struct clocksource clocksource_bcmring_timer3 = {
 	.rating = 100,
 	.read = bcmring_get_cycles_timer3,
 	.mask = CLOCKSOURCE_MASK(32),
-	.shift = 20,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -316,10 +314,8 @@ static int __init bcmring_clocksource_init(void)
 	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
 	       TIMER1_VA_BASE + TIMER_CTRL);
 
-	clocksource_bcmring_timer1.mult =
-	    clocksource_khz2mult(TIMER1_FREQUENCY_MHZ * 1000,
-				 clocksource_bcmring_timer1.shift);
-	clocksource_register(&clocksource_bcmring_timer1);
+	clocksource_register_khz(&clocksource_bcmring_timer1,
+				 TIMER1_FREQUENCY_MHZ * 1000);
 
 	/* setup timer3 as free-running clocksource */
 	writel(0, TIMER3_VA_BASE + TIMER_CTRL);
@@ -328,10 +324,8 @@ static int __init bcmring_clocksource_init(void)
 	writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
 	       TIMER3_VA_BASE + TIMER_CTRL);
 
-	clocksource_bcmring_timer3.mult =
-	    clocksource_khz2mult(TIMER3_FREQUENCY_KHZ,
-				 clocksource_bcmring_timer3.shift);
-	clocksource_register(&clocksource_bcmring_timer3);
+	clocksource_register_khz(&clocksource_bcmring_timer3,
+				 TIMER3_FREQUENCY_KHZ);
 
 	return 0;
 }

+ 2 - 5
arch/arm/mach-davinci/time.c

@@ -276,7 +276,6 @@ static struct clocksource clocksource_davinci = {
 	.rating		= 300,
 	.read		= read_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 24,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -378,10 +377,8 @@ static void __init davinci_timer_init(void)
 
 	/* setup clocksource */
 	clocksource_davinci.name = id_to_name[clocksource_id];
-	clocksource_davinci.mult =
-		clocksource_khz2mult(davinci_clock_tick_rate/1000,
-				     clocksource_davinci.shift);
-	if (clocksource_register(&clocksource_davinci))
+	if (clocksource_register_hz(&clocksource_davinci,
+				    davinci_clock_tick_rate))
 		printk(err, clocksource_davinci.name);
 
 	/* setup clockevent */

+ 1 - 3
arch/arm/mach-integrator/integrator_ap.c

@@ -372,7 +372,6 @@ static struct clocksource clocksource_timersp = {
 	.rating		= 200,
 	.read		= timersp_read,
 	.mask		= CLOCKSOURCE_MASK(16),
-	.shift		= 16,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -390,8 +389,7 @@ static void integrator_clocksource_init(u32 khz)
 	writel(ctrl, base + TIMER_CTRL);
 	writel(0xffff, base + TIMER_LOAD);
 
-	cs->mult = clocksource_khz2mult(khz, cs->shift);
-	clocksource_register(cs);
+	clocksource_register_khz(cs, khz);
 }
 
 static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE;

+ 20 - 15
arch/arm/mach-ixp4xx/common.c

@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/irq.h>
+#include <asm/sched_clock.h>
 
 #include <asm/mach/map.h>
 #include <asm/mach/irq.h>
@@ -398,6 +399,23 @@ void __init ixp4xx_sys_init(void)
 			ixp4xx_exp_bus_size >> 20);
 }
 
+/*
+ * sched_clock()
+ */
+static DEFINE_CLOCK_DATA(cd);
+
+unsigned long long notrace sched_clock(void)
+{
+	u32 cyc = *IXP4XX_OSTS;
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
+
+static void notrace ixp4xx_update_sched_clock(void)
+{
+	u32 cyc = *IXP4XX_OSTS;
+	update_sched_clock(&cd, cyc, (u32)~0);
+}
+
 /*
  * clocksource
  */
@@ -411,7 +429,6 @@ static struct clocksource clocksource_ixp4xx = {
 	.rating		= 200,
 	.read		= ixp4xx_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift 		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -419,21 +436,9 @@ unsigned long ixp4xx_timer_freq = FREQ;
 EXPORT_SYMBOL(ixp4xx_timer_freq);
 static void __init ixp4xx_clocksource_init(void)
 {
-	clocksource_ixp4xx.mult =
-		clocksource_hz2mult(ixp4xx_timer_freq,
-				    clocksource_ixp4xx.shift);
-	clocksource_register(&clocksource_ixp4xx);
-}
-
-/*
- * sched_clock()
- */
-unsigned long long sched_clock(void)
-{
-	cycle_t cyc = ixp4xx_get_cycles(NULL);
-	struct clocksource *cs = &clocksource_ixp4xx;
+	init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq);
 
-	return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
+	clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq);
 }
 
 /*

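The ixp4xx conversion is the shape every platform below repeats: a clock_data
instance, a notrace sched_clock() that extends the raw reading, and an update
hook for the poll timer to call before the counter can wrap. Condensed into a
template (all names are from the patch except read_counter(), a hypothetical
helper standing in for the platform-specific register read):

	static DEFINE_CLOCK_DATA(cd);

	static inline u32 read_counter(void)	/* the only per-platform part */
	{
		return *IXP4XX_OSTS;		/* free-running 32-bit counter */
	}

	unsigned long long notrace sched_clock(void)
	{
		return cyc_to_sched_clock(&cd, read_counter(), (u32)~0);
	}

	static void notrace platform_update_sched_clock(void)
	{
		update_sched_clock(&cd, read_counter(), (u32)~0);
	}

	/* ...and, from the platform's timer init:
	 *	init_sched_clock(&cd, platform_update_sched_clock, 32, rate);
	 */
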
+ 1 - 4
arch/arm/mach-lpc32xx/timer.c

@@ -38,7 +38,6 @@ static cycle_t lpc32xx_clksrc_read(struct clocksource *cs)
 
 static struct clocksource lpc32xx_clksrc = {
 	.name	= "lpc32xx_clksrc",
-	.shift	= 24,
 	.rating	= 300,
 	.read	= lpc32xx_clksrc_read,
 	.mask	= CLOCKSOURCE_MASK(32),
@@ -171,9 +170,7 @@ static void __init lpc32xx_timer_init(void)
 	__raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE));
 	__raw_writel(LCP32XX_TIMER_CNTR_TCR_EN,
 		LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE));
-	lpc32xx_clksrc.mult = clocksource_hz2mult(clkrate,
-		lpc32xx_clksrc.shift);
-	clocksource_register(&lpc32xx_clksrc);
+	clocksource_register_hz(&lpc32xx_clksrc, clkrate);
 }
 
 struct sys_timer lpc32xx_timer = {

+ 13 - 26
arch/arm/mach-mmp/time.c

@@ -26,8 +26,8 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/sched.h>
-#include <linux/cnt32_to_63.h>
 
+#include <asm/sched_clock.h>
 #include <mach/addr-map.h>
 #include <mach/regs-timers.h>
 #include <mach/regs-apbc.h>
@@ -42,23 +42,7 @@
 #define MAX_DELTA		(0xfffffffe)
 #define MIN_DELTA		(16)
 
-#define TCR2NS_SCALE_FACTOR	10
-
-static unsigned long tcr2ns_scale;
-
-static void __init set_tcr2ns_scale(unsigned long tcr_rate)
-{
-	unsigned long long v = 1000000000ULL << TCR2NS_SCALE_FACTOR;
-	do_div(v, tcr_rate);
-	tcr2ns_scale = v;
-	/*
-	 * We want an even value to automatically clear the top bit
-	 * returned by cnt32_to_63() without an additional run time
-	 * instruction. So if the LSB is 1 then round it up.
-	 */
-	if (tcr2ns_scale & 1)
-		tcr2ns_scale++;
-}
+static DEFINE_CLOCK_DATA(cd);
 
 /*
 * FIXME: the timer needs some delay to stabilize the counter capture
@@ -75,10 +59,16 @@ static inline uint32_t timer_read(void)
 	return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0));
 }
 
-unsigned long long sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(timer_read());
-	return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR;
+	u32 cyc = timer_read();
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
+
+static void notrace mmp_update_sched_clock(void)
+{
+	u32 cyc = timer_read();
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id)
@@ -146,7 +136,6 @@ static cycle_t clksrc_read(struct clocksource *cs)
 
 static struct clocksource cksrc = {
 	.name		= "clocksource",
-	.shift		= 20,
 	.rating		= 200,
 	.read		= clksrc_read,
 	.mask		= CLOCKSOURCE_MASK(32),
@@ -186,17 +175,15 @@ void __init timer_init(int irq)
 {
 	timer_config();
 
-	set_tcr2ns_scale(CLOCK_TICK_RATE);
+	init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE);
 
 	ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift);
 	ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt);
 	ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt);
 	ckevt.cpumask = cpumask_of(0);
 
-	cksrc.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc.shift);
-
 	setup_irq(irq, &timer_irq);
 
-	clocksource_register(&cksrc);
+	clocksource_register_hz(&cksrc, CLOCK_TICK_RATE);
 	clockevents_register_device(&ckevt);
 }

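The deleted tcr2ns code here (and its oscr2ns twin in mach-pxa below) leaned
on cnt32_to_63(), which is only safe if sched_clock() is called at least once
per half period of the 32-bit counter. The clock_data scheme drops that
caller-side obligation: the sched_clock_poll() timer in
arch/arm/kernel/sched_clock.c refreshes the epoch well before the counter
wraps, whether or not anyone reads the clock.
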
+ 2 - 0
arch/arm/mach-msm/Kconfig

@@ -49,6 +49,8 @@ endchoice
 
 config MSM_SOC_REV_A
 	bool
+
+config  ARCH_MSM_SCORPIONMP
+	bool
 
 config  ARCH_MSM_ARM11
 	bool

+ 1 - 4
arch/arm/mach-msm/timer.c

@@ -137,7 +137,6 @@ static struct msm_clock msm_clocks[] = {
 			.rating         = 200,
 			.read           = msm_gpt_read,
 			.mask           = CLOCKSOURCE_MASK(32),
-			.shift          = 17,
 			.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 		},
 		.irq = {
@@ -164,7 +163,6 @@ static struct msm_clock msm_clocks[] = {
 			.rating         = 300,
 			.read           = msm_dgt_read,
 			.mask           = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)),
-			.shift          = 24 - MSM_DGT_SHIFT,
 			.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 		},
 		.irq = {
@@ -205,8 +203,7 @@ static void __init msm_timer_init(void)
 		ce->min_delta_ns = clockevent_delta2ns(4, ce);
 		ce->cpumask = cpumask_of(0);
 
-		cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
-		res = clocksource_register(cs);
+		res = clocksource_register_hz(cs, clock->freq);
 		if (res)
 			printk(KERN_ERR "msm_timer_init: clocksource_register "
 			       "failed for %s\n", cs->name);

+ 1 - 4
arch/arm/mach-netx/time.c

@@ -114,7 +114,6 @@ static struct clocksource clocksource_netx = {
 	.rating		= 200,
 	.read		= netx_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -151,9 +150,7 @@ static void __init netx_timer_init(void)
 	writel(NETX_GPIO_COUNTER_CTRL_RUN,
 			NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE));
 
-	clocksource_netx.mult =
-		clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_netx.shift);
-	clocksource_register(&clocksource_netx);
+	clocksource_register_hz(&clocksource_netx, CLOCK_TICK_RATE);
 
 	netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 			netx_clockevent.shift);

+ 1 - 5
arch/arm/mach-ns9xxx/time-ns9360.c

@@ -35,7 +35,6 @@ static struct clocksource ns9360_clocksource = {
 	.rating	= 300,
 	.read	= ns9360_clocksource_read,
 	.mask	= CLOCKSOURCE_MASK(32),
-	.shift	= 20,
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -148,10 +147,7 @@ static void __init ns9360_timer_init(void)
 
 	__raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE));
 
-	ns9360_clocksource.mult = clocksource_hz2mult(ns9360_cpuclock(),
-			ns9360_clocksource.shift);
-
-	clocksource_register(&ns9360_clocksource);
+	clocksource_register_hz(&ns9360_clocksource, ns9360_cpuclock());
 
 	latch = SH_DIV(ns9360_cpuclock(), HZ, 0);
 

+ 1 - 5
arch/arm/mach-omap1/time.c

@@ -208,7 +208,6 @@ static struct clocksource clocksource_mpu = {
 	.rating		= 300,
 	.read		= mpu_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 24,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -217,13 +216,10 @@ static void __init omap_init_clocksource(unsigned long rate)
 	static char err[] __initdata = KERN_ERR
 			"%s: can't register clocksource!\n";
 
-	clocksource_mpu.mult
-		= clocksource_khz2mult(rate/1000, clocksource_mpu.shift);
-
 	setup_irq(INT_TIMER2, &omap_mpu_timer2_irq);
 	omap_mpu_timer_start(1, ~0, 1);
 
-	if (clocksource_register(&clocksource_mpu))
+	if (clocksource_register_hz(&clocksource_mpu, rate))
 		printk(err, clocksource_mpu.name);
 }
 

+ 1 - 4
arch/arm/mach-omap2/timer-gp.c

@@ -195,7 +195,6 @@ static struct clocksource clocksource_gpt = {
 	.rating		= 300,
 	.read		= clocksource_read_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 24,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -220,9 +219,7 @@ static void __init omap2_gp_clocksource_init(void)
 
 	omap_dm_timer_set_load_start(gpt, 1, 0);
 
-	clocksource_gpt.mult =
-		clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift);
-	if (clocksource_register(&clocksource_gpt))
+	if (clocksource_register_hz(&clocksource_gpt, tick_rate))
 		printk(err2, clocksource_gpt.name);
 }
 #endif

+ 10 - 25
arch/arm/mach-pxa/time.c

@@ -17,11 +17,11 @@
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
 #include <linux/sched.h>
-#include <linux/cnt32_to_63.h>
 
 #include <asm/div64.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
+#include <asm/sched_clock.h>
 #include <mach/regs-ost.h>
 
 /*
@@ -32,29 +32,18 @@
  * long as there is always less than 582 seconds between successive
  * calls to sched_clock() which should always be the case in practice.
  */
+static DEFINE_CLOCK_DATA(cd);
 
-#define OSCR2NS_SCALE_FACTOR 10
-
-static unsigned long oscr2ns_scale;
-
-static void __init set_oscr2ns_scale(unsigned long oscr_rate)
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = 1000000000ULL << OSCR2NS_SCALE_FACTOR;
-	do_div(v, oscr_rate);
-	oscr2ns_scale = v;
-	/*
-	 * We want an even value to automatically clear the top bit
-	 * returned by cnt32_to_63() without an additional run time
-	 * instruction. So if the LSB is 1 then round it up.
-	 */
-	if (oscr2ns_scale & 1)
-		oscr2ns_scale++;
+	u32 cyc = OSCR;
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
 }
 
-unsigned long long sched_clock(void)
+static void notrace pxa_update_sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(OSCR);
-	return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR;
+	u32 cyc = OSCR;
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 
@@ -127,7 +116,6 @@ static struct clocksource cksrc_pxa_oscr0 = {
 	.rating         = 200,
 	.read           = pxa_read_oscr,
 	.mask           = CLOCKSOURCE_MASK(32),
-	.shift          = 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -145,7 +133,7 @@ static void __init pxa_timer_init(void)
 	OIER = 0;
 	OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
 
-	set_oscr2ns_scale(clock_tick_rate);
+	init_sched_clock(&cd, pxa_update_sched_clock, 32, clock_tick_rate);
 
 	ckevt_pxa_osmr0.mult =
 		div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift);
@@ -155,12 +143,9 @@ static void __init pxa_timer_init(void)
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
 	ckevt_pxa_osmr0.cpumask = cpumask_of(0);
 
-	cksrc_pxa_oscr0.mult =
-		clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
-
 	setup_irq(IRQ_OST0, &pxa_ost0_irq);
 
-	clocksource_register(&cksrc_pxa_oscr0);
+	clocksource_register_hz(&cksrc_pxa_oscr0, clock_tick_rate);
 	clockevents_register_device(&ckevt_pxa_osmr0);
 }
 

+ 10 - 0
arch/arm/mach-realview/core.c

@@ -52,6 +52,8 @@
 #include <mach/irqs.h>
 #include <asm/hardware/timer-sp.h>
 
+#include <plat/sched_clock.h>
+
 #include "core.h"
 
 #ifdef CONFIG_ZONE_DMA
@@ -654,6 +656,12 @@ void realview_leds_event(led_event_t ledevt)
 }
 #endif	/* CONFIG_LEDS */
 
+/*
+ * The sched_clock counter
+ */
+#define REFCOUNTER		(__io_address(REALVIEW_SYS_BASE) + \
+				 REALVIEW_SYS_24MHz_OFFSET)
+
 /*
  * Where is the timer (VA)?
  */
@@ -669,6 +677,8 @@ void __init realview_timer_init(unsigned int timer_irq)
 {
 	u32 val;
 
+	versatile_sched_clock_init(REFCOUNTER, 24000000);
+
 	/* 
 	 * set clock frequency: 
 	 *	REALVIEW_REFCLK is 32KHz

+ 1 - 5
arch/arm/mach-s5pv310/time.c

@@ -211,7 +211,6 @@ struct clocksource pwm_clocksource = {
 	.rating		= 250,
 	.read		= s5pv310_pwm4_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -230,10 +229,7 @@ static void __init s5pv310_clocksource_init(void)
 	s5pv310_pwm_init(4, ~0);
 	s5pv310_pwm_start(4, 1);
 
-	pwm_clocksource.mult =
-		clocksource_khz2mult(clock_rate/1000, pwm_clocksource.shift);
-
-	if (clocksource_register(&pwm_clocksource))
+	if (clocksource_register_hz(&pwm_clocksource, clock_rate))
 		panic("%s: can't register clocksource\n", pwm_clocksource.name);
 }
 

+ 0 - 23
arch/arm/mach-sa1100/generic.c

@@ -16,9 +16,7 @@
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
 #include <linux/ioport.h>
-#include <linux/sched.h>	/* just for sched_clock() - funny that */
 #include <linux/platform_device.h>
-#include <linux/cnt32_to_63.h>
 
 #include <asm/div64.h>
 #include <mach/hardware.h>
@@ -109,27 +107,6 @@ unsigned int sa11x0_getspeed(unsigned int cpu)
 	return cclk_frequency_100khz[PPCR & 0xf] * 100;
 }
 
-/*
- * This is the SA11x0 sched_clock implementation.  This has
- * a resolution of 271ns, and a maximum value of 32025597s (370 days).
- *
- * The return value is guaranteed to be monotonic in that range as
- * long as there is always less than 582 seconds between successive
- * calls to this function.
- *
- *  ( * 1E9 / 3686400 => * 78125 / 288)
- */
-unsigned long long sched_clock(void)
-{
-	unsigned long long v = cnt32_to_63(OSCR);
-
-	/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
-	v *= 78125<<1;
-	do_div(v, 288<<1);
-
-	return v;
-}
-
 /*
  * Default power-off for SA1100
  */

+ 31 - 5
arch/arm/mach-sa1100/time.c

@@ -12,12 +12,39 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/sched.h>	/* just for sched_clock() - funny that */
 #include <linux/timex.h>
 #include <linux/clockchips.h>
 
 #include <asm/mach/time.h>
+#include <asm/sched_clock.h>
 #include <mach/hardware.h>
 
+/*
+ * This is the SA11x0 sched_clock implementation.
+ */
+static DEFINE_CLOCK_DATA(cd);
+
+/*
+ * Constants generated by clocks_calc_mult_shift(m, s, 3.6864MHz,
+ * NSEC_PER_SEC, 60).
+ * This gives a resolution of about 271ns and a wrap period of about 19min.
+ */
+#define SC_MULT		2275555556u
+#define SC_SHIFT	23
+
+unsigned long long notrace sched_clock(void)
+{
+	u32 cyc = OSCR;
+	return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
+}
+
+static void notrace sa1100_update_sched_clock(void)
+{
+	u32 cyc = OSCR;
+	update_sched_clock(&cd, cyc, (u32)~0);
+}
+
 #define MIN_OSCR_DELTA 2
 
 static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
@@ -81,7 +108,6 @@ static struct clocksource cksrc_sa1100_oscr = {
 	.rating		= 200,
 	.read		= sa1100_read_oscr,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -97,6 +123,9 @@ static void __init sa1100_timer_init(void)
 	OIER = 0;		/* disable any timer interrupts */
 	OSSR = 0xf;		/* clear status on all timers */
 
+	init_fixed_sched_clock(&cd, sa1100_update_sched_clock, 32,
+			       3686400, SC_MULT, SC_SHIFT);
+
 	ckevt_sa1100_osmr0.mult =
 		div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift);
 	ckevt_sa1100_osmr0.max_delta_ns =
@@ -105,12 +134,9 @@ static void __init sa1100_timer_init(void)
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
 	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
 
-	cksrc_sa1100_oscr.mult =
-		clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
-
 	setup_irq(IRQ_OST0, &sa1100_timer_irq);
 
-	clocksource_register(&cksrc_sa1100_oscr);
+	clocksource_register_hz(&cksrc_sa1100_oscr, CLOCK_TICK_RATE);
 	clockevents_register_device(&ckevt_sa1100_osmr0);
 }
 
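The fixed constants are easy to sanity-check: one 3.6864 MHz tick is
10^9 / 3686400 ≈ 271.27 ns, and SC_MULT >> SC_SHIFT = 2275555556 / 2^23 gives
the same 271 ns, so the quoted resolution holds; 2^32 ticks then last about
1165 s, the quoted ~19 min wrap. As a standalone check:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t mult = 2275555556u;	/* SC_MULT */
		unsigned int shift = 23;	/* SC_SHIFT */

		/* one tick: ~271 ns, i.e. 1 / 3.6864 MHz */
		printf("resolution: %llu ns\n",
		       (unsigned long long)(mult >> shift));

		/* 32-bit wrap: ~1165 s, i.e. about 19 min */
		printf("wrap: %llu s\n",
		       (unsigned long long)(((0xffffffffull * mult) >> shift)
					    / 1000000000ull));
		return 0;
	}
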

+ 1 - 4
arch/arm/mach-tcc8k/time.c

@@ -35,7 +35,6 @@ static struct clocksource clocksource_tcc = {
 	.rating		= 200,
 	.read		= tcc_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 28,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -103,9 +102,7 @@ static int __init tcc_clockevent_init(struct clk *clock)
 {
 	unsigned int c = clk_get_rate(clock);
 
-	clocksource_tcc.mult = clocksource_hz2mult(c,
-					clocksource_tcc.shift);
-	clocksource_register(&clocksource_tcc);
+	clocksource_register_hz(&clocksource_tcc, c);
 
 	clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC,
 					clockevent_tcc.shift);

+ 25 - 6
arch/arm/mach-tegra/timer.c

@@ -18,6 +18,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
@@ -25,10 +26,10 @@
 #include <linux/clocksource.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <linux/cnt32_to_63.h>
 
 #include <asm/mach/time.h>
 #include <asm/localtimer.h>
+#include <asm/sched_clock.h>
 
 #include <mach/iomap.h>
 #include <mach/irqs.h>
@@ -91,7 +92,7 @@ static void tegra_timer_set_mode(enum clock_event_mode mode,
 
 static cycle_t tegra_clocksource_read(struct clocksource *cs)
 {
-	return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US));
+	return timer_readl(TIMERUS_CNTR_1US);
 }
 
 static struct clock_event_device tegra_clockevent = {
@@ -106,14 +107,29 @@ static struct clocksource tegra_clocksource = {
 	.name	= "timer_us",
 	.rating	= 300,
 	.read	= tegra_clocksource_read,
-	.mask	= 0x7FFFFFFFFFFFFFFFULL,
+	.mask	= CLOCKSOURCE_MASK(32),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-unsigned long long sched_clock(void)
+static DEFINE_CLOCK_DATA(cd);
+
+/*
+ * Constants generated by clocks_calc_mult_shift(m, s, 1MHz, NSEC_PER_SEC, 60).
+ * This gives a resolution of about 1us and a wrap period of about 1h11min.
+ */
+#define SC_MULT		4194304000u
+#define SC_SHIFT	22
+
+unsigned long long notrace sched_clock(void)
 {
-	return clocksource_cyc2ns(tegra_clocksource.read(&tegra_clocksource),
-		tegra_clocksource.mult, tegra_clocksource.shift);
+	u32 cyc = timer_readl(TIMERUS_CNTR_1US);
+	return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
+}
+
+static void notrace tegra_update_sched_clock(void)
+{
+	u32 cyc = timer_readl(TIMERUS_CNTR_1US);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id)
@@ -158,6 +174,9 @@ static void __init tegra_init_timer(void)
 		WARN(1, "Unknown clock rate");
 	}
 
+	init_fixed_sched_clock(&cd, tegra_update_sched_clock, 32,
+			       1000000, SC_MULT, SC_SHIFT);
+
 	if (clocksource_register_hz(&tegra_clocksource, 1000000)) {
 		printk(KERN_ERR "Failed to register clocksource\n");
 		BUG();
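With cnt32_to_63() gone, the Tegra clocksource stops pretending to be 63 bits wide and reports its true 32-bit mask. Wrap handling becomes the timekeeping core's job: counter deltas are computed modulo the mask (assuming the usual (now - last) & cs->mask convention), and unsigned arithmetic already behaves that way. A minimal demonstration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t before = 0xfffffff0u;	/* read just before the wrap */
	uint32_t after  = 0x00000010u;	/* read just after it */

	/* 32-bit unsigned subtraction is already modulo 2^32. */
	uint32_t delta = after - before;
	printf("elapsed ticks across the wrap: %u\n", delta);	/* 32 */
	return 0;
}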

+ 15 - 7
arch/arm/mach-u300/timer.c

@@ -9,6 +9,7 @@
  * Author: Linus Walleij <linus.walleij@stericsson.com>
  */
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <linux/clockchips.h>
@@ -21,6 +22,7 @@
 #include <mach/hardware.h>
 
 /* Generic stuff */
+#include <asm/sched_clock.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 #include <asm/mach/irq.h>
@@ -352,12 +354,18 @@ static struct clocksource clocksource_u300_1mhz = {
  * this wraps around for now, since it is just a relative time
  * stamp. (Inspired by OMAP implementation.)
  */
+static DEFINE_CLOCK_DATA(cd);
+
 unsigned long long notrace sched_clock(void)
 {
-	return clocksource_cyc2ns(clocksource_u300_1mhz.read(
-				  &clocksource_u300_1mhz),
-				  clocksource_u300_1mhz.mult,
-				  clocksource_u300_1mhz.shift);
+	u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
+
+static void notrace u300_update_sched_clock(void)
+{
+	u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 
@@ -375,6 +383,8 @@ static void __init u300_timer_init(void)
 	clk_enable(clk);
 	rate = clk_get_rate(clk);
 
+	init_sched_clock(&cd, u300_update_sched_clock, 32, rate);
+
 	/*
 	 * Disable the "OS" and "DD" timers - these are designed for Symbian!
 	 * Example usage in cnh1601578 cpu subsystem pd_timer_app.c
@@ -412,9 +422,7 @@ static void __init u300_timer_init(void)
 	writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE,
 		U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2);
 
-	clocksource_calc_mult_shift(&clocksource_u300_1mhz,
-				    rate, APPTIMER_MIN_RANGE);
-	if (clocksource_register(&clocksource_u300_1mhz))
+	if (clocksource_register_hz(&clocksource_u300_1mhz, rate))
 		printk(KERN_ERR "timer: failed to initialize clock "
 		       "source %s\n", clocksource_u300_1mhz.name);
 

+ 10 - 0
arch/arm/mach-versatile/core.c

@@ -51,6 +51,8 @@
 #include <mach/platform.h>
 #include <asm/hardware/timer-sp.h>
 
+#include <plat/sched_clock.h>
+
 #include "core.h"
 
 /*
@@ -885,6 +887,12 @@ void __init versatile_init(void)
 #endif
 }
 
+/*
+ * The sched_clock counter
+ */
+#define REFCOUNTER		(__io_address(VERSATILE_SYS_BASE) + \
+				 VERSATILE_SYS_24MHz_OFFSET)
+
 /*
  * Where is the timer (VA)?
  */
@@ -900,6 +908,8 @@ static void __init versatile_timer_init(void)
 {
 	u32 val;
 
+	versatile_sched_clock_init(REFCOUNTER, 24000000);
+
 	/* 
 	 * set clock frequency: 
 	 *	VERSATILE_REFCLK is 32KHz

+ 4 - 1
arch/arm/mach-vexpress/v2m.c

@@ -18,11 +18,12 @@
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 #include <asm/hardware/arm_timer.h>
+#include <asm/hardware/timer-sp.h>
 
 #include <mach/clkdev.h>
 #include <mach/motherboard.h>
 
-#include <asm/hardware/timer-sp.h>
+#include <plat/sched_clock.h>
 
 #include "core.h"
 
@@ -50,6 +51,8 @@ void __init v2m_map_io(struct map_desc *tile, size_t num)
 
 static void __init v2m_timer_init(void)
 {
+	versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000);
+
 	writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL);
 	writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL);
 

+ 1 - 4
arch/arm/mach-w90x900/time.c

@@ -153,7 +153,6 @@ static struct clocksource clocksource_nuc900 = {
 	.rating	= 200,
 	.read	= nuc900_get_cycles,
 	.mask	= CLOCKSOURCE_MASK(TDR_SHIFT),
-	.shift	= 10,
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -176,9 +175,7 @@ static void __init nuc900_clocksource_init(void)
 	val |= (COUNTEN | PERIOD | PRESCALE);
 	__raw_writel(val, REG_TCSR1);
 
-	clocksource_nuc900.mult =
-		clocksource_khz2mult((rate / 1000), clocksource_nuc900.shift);
-	clocksource_register(&clocksource_nuc900);
+	clocksource_register_hz(&clocksource_nuc900, rate);
 }
 
 static void __init nuc900_timer_init(void)

+ 17 - 10
arch/arm/plat-iop/time.c

@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/timex.h>
 #include <linux/sched.h>
 #include <linux/io.h>
@@ -24,6 +25,7 @@
 #include <linux/clockchips.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
+#include <asm/sched_clock.h>
 #include <asm/uaccess.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
@@ -50,15 +52,21 @@ static struct clocksource iop_clocksource = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+static DEFINE_CLOCK_DATA(cd);
+
 /*
  * IOP sched_clock() implementation via its clocksource.
  */
-unsigned long long sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
-	cycle_t cyc = iop_clocksource_read(NULL);
-	struct clocksource *cs = &iop_clocksource;
+	u32 cyc = 0xffffffffu - read_tcr1();
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
+}
 
-	return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
+static void notrace iop_update_sched_clock(void)
+{
+	u32 cyc = 0xffffffffu - read_tcr1();
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 /*
@@ -88,6 +96,7 @@ static void iop_set_mode(enum clock_event_mode mode,
 	case CLOCK_EVT_MODE_PERIODIC:
 		write_tmr0(tmr & ~IOP_TMR_EN);
 		write_tcr0(ticks_per_jiffy - 1);
+		write_trr0(ticks_per_jiffy - 1);
 		tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
@@ -143,6 +152,8 @@ void __init iop_init_time(unsigned long tick_rate)
 {
 	u32 timer_ctl;
 
+	init_sched_clock(&cd, iop_update_sched_clock, 32, tick_rate);
+
 	ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ);
 	iop_tick_rate = tick_rate;
 
@@ -153,6 +164,7 @@ void __init iop_init_time(unsigned long tick_rate)
 	 * Set up interrupting clockevent timer 0.
 	 */
 	write_tmr0(timer_ctl & ~IOP_TMR_EN);
+	write_tisr(1);
 	setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq);
 	clockevents_calc_mult_shift(&iop_clockevent,
 				    tick_rate, IOP_MIN_RANGE);
@@ -162,9 +174,6 @@ void __init iop_init_time(unsigned long tick_rate)
 		clockevent_delta2ns(0xf, &iop_clockevent);
 	iop_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&iop_clockevent);
-	write_trr0(ticks_per_jiffy - 1);
-	write_tcr0(ticks_per_jiffy - 1);
-	write_tmr0(timer_ctl);
 
 	/*
 	 * Set up free-running clocksource timer 1.
@@ -172,7 +181,5 @@ void __init iop_init_time(unsigned long tick_rate)
 	write_trr1(0xffffffff);
 	write_tcr1(0xffffffff);
 	write_tmr1(timer_ctl);
-	clocksource_calc_mult_shift(&iop_clocksource, tick_rate,
-				    IOP_MIN_RANGE);
-	clocksource_register(&iop_clocksource);
+	clocksource_register_hz(&iop_clocksource, tick_rate);
 }
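TCR1 is a down-counter (it is loaded with 0xffffffff above and counts towards zero), while the sched_clock code wants a value that increases, hence the 0xffffffffu - read_tcr1() in both readers. The inversion preserves deltas; a standalone sketch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t tcr_early = 0xffffff00u;	/* down-counter, earlier read */
	uint32_t tcr_late  = 0xfffffe00u;	/* 256 ticks later */

	/* Inverting turns the down-count into an up-count. */
	uint32_t up_early = 0xffffffffu - tcr_early;
	uint32_t up_late  = 0xffffffffu - tcr_late;

	printf("elapsed ticks: %u\n", up_late - up_early);	/* 256 */
	return 0;
}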

+ 1 - 4
arch/arm/plat-mxc/epit.c

@@ -93,7 +93,6 @@ static struct clocksource clocksource_epit = {
 	.rating		= 200,
 	.read		= epit_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -101,9 +100,7 @@ static int __init epit_clocksource_init(struct clk *timer_clk)
 {
 	unsigned int c = clk_get_rate(timer_clk);
 
-	clocksource_epit.mult = clocksource_hz2mult(c,
-					clocksource_epit.shift);
-	clocksource_register(&clocksource_epit);
+	clocksource_register_hz(&clocksource_epit, c);
 
 	return 0;
 }

+ 1 - 4
arch/arm/plat-mxc/time.c

@@ -120,7 +120,6 @@ static struct clocksource clocksource_mxc = {
 	.rating		= 200,
 	.read		= mx1_2_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift 		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -131,9 +130,7 @@ static int __init mxc_clocksource_init(struct clk *timer_clk)
 	if (timer_is_v2())
 		clocksource_mxc.read = v2_get_cycles;
 
-	clocksource_mxc.mult = clocksource_hz2mult(c,
-					clocksource_mxc.shift);
-	clocksource_register(&clocksource_mxc);
+	clocksource_register_hz(&clocksource_mxc, c);
 
 	return 0;
 }

+ 1 - 0
arch/arm/plat-nomadik/Kconfig

@@ -14,6 +14,7 @@ if PLAT_NOMADIK
 
 config HAS_MTU
 	bool
+	select HAVE_SCHED_CLOCK
 	help
 	  Support for Multi Timer Unit. MTU provides access
 	  to multiple interrupt generating programmable

+ 11 - 69
arch/arm/plat-nomadik/timer.c

@@ -17,9 +17,9 @@
 #include <linux/clk.h>
 #include <linux/jiffies.h>
 #include <linux/err.h>
-#include <linux/cnt32_to_63.h>
-#include <linux/timer.h>
+#include <linux/sched.h>
 #include <asm/mach/time.h>
+#include <asm/sched_clock.h>
 
 #include <plat/mtu.h>
 
@@ -52,81 +52,24 @@ static struct clocksource nmdk_clksrc = {
  * Override the global weak sched_clock symbol with this
  * local implementation which uses the clocksource to get some
  * better resolution when scheduling the kernel.
- *
- * Because the hardware timer period may be quite short
- * (32.3 secs on the 133 MHz MTU timer selection on ux500)
- * and because cnt32_to_63() needs to be called at least once per
- * half period to work properly, a kernel keepwarm() timer is set up
- * to ensure this requirement is always met.
- *
- * Also the sched_clock timer will wrap around at some point,
- * here we set it to run continously for a year.
  */
-#define SCHED_CLOCK_MIN_WRAP 3600*24*365
-static struct timer_list cnt32_to_63_keepwarm_timer;
-static u32 sched_mult;
-static u32 sched_shift;
+static DEFINE_CLOCK_DATA(cd);
 
 unsigned long long notrace sched_clock(void)
 {
-	u64 cycles;
+	u32 cyc;
 
 	if (unlikely(!mtu_base))
 		return 0;
 
-	cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0)));
-	/*
-	 * sched_mult is guaranteed to be even so will
-	 * shift out bit 63
-	 */
-	return (cycles * sched_mult) >> sched_shift;
+	cyc = -readl(mtu_base + MTU_VAL(0));
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
 }
 
-/* Just kick sched_clock every so often */
-static void cnt32_to_63_keepwarm(unsigned long data)
+static void notrace nomadik_update_sched_clock(void)
 {
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
-	(void) sched_clock();
-}
-
-/*
- * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm
- * once in half a 32bit timer wrap interval.
- */
-static void __init nmdk_sched_clock_init(unsigned long rate)
-{
-	u32 v;
-	unsigned long delta;
-	u64 days;
-
-	/* Find the apropriate mult and shift factors */
-	clocks_calc_mult_shift(&sched_mult, &sched_shift,
-			       rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP);
-	/* We need to multiply by an even number to get rid of bit 63 */
-	if (sched_mult & 1)
-		sched_mult++;
-
-	/* Let's see what we get, take max counter and scale it */
-	days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift;
-	do_div(days, NSEC_PER_SEC);
-	do_div(days, (3600*24));
-
-	pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n",
-		(64 - sched_shift), rate, (unsigned long) days);
-
-	/*
-	 * Program a timer to kick us at half 32bit wraparound
-	 * Formula: seconds per wrap = (2^32) / f
-	 */
-	v = 0xFFFFFFFFUL / rate;
-	/* We want half of the wrap time to keep cnt32_to_63 warm */
-	v /= 2;
-	pr_debug("sched_clock: prescaled timer rate: %lu Hz, "
-		 "initialize keepwarm timer every %d seconds\n", rate, v);
-	/* Convert seconds to jiffies */
-	delta = msecs_to_jiffies(v*1000);
-	setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta);
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta));
+	u32 cyc = -readl(mtu_base + MTU_VAL(0));
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 /* Clockevent device: use one-shot mode */
@@ -222,7 +165,6 @@ void __init nmdk_timer_init(void)
 	} else {
 		cr |= MTU_CRn_PRESCALE_1;
 	}
-	clocksource_calc_mult_shift(&nmdk_clksrc, rate, MTU_MIN_RANGE);
 
 	/* Timer 0 is the free running clocksource */
 	writel(cr, mtu_base + MTU_CR(0));
@@ -233,11 +175,11 @@ void __init nmdk_timer_init(void)
 	/* Now the clock source is ready */
 	nmdk_clksrc.read = nmdk_read_timer;
 
-	if (clocksource_register(&nmdk_clksrc))
+	if (clocksource_register_hz(&nmdk_clksrc, rate))
 		pr_err("timer: failed to initialize clock source %s\n",
 		       nmdk_clksrc.name);
 
-	nmdk_sched_clock_init(rate);
+	init_sched_clock(&cd, nomadik_update_sched_clock, 32, rate);
 
 	/* Timer 1 is used for events */
 

+ 32 - 15
arch/arm/plat-omap/counter_32k.c

@@ -15,8 +15,11 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/clk.h>
-#include <linux/io.h>
 #include <linux/err.h>
+#include <linux/io.h>
+#include <linux/sched.h>
+
+#include <asm/sched_clock.h>
 
 #include <plat/common.h>
 #include <plat/board.h>
@@ -45,7 +48,7 @@
 static u32 offset_32k __read_mostly;
 
 #ifdef CONFIG_ARCH_OMAP16XX
-static cycle_t omap16xx_32k_read(struct clocksource *cs)
+static cycle_t notrace omap16xx_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k;
 }
@@ -54,7 +57,7 @@ static cycle_t omap16xx_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP2420
-static cycle_t omap2420_32k_read(struct clocksource *cs)
+static cycle_t notrace omap2420_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -63,7 +66,7 @@ static cycle_t omap2420_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP2430
-static cycle_t omap2430_32k_read(struct clocksource *cs)
+static cycle_t notrace omap2430_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -72,7 +75,7 @@ static cycle_t omap2430_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP3
-static cycle_t omap34xx_32k_read(struct clocksource *cs)
+static cycle_t notrace omap34xx_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -81,7 +84,7 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs)
 #endif
 
 #ifdef CONFIG_ARCH_OMAP4
-static cycle_t omap44xx_32k_read(struct clocksource *cs)
+static cycle_t notrace omap44xx_32k_read(struct clocksource *cs)
 {
 	return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k;
 }
@@ -93,7 +96,7 @@ static cycle_t omap44xx_32k_read(struct clocksource *cs)
  * Kernel assumes that sched_clock can be called early but may not have
  * things ready yet.
  */
-static cycle_t omap_32k_read_dummy(struct clocksource *cs)
+static cycle_t notrace omap_32k_read_dummy(struct clocksource *cs)
 {
 	return 0;
 }
@@ -103,7 +106,6 @@ static struct clocksource clocksource_32k = {
 	.rating		= 250,
 	.read		= omap_32k_read_dummy,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -111,10 +113,25 @@ static struct clocksource clocksource_32k = {
  * Returns current time from boot in nsecs. It's OK for this to wrap
  * around for now, as it's just a relative time stamp.
  */
-unsigned long long sched_clock(void)
+static DEFINE_CLOCK_DATA(cd);
+
+/*
+ * Constants generated by clocks_calc_mult_shift(m, s, 32768, NSEC_PER_SEC, 60).
+ * This gives a resolution of about 30us and a wrap period of about 36hrs.
+ */
+#define SC_MULT		4000000000u
+#define SC_SHIFT	17
+
+unsigned long long notrace sched_clock(void)
+{
+	u32 cyc = clocksource_32k.read(&clocksource_32k);
+	return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
+}
+
+static void notrace omap_update_sched_clock(void)
 {
-	return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k),
-				  clocksource_32k.mult, clocksource_32k.shift);
+	u32 cyc = clocksource_32k.read(&clocksource_32k);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 /**
@@ -168,13 +185,13 @@ static int __init omap_init_clocksource_32k(void)
 		if (!IS_ERR(sync_32k_ick))
 			clk_enable(sync_32k_ick);
 
-		clocksource_32k.mult = clocksource_hz2mult(32768,
-					    clocksource_32k.shift);
-
 		offset_32k = clocksource_32k.read(&clocksource_32k);
 
-		if (clocksource_register(&clocksource_32k))
+		if (clocksource_register_hz(&clocksource_32k, 32768))
 			printk(err, clocksource_32k.name);
+
+		init_fixed_sched_clock(&cd, omap_update_sched_clock, 32,
+				       32768, SC_MULT, SC_SHIFT);
 	}
 	return 0;
 }
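The omap_32k_read_dummy() indirection above exists because, as the retained comment says, sched_clock() can be called before the timer is ready. The pattern in miniature (a standalone sketch; the real code swaps the clocksource's read method rather than a bare function pointer):

#include <stdio.h>
#include <stdint.h>

static uint32_t read_dummy(void) { return 0; }
static uint32_t read_hw(void)    { return 42; }	/* stand-in for omap_readl() */

static uint32_t (*read_32k)(void) = read_dummy;

int main(void)
{
	printf("early call:  %u\n", read_32k());	/* 0, but safe */
	read_32k = read_hw;	/* what omap_init_clocksource_32k() effectively does */
	printf("after init:  %u\n", read_32k());	/* 42 */
	return 0;
}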

+ 11 - 39
arch/arm/plat-orion/time.c

@@ -13,11 +13,11 @@
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/cnt32_to_63.h>
 #include <linux/timer.h>
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <asm/sched_clock.h>
 #include <asm/mach/time.h>
 #include <mach/bridge-regs.h>
 #include <mach/hardware.h>
@@ -44,52 +44,26 @@ static u32 ticks_per_jiffy;
 
 /*
  * Orion's sched_clock implementation. It has a resolution of
- * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days.
- *
- * Because the hardware timer period is quite short (21 secs if
- * 200MHz TCLK) and because cnt32_to_63() needs to be called at
- * least once per half period to work properly, a kernel timer is
- * set up to ensure this requirement is always met.
+ * at least 7.5ns (133MHz TCLK).
  */
-#define TCLK2NS_SCALE_FACTOR 8
-
-static unsigned long tclk2ns_scale;
+static DEFINE_CLOCK_DATA(cd);
 
-unsigned long long sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
-	return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
+	u32 cyc = 0xffffffff - readl(TIMER0_VAL);
+	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
 }
 
-static struct timer_list cnt32_to_63_keepwarm_timer;
 
-static void cnt32_to_63_keepwarm(unsigned long data)
+static void notrace orion_update_sched_clock(void)
 {
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
-	(void) sched_clock();
+	u32 cyc = 0xffffffff - readl(TIMER0_VAL);
+	update_sched_clock(&cd, cyc, (u32)~0);
 }
 
 static void __init setup_sched_clock(unsigned long tclk)
 {
-	unsigned long long v;
-	unsigned long data;
-
-	v = NSEC_PER_SEC;
-	v <<= TCLK2NS_SCALE_FACTOR;
-	v += tclk/2;
-	do_div(v, tclk);
-	/*
-	 * We want an even value to automatically clear the top bit
-	 * returned by cnt32_to_63() without an additional run time
-	 * instruction. So if the LSB is 1 then round it up.
-	 */
-	if (v & 1)
-		v++;
-	tclk2ns_scale = v;
-
-	data = (0xffffffffUL / tclk / 2 - 2) * HZ;
-	setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data);
-	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
+	init_sched_clock(&cd, orion_update_sched_clock, 32, tclk);
 }
 
 /*
@@ -102,7 +76,6 @@ static cycle_t orion_clksrc_read(struct clocksource *cs)
 
 static struct clocksource orion_clksrc = {
 	.name		= "orion_clocksource",
-	.shift		= 20,
 	.rating		= 300,
 	.read		= orion_clksrc_read,
 	.mask		= CLOCKSOURCE_MASK(32),
@@ -245,8 +218,7 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK);
 	u = readl(TIMER_CTRL);
 	writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL);
-	orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift);
-	clocksource_register(&orion_clksrc);
+	clocksource_register_hz(&orion_clksrc, tclk);
 
 	/*
 	 * Setup clockevent timer (interrupt-driven.)
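For reference, the margin the deleted keepwarm timer protected: per the removed comment, cnt32_to_63() had to run at least once per half wrap period. Standalone arithmetic for the worst case quoted there (200MHz TCLK); under the new scheme the refresh period is instead derived inside the generic code, which is presumably why init_sched_clock() takes the rate:

#include <stdio.h>

int main(void)
{
	unsigned long tclk = 200000000;		/* worst case from the old comment */
	double wrap_s = 4294967296.0 / tclk;

	printf("wraps every %.1f s; refresh needed at least every %.1f s\n",
	       wrap_s, wrap_s / 2.0);		/* 21.5, 10.7 */
	return 0;
}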

+ 1 - 5
arch/arm/plat-spear/time.c

@@ -81,8 +81,6 @@ static struct clocksource clksrc = {
 	.rating = 200,		/* its a pretty decent clock */
 	.read = clocksource_read_cycles,
 	.mask = 0xFFFF,		/* 16 bits */
-	.mult = 0,		/* to be computed */
-	.shift = 0,		/* to be computed */
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -105,10 +103,8 @@ static void spear_clocksource_init(void)
 	val |= CTRL_ENABLE ;
 	writew(val, gpt_base + CR(CLKSRC));
 
-	clocksource_calc_mult_shift(&clksrc, tick_rate, SPEAR_MIN_RANGE);
-
 	/* register the clocksource */
-	clocksource_register(&clksrc);
+	clocksource_register_hz(&clksrc, tick_rate);
 }
 
 static struct clock_event_device clkevt = {

+ 1 - 4
arch/arm/plat-stmp3xxx/timer.c

@@ -89,7 +89,6 @@ static struct clocksource cksrc_stmp3xxx = {
 	.rating         = 250,
 	.read           = stmp3xxx_clock_read,
 	.mask           = CLOCKSOURCE_MASK(16),
-	.shift          = 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -106,8 +105,6 @@ static struct irqaction stmp3xxx_timer_irq = {
  */
 static void __init stmp3xxx_init_timer(void)
 {
-	cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
-				cksrc_stmp3xxx.shift);
 	ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 				ckevt_timrot.shift);
 	ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot);
@@ -140,7 +137,7 @@ static void __init stmp3xxx_init_timer(void)
 
 	setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq);
 
-	clocksource_register(&cksrc_stmp3xxx);
+	clocksource_register_hz(&cksrc_stmp3xxx, CLOCK_TICK_RATE);
 	clockevents_register_device(&ckevt_timrot);
 }
 

+ 3 - 2
arch/arm/plat-versatile/Makefile

@@ -1,6 +1,7 @@
 obj-y	:= clock.o
-obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o
-obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o
+ifneq ($(CONFIG_ARCH_INTEGRATOR),y)
+obj-y	+= sched-clock.o
+endif
 ifeq ($(CONFIG_LEDS_CLASS),y)
 obj-$(CONFIG_ARCH_REALVIEW) += leds.o
 obj-$(CONFIG_ARCH_VERSATILE) += leds.o

+ 6 - 0
arch/arm/plat-versatile/include/plat/sched_clock.h

@@ -0,0 +1,6 @@
+#ifndef ARM_PLAT_SCHED_CLOCK_H
+#define ARM_PLAT_SCHED_CLOCK_H
+
+void versatile_sched_clock_init(void __iomem *, unsigned long);
+
+#endif

+ 28 - 23
arch/arm/plat-versatile/sched-clock.c

@@ -18,36 +18,41 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#include <linux/cnt32_to_63.h>
 #include <linux/io.h>
-#include <asm/div64.h>
+#include <linux/sched.h>
 
-#include <mach/hardware.h>
-#include <mach/platform.h>
+#include <asm/sched_clock.h>
+#include <plat/sched_clock.h>
 
-#ifdef VERSATILE_SYS_BASE
-#define REFCOUNTER	(__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET)
-#endif
-
-#ifdef REALVIEW_SYS_BASE
-#define REFCOUNTER	(__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
-#endif
+static DEFINE_CLOCK_DATA(cd);
+static void __iomem *ctr;
 
 /*
- * This is the Realview and Versatile sched_clock implementation.  This
- * has a resolution of 41.7ns, and a maximum value of about 35583 days.
- *
- * The return value is guaranteed to be monotonic in that range as
- * long as there is always less than 89 seconds between successive
- * calls to this function.
+ * Constants generated by clocks_calc_mult_shift(m, s, 24MHz, NSEC_PER_SEC, 60).
+ * This gives a resolution of about 41ns and a wrap period of about 178s.
  */
-unsigned long long sched_clock(void)
+#define SC_MULT		2796202667u
+#define SC_SHIFT	26
+
+unsigned long long notrace sched_clock(void)
 {
-	unsigned long long v = cnt32_to_63(readl(REFCOUNTER));
+	if (ctr) {
+		u32 cyc = readl(ctr);
+		return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0,
+						SC_MULT, SC_SHIFT);
+	} else
+		return 0;
+}
 
-	/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
-	v *= 125<<1;
-	do_div(v, 3<<1);
+static void notrace versatile_update_sched_clock(void)
+{
+	u32 cyc = readl(ctr);
+	update_sched_clock(&cd, cyc, (u32)~0);
+}
 
-	return v;
+void __init versatile_sched_clock_init(void __iomem *reg, unsigned long rate)
+{
+	ctr = reg;
+	init_fixed_sched_clock(&cd, versatile_update_sched_clock,
+			       32, rate, SC_MULT, SC_SHIFT);
 }

+ 1 - 0
kernel/time/clocksource.c

@@ -152,6 +152,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
 	 */
 	for (sft = 32; sft > 0; sft--) {
 		tmp = (u64) to << sft;
+		tmp += from / 2;
 		do_div(tmp, from);
 		if ((tmp >> sftacc) == 0)
 			break;
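This one-line change makes the computed mult round to nearest instead of truncating, and it is where the precomputed SC_MULT/SC_SHIFT pairs quoted in the comments earlier in this diff come from. A userspace re-run of the algorithm reproduces all four (a sketch: do_div() is replaced by plain 64-bit division):

#include <stdio.h>
#include <stdint.h>

static void calc_mult_shift(uint32_t *mult, uint32_t *shift,
			    uint32_t from, uint32_t to, uint32_t minsec)
{
	uint64_t tmp;
	uint32_t sft, sftacc = 32;

	/* Shrink the allowed mult width until minsec worth of cycles
	 * times mult still fits in 64 bits. */
	tmp = ((uint64_t)minsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/* Largest shift whose rounded mult fits that width. */
	for (sft = 32; sft > 0; sft--) {
		tmp = (uint64_t)to << sft;
		tmp += from / 2;	/* the rounding fix above */
		tmp /= from;
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = (uint32_t)tmp;
	*shift = sft;
}

int main(void)
{
	const uint32_t rates[] = { 3686400, 1000000, 32768, 24000000 };
	uint32_t mult, shift;
	int i;

	for (i = 0; i < 4; i++) {
		calc_mult_shift(&mult, &shift, rates[i], 1000000000, 60);
		printf("%8u Hz -> mult %10u shift %u\n", rates[i], mult, shift);
	}
	/* Expect 2275555556/23 (SA1100), 4194304000/22 (Tegra),
	 * 4000000000/17 (OMAP), 2796202667/26 (Versatile/RealView).
	 * Without the rounding, the 24MHz mult would come out as
	 * 2796202666, one off the constant used above. */
	return 0;
}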

Not all files can be shown because too many files changed in this diff