
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (163 commits)
  tracing: Fix compile issue for trace_sched_wakeup.c
  [S390] hardirq: remove pointless header file includes
  [IA64] Move local_softirq_pending() definition
  perf, powerpc: Fix power_pmu_event_init to not use event->ctx
  ftrace: Remove recursion between recordmcount and scripts/mod/empty
  jump_label: Add COND_STMT(), reducer wrappery
  perf: Optimize sw events
  perf: Use jump_labels to optimize the scheduler hooks
  jump_label: Add atomic_t interface
  jump_label: Use more consistent naming
  perf, hw_breakpoint: Fix crash in hw_breakpoint creation
  perf: Find task before event alloc
  perf: Fix task refcount bugs
  perf: Fix group moving
  irq_work: Add generic hardirq context callbacks
  perf_events: Fix transaction recovery in group_sched_in()
  perf_events: Fix bogus AMD64 generic TLB events
  perf_events: Fix bogus context time tracking
  tracing: Remove parent recording in latency tracer graph options
  tracing: Use one prologue for the preempt irqs off tracer function tracers
  ...
Linus Torvalds, 14 years ago
parent
commit
5d70f79b5e
100 changed files with 2781 additions and 1601 deletions
  1. 5 3
      Documentation/kprobes.txt
  2. 11 0
      Makefile
  3. 3 0
      arch/Kconfig
  4. 1 0
      arch/alpha/Kconfig
  5. 0 5
      arch/alpha/include/asm/perf_event.h
  6. 86 42
      arch/alpha/kernel/perf_event.c
  7. 15 15
      arch/alpha/kernel/time.c
  8. 1 0
      arch/arm/Kconfig
  9. 0 12
      arch/arm/include/asm/perf_event.h
  10. 102 110
      arch/arm/kernel/perf_event.c
  11. 4 0
      arch/arm/oprofile/Makefile
  12. 6 305
      arch/arm/oprofile/common.c
  13. 1 0
      arch/frv/Kconfig
  14. 1 1
      arch/frv/lib/Makefile
  15. 0 19
      arch/frv/lib/perf_event.c
  16. 5 6
      arch/ia64/include/asm/hardirq.h
  17. 1 0
      arch/parisc/Kconfig
  18. 1 2
      arch/parisc/include/asm/perf_event.h
  19. 1 0
      arch/powerpc/Kconfig
  20. 1 1
      arch/powerpc/include/asm/paca.h
  21. 25 61
      arch/powerpc/kernel/perf_callchain.c
  22. 102 64
      arch/powerpc/kernel/perf_event.c
  23. 90 58
      arch/powerpc/kernel/perf_event_fsl_emb.c
  24. 21 21
      arch/powerpc/kernel/time.c
  25. 1 0
      arch/s390/Kconfig
  26. 0 4
      arch/s390/include/asm/hardirq.h
  27. 1 2
      arch/s390/include/asm/perf_event.h
  28. 14 0
      arch/sh/Kconfig
  29. 0 7
      arch/sh/include/asm/perf_event.h
  30. 4 46
      arch/sh/kernel/perf_callchain.c
  31. 112 47
      arch/sh/kernel/perf_event.c
  32. 4 0
      arch/sh/oprofile/Makefile
  33. 23 92
      arch/sh/oprofile/common.c
  34. 0 33
      arch/sh/oprofile/op_impl.h
  35. 3 0
      arch/sparc/Kconfig
  36. 32 0
      arch/sparc/include/asm/jump_label.h
  37. 0 4
      arch/sparc/include/asm/perf_event.h
  38. 2 0
      arch/sparc/kernel/Makefile
  39. 47 0
      arch/sparc/kernel/jump_label.c
  40. 6 0
      arch/sparc/kernel/module.c
  41. 4 4
      arch/sparc/kernel/pcr.c
  42. 123 117
      arch/sparc/kernel/perf_event.c
  43. 8 0
      arch/x86/Kconfig
  44. 11 0
      arch/x86/include/asm/alternative.h
  45. 2 2
      arch/x86/include/asm/entry_arch.h
  46. 1 1
      arch/x86/include/asm/hardirq.h
  47. 1 1
      arch/x86/include/asm/hw_irq.h
  48. 2 2
      arch/x86/include/asm/irq_vectors.h
  49. 37 0
      arch/x86/include/asm/jump_label.h
  50. 22 30
      arch/x86/include/asm/perf_event_p4.h
  51. 2 1
      arch/x86/kernel/Makefile
  52. 68 3
      arch/x86/kernel/alternative.c
  53. 123 157
      arch/x86/kernel/cpu/perf_event.c
  54. 2 2
      arch/x86/kernel/cpu/perf_event_amd.c
  55. 4 4
      arch/x86/kernel/cpu/perf_event_intel.c
  56. 7 6
      arch/x86/kernel/cpu/perf_event_intel_ds.c
  57. 268 24
      arch/x86/kernel/cpu/perf_event_p4.c
  58. 3 3
      arch/x86/kernel/entry_64.S
  59. 1 62
      arch/x86/kernel/ftrace.c
  60. 4 4
      arch/x86/kernel/irq.c
  61. 30 0
      arch/x86/kernel/irq_work.c
  62. 3 3
      arch/x86/kernel/irqinit.c
  63. 50 0
      arch/x86/kernel/jump_label.c
  64. 5 9
      arch/x86/kernel/kprobes.c
  65. 3 0
      arch/x86/kernel/module.c
  66. 6 0
      arch/x86/kernel/setup.c
  67. 4 0
      arch/x86/mm/fault.c
  68. 2 0
      arch/x86/mm/kmemcheck/kmemcheck.c
  69. 59 11
      arch/x86/oprofile/backtrace.c
  70. 1 8
      arch/x86/oprofile/nmi_int.c
  71. 8 24
      drivers/oprofile/oprof.c
  72. 1 1
      drivers/oprofile/oprof.h
  73. 5 2
      drivers/oprofile/oprofile_files.c
  74. 328 0
      drivers/oprofile/oprofile_perf.c
  75. 20 34
      drivers/oprofile/oprofilefs.c
  76. 1 1
      include/asm-generic/hardirq.h
  77. 10 0
      include/asm-generic/vmlinux.lds.h
  78. 21 18
      include/linux/dynamic_debug.h
  79. 4 4
      include/linux/ftrace_event.h
  80. 7 1
      include/linux/interrupt.h
  81. 20 0
      include/linux/irq_work.h
  82. 74 0
      include/linux/jump_label.h
  83. 44 0
      include/linux/jump_label_ref.h
  84. 4 1
      include/linux/module.h
  85. 7 0
      include/linux/oprofile.h
  86. 9 0
      include/linux/percpu.h
  87. 138 74
      include/linux/perf_event.h
  88. 8 1
      include/linux/sched.h
  89. 8 2
      include/linux/stop_machine.h
  90. 4 1
      include/linux/tracepoint.h
  91. 24 2
      include/trace/events/irq.h
  92. 23 2
      include/trace/events/napi.h
  93. 82 0
      include/trace/events/net.h
  94. 87 3
      include/trace/events/power.h
  95. 17 0
      include/trace/events/skb.h
  96. 8 0
      init/Kconfig
  97. 3 1
      kernel/Makefile
  98. 1 3
      kernel/exit.c
  99. 63 12
      kernel/hw_breakpoint.c
  100. 164 0
      kernel/irq_work.c

+ 5 - 3
Documentation/kprobes.txt

@@ -542,9 +542,11 @@ Kprobes does not use mutexes or allocate memory except during
 registration and unregistration.
 
 Probe handlers are run with preemption disabled.  Depending on the
-architecture, handlers may also run with interrupts disabled.  In any
-case, your handler should not yield the CPU (e.g., by attempting to
-acquire a semaphore).
+architecture and optimization state, handlers may also run with
+interrupts disabled (e.g., kretprobe handlers and optimized kprobe
+handlers run without interrupt disabled on x86/x86-64).  In any case,
+your handler should not yield the CPU (e.g., by attempting to acquire
+a semaphore).
 
 Since a return probe is implemented by replacing the return
 address with the trampoline's address, stack backtraces and calls
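For illustration only (not part of this patch), a minimal kprobe module that respects the rule above might look like the sketch below; the probed symbol and all other names are made up:

#include <linux/module.h>
#include <linux/kprobes.h>

/* Runs with preemption (and possibly interrupts) disabled:
 * no sleeping, no semaphores or mutexes, no blocking calls. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %p\n", p->addr);
	return 0;
}

static struct kprobe kp = {
	.symbol_name = "do_fork",	/* example target symbol only */
	.pre_handler = handler_pre,
};

static int __init probe_init(void)
{
	return register_kprobe(&kp);
}

static void __exit probe_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");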

+ 11 - 0
Makefile

@@ -568,6 +568,12 @@ endif
 
 ifdef CONFIG_FUNCTION_TRACER
 KBUILD_CFLAGS	+= -pg
+ifdef CONFIG_DYNAMIC_FTRACE
+	ifdef CONFIG_HAVE_C_RECORDMCOUNT
+		BUILD_C_RECORDMCOUNT := y
+		export BUILD_C_RECORDMCOUNT
+	endif
+endif
 endif
 
 # We trigger additional mismatches with less inlining
@@ -591,6 +597,11 @@ KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
 
+# check for 'asm goto'
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
+	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 # But warn user when we do so
 warn-assign = \
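For context (not part of this patch), scripts/gcc-goto.sh decides whether to define CC_HAVE_ASM_GOTO by asking the compiler to build a tiny 'asm goto' test program, roughly of this shape (the exact test the script feeds to $(CC) may differ):

/* Does the compiler accept an asm statement that can jump to a C label? */
int main(void)
{
entry:
	asm goto("" : : : : entry);	/* empty asm body, 'entry' listed as a possible target */
	return 0;
}

If the probe compiles, the script prints "y" and the Makefile adds -DCC_HAVE_ASM_GOTO, which the new jump label / static branch code can key off.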

+ 3 - 0
arch/Kconfig

@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
 	  subsystem.  Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_ARCH_JUMP_LABEL
+	bool
+
 source "kernel/gcov/Kconfig"

+ 1 - 0
arch/alpha/Kconfig

@@ -9,6 +9,7 @@ config ALPHA
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_DMA_ATTRS
 	help

+ 0 - 5
arch/alpha/include/asm/perf_event.h

@@ -1,11 +1,6 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-/* Alpha only supports software events through this interface. */
-extern void set_perf_event_pending(void);
-
-#define PERF_EVENT_INDEX_OFFSET 0
-
 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
 #else

+ 86 - 42
arch/alpha/kernel/perf_event.c

@@ -307,7 +307,7 @@ again:
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
-	delta = (new_raw_count  - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
+	delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
 
 	/* It is possible on very rare occasions that the PMC has overflowed
 	 * but the interrupt is yet to come.  Detect and fix this situation.
@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
 		struct hw_perf_event *hwc = &pe->hw;
 		int idx = hwc->idx;
 
-		if (cpuc->current_idx[j] != PMC_NO_INDEX) {
-			cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
-			continue;
+		if (cpuc->current_idx[j] == PMC_NO_INDEX) {
+			alpha_perf_event_set_period(pe, hwc, idx);
+			cpuc->current_idx[j] = idx;
 		}
 
-		alpha_perf_event_set_period(pe, hwc, idx);
-		cpuc->current_idx[j] = idx;
-		cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
+		if (!(hwc->state & PERF_HES_STOPPED))
+			cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
 	}
 	cpuc->config = cpuc->event[0]->hw.config_base;
 }
@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
  *  - this function is called from outside this module via the pmu struct
  *    returned from perf event initialisation.
  */
-static int alpha_pmu_enable(struct perf_event *event)
+static int alpha_pmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	int n0;
 	int ret;
-	unsigned long flags;
+	unsigned long irq_flags;
 
 	/*
 	 * The Sparc code has the IRQ disable first followed by the perf
@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
 	 * nevertheless we disable the PMCs first to enable a potential
 	 * final PMI to occur before we disable interrupts.
 	 */
-	perf_disable();
-	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+	local_irq_save(irq_flags);
 
 	/* Default to error to be returned */
 	ret = -EAGAIN;
@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
 		}
 	}
 
-	local_irq_restore(flags);
-	perf_enable();
+	hwc->state = PERF_HES_UPTODATE;
+	if (!(flags & PERF_EF_START))
+		hwc->state |= PERF_HES_STOPPED;
+
+	local_irq_restore(irq_flags);
+	perf_pmu_enable(event->pmu);
 
 	return ret;
 }
@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
  *  - this function is called from outside this module via the pmu struct
  *    returned from perf event initialisation.
  */
-static void alpha_pmu_disable(struct perf_event *event)
+static void alpha_pmu_del(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	unsigned long flags;
+	unsigned long irq_flags;
 	int j;
 
-	perf_disable();
-	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+	local_irq_save(irq_flags);
 
 	for (j = 0; j < cpuc->n_events; j++) {
 		if (event == cpuc->event[j]) {
@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
 		}
 	}
 
-	local_irq_restore(flags);
-	perf_enable();
+	local_irq_restore(irq_flags);
+	perf_pmu_enable(event->pmu);
 }
 
 
@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event)
 }
 
 
-static void alpha_pmu_unthrottle(struct perf_event *event)
+static void alpha_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		cpuc->idx_mask &= ~(1UL<<hwc->idx);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		alpha_perf_event_update(event, hwc, hwc->idx, 0);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+
+	if (cpuc->enabled)
+		wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
+}
+
+
+static void alpha_pmu_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+		alpha_perf_event_set_period(event, hwc, hwc->idx);
+	}
+
+	hwc->state = 0;
+
 	cpuc->idx_mask |= 1UL<<hwc->idx;
-	wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
+	if (cpuc->enabled)
+		wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
 }
 
 
@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
-static const struct pmu pmu = {
-	.enable		= alpha_pmu_enable,
-	.disable	= alpha_pmu_disable,
-	.read		= alpha_pmu_read,
-	.unthrottle	= alpha_pmu_unthrottle,
-};
-
-
 /*
  * Main entry point to initialise a HW performance event.
  */
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int alpha_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
 	if (!alpha_pmu)
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
 	/* Do the real initialisation work. */
 	err = __hw_perf_event_init(event);
 
-	if (err)
-		return ERR_PTR(err);
-
-	return &pmu;
+	return err;
 }
 
-
-
 /*
  * Main entry point - enable HW performance counters.
  */
-void hw_perf_enable(void)
+static void alpha_pmu_enable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -700,7 +732,7 @@ void hw_perf_enable(void)
  * Main entry point - disable HW performance counters.
  */
 
-void hw_perf_disable(void)
+static void alpha_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -713,6 +745,17 @@ void hw_perf_disable(void)
 	wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
 }
 
+static struct pmu pmu = {
+	.pmu_enable	= alpha_pmu_enable,
+	.pmu_disable	= alpha_pmu_disable,
+	.event_init	= alpha_pmu_event_init,
+	.add		= alpha_pmu_add,
+	.del		= alpha_pmu_del,
+	.start		= alpha_pmu_start,
+	.stop		= alpha_pmu_stop,
+	.read		= alpha_pmu_read,
+};
+
 
 /*
  * Main entry point - don't know when this is called but it
@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 	wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
 
 	/* la_ptr is the counter that overflowed. */
-	if (unlikely(la_ptr >= perf_max_events)) {
+	if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
 		/* This should never occur! */
 		irq_err_count++;
 		pr_warning("PMI: silly index %ld\n", la_ptr);
@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 			/* Interrupts coming too quickly; "throttle" the
 			 * counter, i.e., disable it for a little while.
 			 */
-			cpuc->idx_mask &= ~(1UL<<idx);
+			alpha_pmu_stop(event, 0);
 		}
 	}
 	wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
 
 	/* And set up PMU specification */
 	alpha_pmu = &ev67_pmu;
-	perf_max_events = alpha_pmu->num_pmcs;
+
+	perf_pmu_register(&pmu);
 }
 

+ 15 - 15
arch/alpha/kernel/time.c

@@ -41,7 +41,7 @@
 #include <linux/init.h>
 #include <linux/bcd.h>
 #include <linux/profile.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -83,25 +83,25 @@ static struct {
 
 unsigned long est_cycle_freq;
 
-#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_IRQ_WORK
 
-DEFINE_PER_CPU(u8, perf_event_pending);
+DEFINE_PER_CPU(u8, irq_work_pending);
 
-#define set_perf_event_pending_flag()  __get_cpu_var(perf_event_pending) = 1
-#define test_perf_event_pending()      __get_cpu_var(perf_event_pending)
-#define clear_perf_event_pending()     __get_cpu_var(perf_event_pending) = 0
+#define set_irq_work_pending_flag()  __get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending()      __get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending()     __get_cpu_var(irq_work_pending) = 0
 
-void set_perf_event_pending(void)
+void set_irq_work_pending(void)
 {
-	set_perf_event_pending_flag();
+	set_irq_work_pending_flag();
 }
 
-#else  /* CONFIG_PERF_EVENTS */
+#else  /* CONFIG_IRQ_WORK */
 
-#define test_perf_event_pending()      0
-#define clear_perf_event_pending()
+#define test_irq_work_pending()      0
+#define clear_irq_work_pending()
 
-#endif /* CONFIG_PERF_EVENTS */
+#endif /* CONFIG_IRQ_WORK */
 
 
 static inline __u32 rpcc(void)
@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev)
 
 	write_sequnlock(&xtime_lock);
 
-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
 	}
 
 #ifndef CONFIG_SMP
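The hunk above is the consumer side of the new irq_work machinery: the timer interrupt drains pending callbacks with irq_work_run(). A producer, for comparison, would look roughly like this sketch (illustrative only; everything except the irq_work API itself is made up):

#include <linux/irq_work.h>

/* Runs later in hardirq context, when irq_work_run() is called
 * (on Alpha, from timer_interrupt() as shown above). */
static void my_deferred_callback(struct irq_work *work)
{
}

static struct irq_work my_work;

static void my_setup(void)
{
	init_irq_work(&my_work, my_deferred_callback);
}

/* Intended to be callable from contexts that cannot do the real work
 * directly, such as NMIs or PMU overflow interrupts. */
static void my_raise(void)
{
	irq_work_queue(&my_work);	/* sets the arch pending flag, e.g. set_irq_work_pending() */
}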

+ 1 - 0
arch/arm/Kconfig

@@ -23,6 +23,7 @@ config ARM
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_LZMA
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select HAVE_REGS_AND_STACK_ACCESS_API

+ 0 - 12
arch/arm/include/asm/perf_event.h

@@ -12,18 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/*
- * NOP: on *most* (read: all supported) ARM platforms, the performance
- * counter interrupts are regular interrupts and not an NMI. This
- * means that when we receive the interrupt we can call
- * perf_event_do_pending() that handles all of the work with
- * interrupts disabled.
- */
-static inline void
-set_perf_event_pending(void)
-{
-}
-
 /* ARM performance counters start from 1 (in the cp15 accesses) so use the
  * same indexes here for consistency. */
 #define PERF_EVENT_INDEX_OFFSET 1

+ 102 - 110
arch/arm/kernel/perf_event.c

@@ -123,6 +123,12 @@ armpmu_get_max_events(void)
 }
 }
 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
 
 
+int perf_num_counters(void)
+{
+	return armpmu_get_max_events();
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
 #define HW_OP_UNSUPPORTED		0xFFFF
 #define HW_OP_UNSUPPORTED		0xFFFF
 
 
 #define C(_x) \
 #define C(_x) \
@@ -221,46 +227,56 @@ again:
 }
 }
 
 
 static void
 static void
-armpmu_disable(struct perf_event *event)
+armpmu_read(struct perf_event *event)
 {
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	WARN_ON(idx < 0);
-
-	clear_bit(idx, cpuc->active_mask);
-	armpmu->disable(hwc, idx);
-
-	barrier();
 
 
-	armpmu_event_update(event, hwc, idx);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
 
 
-	perf_event_update_userpage(event);
+	armpmu_event_update(event, hwc, hwc->idx);
 }
 }
 
 
 static void
 static void
-armpmu_read(struct perf_event *event)
+armpmu_stop(struct perf_event *event, int flags)
 {
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
 
 
-	/* Don't read disabled counters! */
-	if (hwc->idx < 0)
+	if (!armpmu)
 		return;
 		return;
 
 
-	armpmu_event_update(event, hwc, hwc->idx);
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(hwc, hwc->idx);
+		barrier(); /* why? */
+		armpmu_event_update(event, hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
 }
 }
 
 
 static void
 static void
-armpmu_unthrottle(struct perf_event *event)
+armpmu_start(struct perf_event *event, int flags)
 {
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
 
 
+	if (!armpmu)
+		return;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
 	/*
 	/*
 	 * Set the period again. Some counters can't be stopped, so when we
 	 * Set the period again. Some counters can't be stopped, so when we
-	 * were throttled we simply disabled the IRQ source and the counter
+	 * were stopped we simply disabled the IRQ source and the counter
 	 * may have been left counting. If we don't do this step then we may
 	 * may have been left counting. If we don't do this step then we may
 	 * get an interrupt too soon or *way* too late if the overflow has
 	 * get an interrupt too soon or *way* too late if the overflow has
 	 * happened since disabling.
 	 * happened since disabling.
@@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event)
 	armpmu->enable(hwc, hwc->idx);
 	armpmu->enable(hwc, hwc->idx);
 }
 }
 
 
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0);
+
+	clear_bit(idx, cpuc->active_mask);
+	armpmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
 static int
 static int
-armpmu_enable(struct perf_event *event)
+armpmu_add(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int idx;
 	int err = 0;
 	int err = 0;
 
 
+	perf_pmu_disable(event->pmu);
+
 	/* If we don't have a space for the counter then finish early. */
 	/* If we don't have a space for the counter then finish early. */
 	idx = armpmu->get_event_idx(cpuc, hwc);
 	idx = armpmu->get_event_idx(cpuc, hwc);
 	if (idx < 0) {
 	if (idx < 0) {
@@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event)
 	cpuc->events[idx] = event;
 	cpuc->events[idx] = event;
 	set_bit(idx, cpuc->active_mask);
 	set_bit(idx, cpuc->active_mask);
 
 
-	/* Set the period for the event. */
-	armpmu_event_set_period(event, hwc, idx);
-
-	/* Enable the event. */
-	armpmu->enable(hwc, idx);
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
 
 
 	/* Propagate our changes to the userspace mapping. */
 	/* Propagate our changes to the userspace mapping. */
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
 
 
 out:
 out:
+	perf_pmu_enable(event->pmu);
 	return err;
 	return err;
 }
 }
 
 
-static struct pmu pmu = {
-	.enable	    = armpmu_enable,
-	.disable    = armpmu_disable,
-	.unthrottle = armpmu_unthrottle,
-	.read	    = armpmu_read,
-};
+static struct pmu pmu;
 
 
 static int
 static int
 validate_event(struct cpu_hw_events *cpuc,
 validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event)
 	return err;
 	return err;
 }
 }
 
 
-const struct pmu *
-hw_perf_event_init(struct perf_event *event)
+static int armpmu_event_init(struct perf_event *event)
 {
 {
 	int err = 0;
 	int err = 0;
 
 
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
 	if (!armpmu)
 	if (!armpmu)
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
 
 	event->destroy = hw_perf_event_destroy;
 	event->destroy = hw_perf_event_destroy;
 
 
 	if (!atomic_inc_not_zero(&active_events)) {
 	if (!atomic_inc_not_zero(&active_events)) {
-		if (atomic_read(&active_events) > perf_max_events) {
+		if (atomic_read(&active_events) > armpmu->num_events) {
 			atomic_dec(&active_events);
 			atomic_dec(&active_events);
-			return ERR_PTR(-ENOSPC);
+			return -ENOSPC;
 		}
 		}
 
 
 		mutex_lock(&pmu_reserve_mutex);
 		mutex_lock(&pmu_reserve_mutex);
@@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event)
 	}
 	}
 
 
 	if (err)
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 
 	err = __hw_perf_event_init(event);
 	err = __hw_perf_event_init(event);
 	if (err)
 	if (err)
 		hw_perf_event_destroy(event);
 		hw_perf_event_destroy(event);
 
 
-	return err ? ERR_PTR(err) : &pmu;
+	return err;
 }
 }
 
 
-void
-hw_perf_enable(void)
+static void armpmu_enable(struct pmu *pmu)
 {
 {
 	/* Enable all of the perf events on hardware. */
 	/* Enable all of the perf events on hardware. */
 	int idx;
 	int idx;
@@ -549,13 +586,23 @@ hw_perf_enable(void)
 	armpmu->start();
 	armpmu->start();
 }
 }
 
 
-void
-hw_perf_disable(void)
+static void armpmu_disable(struct pmu *pmu)
 {
 {
 	if (armpmu)
 	if (armpmu)
 		armpmu->stop();
 		armpmu->stop();
 }
 }
 
 
+static struct pmu pmu = {
+	.pmu_enable	= armpmu_enable,
+	.pmu_disable	= armpmu_disable,
+	.event_init	= armpmu_event_init,
+	.add		= armpmu_add,
+	.del		= armpmu_del,
+	.start		= armpmu_start,
+	.stop		= armpmu_stop,
+	.read		= armpmu_read,
+};
+
 /*
 /*
  * ARMv6 Performance counter handling code.
  * ARMv6 Performance counter handling code.
  *
  *
@@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num,
 	 * platforms that can have the PMU interrupts raised as an NMI, this
 	 * platforms that can have the PMU interrupts raised as an NMI, this
 	 * will not work.
 	 * will not work.
 	 */
 	 */
-	perf_event_do_pending();
+	irq_work_run();
 
 
 	return IRQ_HANDLED;
 	return IRQ_HANDLED;
 }
 }
@@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	 * platforms that can have the PMU interrupts raised as an NMI, this
 	 * platforms that can have the PMU interrupts raised as an NMI, this
 	 * will not work.
 	 * will not work.
 	 */
 	 */
-	perf_event_do_pending();
+	irq_work_run();
 
 
 	return IRQ_HANDLED;
 	return IRQ_HANDLED;
 }
 }
@@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 			armpmu->disable(hwc, idx);
 			armpmu->disable(hwc, idx);
 	}
 	}
 
 
-	perf_event_do_pending();
+	irq_work_run();
 
 
 	/*
 	/*
 	 * Re-enable the PMU.
 	 * Re-enable the PMU.
@@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 			armpmu->disable(hwc, idx);
 			armpmu->disable(hwc, idx);
 	}
 	}
 
 
-	perf_event_do_pending();
+	irq_work_run();
 
 
 	/*
 	/*
 	 * Re-enable the PMU.
 	 * Re-enable the PMU.
@@ -2933,14 +2980,12 @@ init_hw_perf_events(void)
 			armpmu = &armv6pmu;
 			armpmu = &armv6pmu;
 			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
 			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
 					sizeof(armv6_perf_cache_map));
 					sizeof(armv6_perf_cache_map));
-			perf_max_events	= armv6pmu.num_events;
 			break;
 			break;
 		case 0xB020:	/* ARM11mpcore */
 		case 0xB020:	/* ARM11mpcore */
 			armpmu = &armv6mpcore_pmu;
 			armpmu = &armv6mpcore_pmu;
 			memcpy(armpmu_perf_cache_map,
 			memcpy(armpmu_perf_cache_map,
 			       armv6mpcore_perf_cache_map,
 			       armv6mpcore_perf_cache_map,
 			       sizeof(armv6mpcore_perf_cache_map));
 			       sizeof(armv6mpcore_perf_cache_map));
-			perf_max_events = armv6mpcore_pmu.num_events;
 			break;
 			break;
 		case 0xC080:	/* Cortex-A8 */
 		case 0xC080:	/* Cortex-A8 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
 			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2952,7 +2997,6 @@ init_hw_perf_events(void)
 			/* Reset PMNC and read the nb of CNTx counters
 			/* Reset PMNC and read the nb of CNTx counters
 			    supported */
 			    supported */
 			armv7pmu.num_events = armv7_reset_read_pmnc();
 			armv7pmu.num_events = armv7_reset_read_pmnc();
-			perf_max_events = armv7pmu.num_events;
 			break;
 			break;
 		case 0xC090:	/* Cortex-A9 */
 		case 0xC090:	/* Cortex-A9 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
 			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2964,7 +3008,6 @@ init_hw_perf_events(void)
 			/* Reset PMNC and read the nb of CNTx counters
 			/* Reset PMNC and read the nb of CNTx counters
 			    supported */
 			    supported */
 			armv7pmu.num_events = armv7_reset_read_pmnc();
 			armv7pmu.num_events = armv7_reset_read_pmnc();
-			perf_max_events = armv7pmu.num_events;
 			break;
 			break;
 		}
 		}
 	/* Intel CPUs [xscale]. */
 	/* Intel CPUs [xscale]. */
@@ -2975,13 +3018,11 @@ init_hw_perf_events(void)
 			armpmu = &xscale1pmu;
 			armpmu = &xscale1pmu;
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 					sizeof(xscale_perf_cache_map));
 					sizeof(xscale_perf_cache_map));
-			perf_max_events	= xscale1pmu.num_events;
 			break;
 			break;
 		case 2:
 		case 2:
 			armpmu = &xscale2pmu;
 			armpmu = &xscale2pmu;
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 					sizeof(xscale_perf_cache_map));
 					sizeof(xscale_perf_cache_map));
-			perf_max_events	= xscale2pmu.num_events;
 			break;
 			break;
 		}
 		}
 	}
 	}
@@ -2991,9 +3032,10 @@ init_hw_perf_events(void)
 				arm_pmu_names[armpmu->id], armpmu->num_events);
 				arm_pmu_names[armpmu->id], armpmu->num_events);
 	} else {
 	} else {
 		pr_info("no hardware support available\n");
 		pr_info("no hardware support available\n");
-		perf_max_events = -1;
 	}
 	}
 
 
+	perf_pmu_register(&pmu);
+
 	return 0;
 	return 0;
 }
 }
 arch_initcall(init_hw_perf_events);
 arch_initcall(init_hw_perf_events);
@@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events);
 /*
 /*
  * Callchain handling code.
  * Callchain handling code.
  */
  */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
-		u64 ip)
-{
-	if (entry->nr < PERF_MAX_STACK_DEPTH)
-		entry->ip[entry->nr++] = ip;
-}
 
 
 /*
 /*
  * The registers we're interested in are at the end of the variable
  * The registers we're interested in are at the end of the variable
@@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail,
 	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
 	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
 		return NULL;
 		return NULL;
 
 
-	callchain_store(entry, buftail.lr);
+	perf_callchain_store(entry, buftail.lr);
 
 
 	/*
 	/*
 	 * Frame pointers should strictly progress back up the stack
 	 * Frame pointers should strictly progress back up the stack
@@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail,
 	return buftail.fp - 1;
 	return buftail.fp - 1;
 }
 }
 
 
-static void
-perf_callchain_user(struct pt_regs *regs,
-		    struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
 	struct frame_tail *tail;
 	struct frame_tail *tail;
 
 
-	callchain_store(entry, PERF_CONTEXT_USER);
-
-	if (!user_mode(regs))
-		regs = task_pt_regs(current);
 
 
 	tail = (struct frame_tail *)regs->ARM_fp - 1;
 	tail = (struct frame_tail *)regs->ARM_fp - 1;
 
 
@@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr,
 		void *data)
 		void *data)
 {
 {
 	struct perf_callchain_entry *entry = data;
 	struct perf_callchain_entry *entry = data;
-	callchain_store(entry, fr->pc);
+	perf_callchain_store(entry, fr->pc);
 	return 0;
 	return 0;
 }
 }
 
 
-static void
-perf_callchain_kernel(struct pt_regs *regs,
-		      struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
 	struct stackframe fr;
 	struct stackframe fr;
 
 
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
 	fr.fp = regs->ARM_fp;
 	fr.fp = regs->ARM_fp;
 	fr.sp = regs->ARM_sp;
 	fr.sp = regs->ARM_sp;
 	fr.lr = regs->ARM_lr;
 	fr.lr = regs->ARM_lr;
 	fr.pc = regs->ARM_pc;
 	fr.pc = regs->ARM_pc;
 	walk_stackframe(&fr, callchain_trace, entry);
 	walk_stackframe(&fr, callchain_trace, entry);
 }
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs,
-		  struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (!current || !current->pid)
-		return;
-
-	if (is_user && current->state != TASK_RUNNING)
-		return;
-
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-
-	if (current->mm)
-		perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
-	entry->nr = 0;
-	perf_do_callchain(regs, entry);
-	return entry;
-}

+ 4 - 0
arch/arm/oprofile/Makefile

@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		oprofilefs.o oprofile_stats.o \
 		oprofilefs.o oprofile_stats.o \
 		timer_int.o )
 		timer_int.o )
 
 
+ifeq ($(CONFIG_HW_PERF_EVENTS),y)
+DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
+endif
+
 oprofile-y				:= $(DRIVER_OBJS) common.o
 oprofile-y				:= $(DRIVER_OBJS) common.o

+ 6 - 305
arch/arm/oprofile/common.c

@@ -25,139 +25,10 @@
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 
 
 #ifdef CONFIG_HW_PERF_EVENTS
 #ifdef CONFIG_HW_PERF_EVENTS
-/*
- * Per performance monitor configuration as set via oprofilefs.
- */
-struct op_counter_config {
-	unsigned long count;
-	unsigned long enabled;
-	unsigned long event;
-	unsigned long unit_mask;
-	unsigned long kernel;
-	unsigned long user;
-	struct perf_event_attr attr;
-};
-
-static int op_arm_enabled;
-static DEFINE_MUTEX(op_arm_mutex);
-
-static struct op_counter_config *counter_config;
-static struct perf_event **perf_events[nr_cpumask_bits];
-static int perf_num_counters;
-
-/*
- * Overflow callback for oprofile.
- */
-static void op_overflow_handler(struct perf_event *event, int unused,
-			struct perf_sample_data *data, struct pt_regs *regs)
+char *op_name_from_perf_id(void)
 {
 {
-	int id;
-	u32 cpu = smp_processor_id();
-
-	for (id = 0; id < perf_num_counters; ++id)
-		if (perf_events[cpu][id] == event)
-			break;
-
-	if (id != perf_num_counters)
-		oprofile_add_sample(regs, id);
-	else
-		pr_warning("oprofile: ignoring spurious overflow "
-				"on cpu %u\n", cpu);
-}
-
-/*
- * Called by op_arm_setup to create perf attributes to mirror the oprofile
- * settings in counter_config. Attributes are created as `pinned' events and
- * so are permanently scheduled on the PMU.
- */
-static void op_perf_setup(void)
-{
-	int i;
-	u32 size = sizeof(struct perf_event_attr);
-	struct perf_event_attr *attr;
-
-	for (i = 0; i < perf_num_counters; ++i) {
-		attr = &counter_config[i].attr;
-		memset(attr, 0, size);
-		attr->type		= PERF_TYPE_RAW;
-		attr->size		= size;
-		attr->config		= counter_config[i].event;
-		attr->sample_period	= counter_config[i].count;
-		attr->pinned		= 1;
-	}
-}
-
-static int op_create_counter(int cpu, int event)
-{
-	int ret = 0;
-	struct perf_event *pevent;
-
-	if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
-		return ret;
-
-	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
-						  cpu, -1,
-						  op_overflow_handler);
-
-	if (IS_ERR(pevent)) {
-		ret = PTR_ERR(pevent);
-	} else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
-		perf_event_release_kernel(pevent);
-		pr_warning("oprofile: failed to enable event %d "
-				"on CPU %d\n", event, cpu);
-		ret = -EBUSY;
-	} else {
-		perf_events[cpu][event] = pevent;
-	}
-
-	return ret;
-}
+	enum arm_perf_pmu_ids id = armpmu_get_pmu_id();
 
 
-static void op_destroy_counter(int cpu, int event)
-{
-	struct perf_event *pevent = perf_events[cpu][event];
-
-	if (pevent) {
-		perf_event_release_kernel(pevent);
-		perf_events[cpu][event] = NULL;
-	}
-}
-
-/*
- * Called by op_arm_start to create active perf events based on the
- * perviously configured attributes.
- */
-static int op_perf_start(void)
-{
-	int cpu, event, ret = 0;
-
-	for_each_online_cpu(cpu) {
-		for (event = 0; event < perf_num_counters; ++event) {
-			ret = op_create_counter(cpu, event);
-			if (ret)
-				goto out;
-		}
-	}
-
-out:
-	return ret;
-}
-
-/*
- * Called by op_arm_stop at the end of a profiling run.
- */
-static void op_perf_stop(void)
-{
-	int cpu, event;
-
-	for_each_online_cpu(cpu)
-		for (event = 0; event < perf_num_counters; ++event)
-			op_destroy_counter(cpu, event);
-}
-
-
-static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
-{
 	switch (id) {
 	switch (id) {
 	case ARM_PERF_PMU_ID_XSCALE1:
 	case ARM_PERF_PMU_ID_XSCALE1:
 		return "arm/xscale1";
 		return "arm/xscale1";
@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
 	}
 	}
 }
 }
 
 
-static int op_arm_create_files(struct super_block *sb, struct dentry *root)
-{
-	unsigned int i;
-
-	for (i = 0; i < perf_num_counters; i++) {
-		struct dentry *dir;
-		char buf[4];
-
-		snprintf(buf, sizeof buf, "%d", i);
-		dir = oprofilefs_mkdir(sb, root, buf);
-		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
-		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
-		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
-		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
-		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
-		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
-	}
-
-	return 0;
-}
-
-static int op_arm_setup(void)
-{
-	spin_lock(&oprofilefs_lock);
-	op_perf_setup();
-	spin_unlock(&oprofilefs_lock);
-	return 0;
-}
-
-static int op_arm_start(void)
-{
-	int ret = -EBUSY;
-
-	mutex_lock(&op_arm_mutex);
-	if (!op_arm_enabled) {
-		ret = 0;
-		op_perf_start();
-		op_arm_enabled = 1;
-	}
-	mutex_unlock(&op_arm_mutex);
-	return ret;
-}
-
-static void op_arm_stop(void)
-{
-	mutex_lock(&op_arm_mutex);
-	if (op_arm_enabled)
-		op_perf_stop();
-	op_arm_enabled = 0;
-	mutex_unlock(&op_arm_mutex);
-}
-
-#ifdef CONFIG_PM
-static int op_arm_suspend(struct platform_device *dev, pm_message_t state)
-{
-	mutex_lock(&op_arm_mutex);
-	if (op_arm_enabled)
-		op_perf_stop();
-	mutex_unlock(&op_arm_mutex);
-	return 0;
-}
-
-static int op_arm_resume(struct platform_device *dev)
-{
-	mutex_lock(&op_arm_mutex);
-	if (op_arm_enabled && op_perf_start())
-		op_arm_enabled = 0;
-	mutex_unlock(&op_arm_mutex);
-	return 0;
-}
-
-static struct platform_driver oprofile_driver = {
-	.driver		= {
-		.name		= "arm-oprofile",
-	},
-	.resume		= op_arm_resume,
-	.suspend	= op_arm_suspend,
-};
-
-static struct platform_device *oprofile_pdev;
-
-static int __init init_driverfs(void)
-{
-	int ret;
-
-	ret = platform_driver_register(&oprofile_driver);
-	if (ret)
-		goto out;
-
-	oprofile_pdev =	platform_device_register_simple(
-				oprofile_driver.driver.name, 0, NULL, 0);
-	if (IS_ERR(oprofile_pdev)) {
-		ret = PTR_ERR(oprofile_pdev);
-		platform_driver_unregister(&oprofile_driver);
-	}
-
-out:
-	return ret;
-}
-
-static void  exit_driverfs(void)
-{
-	platform_device_unregister(oprofile_pdev);
-	platform_driver_unregister(&oprofile_driver);
-}
-#else
-static int __init init_driverfs(void) { return 0; }
-#define exit_driverfs() do { } while (0)
-#endif /* CONFIG_PM */
-
 static int report_trace(struct stackframe *frame, void *d)
 static int report_trace(struct stackframe *frame, void *d)
 {
 {
 	unsigned int *depth = d;
 	unsigned int *depth = d;
@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 {
-	int cpu, ret = 0;
-
-	perf_num_counters = armpmu_get_max_events();
-
-	counter_config = kcalloc(perf_num_counters,
-			sizeof(struct op_counter_config), GFP_KERNEL);
-
-	if (!counter_config) {
-		pr_info("oprofile: failed to allocate %d "
-				"counters\n", perf_num_counters);
-		return -ENOMEM;
-	}
-
-	ret = init_driverfs();
-	if (ret) {
-		kfree(counter_config);
-		counter_config = NULL;
-		return ret;
-	}
-
-	for_each_possible_cpu(cpu) {
-		perf_events[cpu] = kcalloc(perf_num_counters,
-				sizeof(struct perf_event *), GFP_KERNEL);
-		if (!perf_events[cpu]) {
-			pr_info("oprofile: failed to allocate %d perf events "
-					"for cpu %d\n", perf_num_counters, cpu);
-			while (--cpu >= 0)
-				kfree(perf_events[cpu]);
-			return -ENOMEM;
-		}
-	}
-
 	ops->backtrace		= arm_backtrace;
 	ops->backtrace		= arm_backtrace;
-	ops->create_files	= op_arm_create_files;
-	ops->setup		= op_arm_setup;
-	ops->start		= op_arm_start;
-	ops->stop		= op_arm_stop;
-	ops->shutdown		= op_arm_stop;
-	ops->cpu_type		= op_name_from_perf_id(armpmu_get_pmu_id());
-
-	if (!ops->cpu_type)
-		ret = -ENODEV;
-	else
-		pr_info("oprofile: using %s\n", ops->cpu_type);
 
 
-	return ret;
+	return oprofile_perf_init(ops);
 }
 }
 
 
-void oprofile_arch_exit(void)
+void __exit oprofile_arch_exit(void)
 {
 {
-	int cpu, id;
-	struct perf_event *event;
-
-	if (*perf_events) {
-		for_each_possible_cpu(cpu) {
-			for (id = 0; id < perf_num_counters; ++id) {
-				event = perf_events[cpu][id];
-				if (event != NULL)
-					perf_event_release_kernel(event);
-			}
-			kfree(perf_events[cpu]);
-		}
-	}
-
-	if (counter_config) {
-		kfree(counter_config);
-		exit_driverfs();
-	}
+	oprofile_perf_exit();
 }
 }
 #else
 #else
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 int __init oprofile_arch_init(struct oprofile_operations *ops)
@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	pr_info("oprofile: hardware counters not available\n");
 	pr_info("oprofile: hardware counters not available\n");
 	return -ENODEV;
 	return -ENODEV;
 }
 }
-void oprofile_arch_exit(void) {}
+void __exit oprofile_arch_exit(void) {}
 #endif /* CONFIG_HW_PERF_EVENTS */
 #endif /* CONFIG_HW_PERF_EVENTS */

+ 1 - 0
arch/frv/Kconfig

@@ -7,6 +7,7 @@ config FRV
 	default y
 	default y
 	select HAVE_IDE
 	select HAVE_IDE
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS
 
 
 config ZONE_DMA
 config ZONE_DMA

+ 1 - 1
arch/frv/lib/Makefile

@@ -5,4 +5,4 @@
 lib-y := \
 lib-y := \
 	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
 	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
 	checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
 	checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
-	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o
+	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o

+ 0 - 19
arch/frv/lib/perf_event.c

@@ -1,19 +0,0 @@
-/* Performance event handling
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/perf_event.h>
-
-/*
- * mark the performance event as pending
- */
-void set_perf_event_pending(void)
-{
-}

+ 5 - 6
arch/ia64/include/asm/hardirq.h

@@ -6,12 +6,6 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
  */
 
 
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-#include <asm/processor.h>
-
 /*
 /*
  * No irq_cpustat_t for IA-64.  The data is held in the per-CPU data structure.
  * No irq_cpustat_t for IA-64.  The data is held in the per-CPU data structure.
  */
  */
@@ -20,6 +14,11 @@
 
 
 #define local_softirq_pending()		(local_cpu_data->softirq_pending)
 #define local_softirq_pending()		(local_cpu_data->softirq_pending)
 
 
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+#include <asm/processor.h>
+
 extern void __iomem *ipi_base_addr;
 extern void __iomem *ipi_base_addr;
 
 
 void ack_bad_irq(unsigned int irq);
 void ack_bad_irq(unsigned int irq);

+ 1 - 0
arch/parisc/Kconfig

@@ -16,6 +16,7 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select INIT_ALL_POSSIBLE
 	select BUG
 	select BUG
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
 	select GENERIC_ATOMIC64 if !64BIT
 	help
 	help

+ 1 - 2
arch/parisc/include/asm/perf_event.h

@@ -1,7 +1,6 @@
 #ifndef __ASM_PARISC_PERF_EVENT_H
 #ifndef __ASM_PARISC_PERF_EVENT_H
 #define __ASM_PARISC_PERF_EVENT_H
 #define __ASM_PARISC_PERF_EVENT_H
 
 
-/* parisc only supports software events through this interface. */
-static inline void set_perf_event_pending(void) { }
+/* Empty, just to avoid compiling error */
 
 
 #endif /* __ASM_PARISC_PERF_EVENT_H */
 #endif /* __ASM_PARISC_PERF_EVENT_H */

+ 1 - 0
arch/powerpc/Kconfig

@@ -138,6 +138,7 @@ config PPC
 	select HAVE_OPROFILE
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
 	select GENERIC_ATOMIC64 if PPC32
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64

+ 1 - 1
arch/powerpc/include/asm/paca.h

@@ -129,7 +129,7 @@ struct paca_struct {
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
-	u8 perf_event_pending;		/* PM interrupt while soft-disabled */
+	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */
 
 
 	/* Stuff for accurate time accounting */
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
 	u64 user_time;			/* accumulated usermode TB ticks */

+ 25 - 61
arch/powerpc/kernel/perf_callchain.c

@@ -23,18 +23,6 @@
 #include "ppc32.h"
 #include "ppc32.h"
 #endif
 #endif
 
 
-/*
- * Store another value in a callchain_entry.
- */
-static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-	unsigned int nr = entry->nr;
-
-	if (nr < PERF_MAX_STACK_DEPTH) {
-		entry->ip[nr] = ip;
-		entry->nr = nr + 1;
-	}
-}
 
 
 /*
 /*
  * Is sp valid as the address of the next kernel stack frame after prev_sp?
  * Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
 	return 0;
 	return 0;
 }
 }
 
 
-static void perf_callchain_kernel(struct pt_regs *regs,
-				  struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
 	unsigned long sp, next_sp;
 	unsigned long sp, next_sp;
 	unsigned long next_ip;
 	unsigned long next_ip;
@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 
 
 	lr = regs->link;
 	lr = regs->link;
 	sp = regs->gpr[1];
 	sp = regs->gpr[1];
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
-	callchain_store(entry, regs->nip);
+	perf_callchain_store(entry, regs->nip);
 
 
 	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
 	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
 		return;
 		return;
@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 			next_ip = regs->nip;
 			next_ip = regs->nip;
 			lr = regs->link;
 			lr = regs->link;
 			level = 0;
 			level = 0;
-			callchain_store(entry, PERF_CONTEXT_KERNEL);
+			perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
 
 
 		} else {
 		} else {
 			if (level == 0)
 			if (level == 0)
@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 			++level;
 			++level;
 		}
 		}
 
 
-		callchain_store(entry, next_ip);
+		perf_callchain_store(entry, next_ip);
 		if (!valid_next_sp(next_sp, sp))
 		if (!valid_next_sp(next_sp, sp))
 			return;
 			return;
 		sp = next_sp;
 		sp = next_sp;
@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
 		puc == (unsigned long) &sf->uc;
 		puc == (unsigned long) &sf->uc;
 }
 }
 
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 {
 	unsigned long sp, next_sp;
 	unsigned long sp, next_sp;
 	unsigned long next_ip;
 	unsigned long next_ip;
@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
 	next_ip = regs->nip;
 	next_ip = regs->nip;
 	lr = regs->link;
 	lr = regs->link;
 	sp = regs->gpr[1];
 	sp = regs->gpr[1];
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, next_ip);
+	perf_callchain_store(entry, next_ip);
 
 
 	for (;;) {
 	for (;;) {
 		fp = (unsigned long __user *) sp;
 		fp = (unsigned long __user *) sp;
@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
 			    read_user_stack_64(&uregs[PT_R1], &sp))
 			    read_user_stack_64(&uregs[PT_R1], &sp))
 				return;
 				return;
 			level = 0;
 			level = 0;
-			callchain_store(entry, PERF_CONTEXT_USER);
-			callchain_store(entry, next_ip);
+			perf_callchain_store(entry, PERF_CONTEXT_USER);
+			perf_callchain_store(entry, next_ip);
 			continue;
 			continue;
 		}
 		}
 
 
 		if (level == 0)
 		if (level == 0)
 			next_ip = lr;
 			next_ip = lr;
-		callchain_store(entry, next_ip);
+		perf_callchain_store(entry, next_ip);
 		++level;
 		++level;
 		sp = next_sp;
 		sp = next_sp;
 	}
 	}
@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
 	return __get_user_inatomic(*ret, ptr);
 	return __get_user_inatomic(*ret, ptr);
 }
 }
 
 
-static inline void perf_callchain_user_64(struct pt_regs *regs,
-					  struct perf_callchain_entry *entry)
+static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
+					  struct pt_regs *regs)
 {
 {
 }
 }
 
 
@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
 	return mctx->mc_gregs;
 	return mctx->mc_gregs;
 }
 }
 
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 {
 	unsigned int sp, next_sp;
 	unsigned int sp, next_sp;
 	unsigned int next_ip;
 	unsigned int next_ip;
@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 	next_ip = regs->nip;
 	next_ip = regs->nip;
 	lr = regs->link;
 	lr = regs->link;
 	sp = regs->gpr[1];
 	sp = regs->gpr[1];
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, next_ip);
+	perf_callchain_store(entry, next_ip);
 
 
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		fp = (unsigned int __user *) (unsigned long) sp;
 		fp = (unsigned int __user *) (unsigned long) sp;
@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 			    read_user_stack_32(&uregs[PT_R1], &sp))
 			    read_user_stack_32(&uregs[PT_R1], &sp))
 				return;
 				return;
 			level = 0;
 			level = 0;
-			callchain_store(entry, PERF_CONTEXT_USER);
-			callchain_store(entry, next_ip);
+			perf_callchain_store(entry, PERF_CONTEXT_USER);
+			perf_callchain_store(entry, next_ip);
 			continue;
 			continue;
 		}
 		}
 
 
 		if (level == 0)
 		if (level == 0)
 			next_ip = lr;
 			next_ip = lr;
-		callchain_store(entry, next_ip);
+		perf_callchain_store(entry, next_ip);
 		++level;
 		++level;
 		sp = next_sp;
 		sp = next_sp;
 	}
 	}
 }
 }
 
 
-/*
- * Since we can't get PMU interrupts inside a PMU interrupt handler,
- * we don't need separate irq and nmi entries here.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
-	struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
-
-	entry->nr = 0;
-
-	if (!user_mode(regs)) {
-		perf_callchain_kernel(regs, entry);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-
-	if (regs) {
-		if (current_is_64bit())
-			perf_callchain_user_64(regs, entry);
-		else
-			perf_callchain_user_32(regs, entry);
-	}
-
-	return entry;
+	if (current_is_64bit())
+		perf_callchain_user_64(entry, regs);
+	else
+		perf_callchain_user_32(entry, regs);
 }
 }

+ 102 - 64
arch/powerpc/kernel/perf_event.c

@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
 {
 {
 	s64 val, delta, prev;
 	s64 val, delta, prev;
 
 
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
 	if (!event->hw.idx)
 	if (!event->hw.idx)
 		return;
 		return;
 	/*
 	/*
@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
  * Disable all events to prevent PMU interrupts and to allow
  * Disable all events to prevent PMU interrupts and to allow
  * events to be added or removed.
  * events to be added or removed.
  */
  */
-void hw_perf_disable(void)
+static void power_pmu_disable(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
 	unsigned long flags;
@@ -565,7 +568,7 @@ void hw_perf_disable(void)
  * If we were previously disabled and events were added, then
  * If we were previously disabled and events were added, then
  * put the new config on the PMU.
  * put the new config on the PMU.
  */
  */
-void hw_perf_enable(void)
+static void power_pmu_enable(struct pmu *pmu)
 {
 {
 	struct perf_event *event;
 	struct perf_event *event;
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
@@ -672,6 +675,8 @@ void hw_perf_enable(void)
 		}
 		}
 		local64_set(&event->hw.prev_count, val);
 		local64_set(&event->hw.prev_count, val);
 		event->hw.idx = idx;
 		event->hw.idx = idx;
+		if (event->hw.state & PERF_HES_STOPPED)
+			val = 0;
 		write_pmc(idx, val);
 		write_pmc(idx, val);
 		perf_event_update_userpage(event);
 		perf_event_update_userpage(event);
 	}
 	}
@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
  * re-enable the PMU in order to get hw_perf_enable to do the
  * re-enable the PMU in order to get hw_perf_enable to do the
  * actual work of reconfiguring the PMU.
  * actual work of reconfiguring the PMU.
  */
  */
-static int power_pmu_enable(struct perf_event *event)
+static int power_pmu_add(struct perf_event *event, int ef_flags)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
 	unsigned long flags;
@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event)
 	int ret = -EAGAIN;
 	int ret = -EAGAIN;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
 
 
 	/*
 	/*
 	 * Add the event to the list (if there is room)
 	 * Add the event to the list (if there is room)
@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
 	cpuhw->events[n0] = event->hw.config;
 	cpuhw->events[n0] = event->hw.config;
 	cpuhw->flags[n0] = event->hw.event_base;
 	cpuhw->flags[n0] = event->hw.event_base;
 
 
+	if (!(ef_flags & PERF_EF_START))
+		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
 	/*
 	/*
 	 * If group events scheduling transaction was started,
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
 	 * skip the schedulability test here, it will be performed
@@ -769,7 +777,7 @@ nocheck:
 
 
 	ret = 0;
 	ret = 0;
  out:
  out:
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 	return ret;
 	return ret;
 }
 }
@@ -777,14 +785,14 @@ nocheck:
 /*
 /*
  * Remove an event from the PMU.
  * Remove an event from the PMU.
  */
  */
-static void power_pmu_disable(struct perf_event *event)
+static void power_pmu_del(struct perf_event *event, int ef_flags)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	long i;
 	long i;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
 
 
 	power_pmu_read(event);
 	power_pmu_read(event);
 
 
@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event)
 		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
 		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
 	}
 	}
 
 
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 }
 }
 
 
 /*
 /*
- * Re-enable interrupts on a event after they were throttled
- * because they were coming too fast.
+ * POWER-PMU does not support disabling individual counters, hence
+ * program their cycle counter to their max value and ignore the interrupts.
  */
  */
-static void power_pmu_unthrottle(struct perf_event *event)
+
+static void power_pmu_start(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+	s64 left;
+
+	if (!event->hw.idx || !event->hw.sample_period)
+		return;
+
+	if (!(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	if (ef_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = 0;
+	left = local64_read(&event->hw.period_left);
+	write_pmc(event->hw.idx, left);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
+
+static void power_pmu_stop(struct perf_event *event, int ef_flags)
 {
 {
-	s64 val, left;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	if (!event->hw.idx || !event->hw.sample_period)
 	if (!event->hw.idx || !event->hw.sample_period)
 		return;
 		return;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
 	local_irq_save(flags);
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
+
 	power_pmu_read(event);
 	power_pmu_read(event);
-	left = event->hw.sample_period;
-	event->hw.last_period = left;
-	val = 0;
-	if (left < 0x80000000L)
-		val = 0x80000000L - left;
-	write_pmc(event->hw.idx, val);
-	local64_set(&event->hw.prev_count, val);
-	local64_set(&event->hw.period_left, left);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	write_pmc(event->hw.idx, 0);
+
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 }
 }
 
 
@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
  * schedulability test, it will be performed at commit time
  */
  */
-void power_pmu_start_txn(const struct pmu *pmu)
+void power_pmu_start_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
 
+	perf_pmu_disable(pmu);
 	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
 }
@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  * schedulability test.
  */
  */
-void power_pmu_cancel_txn(const struct pmu *pmu)
+void power_pmu_cancel_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
 
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
 }
 }
 
 
 /*
 /*
@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Perform the group schedulability test as a whole
  * Return 0 if success
  * Return 0 if success
  */
  */
-int power_pmu_commit_txn(const struct pmu *pmu)
+int power_pmu_commit_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	long i, n;
 	long i, n;
@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
 
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
 	return 0;
 	return 0;
 }
 }
 
 
-struct pmu power_pmu = {
-	.enable		= power_pmu_enable,
-	.disable	= power_pmu_disable,
-	.read		= power_pmu_read,
-	.unthrottle	= power_pmu_unthrottle,
-	.start_txn	= power_pmu_start_txn,
-	.cancel_txn	= power_pmu_cancel_txn,
-	.commit_txn	= power_pmu_commit_txn,
-};
-
 /*
 /*
  * Return 1 if we might be able to put event on a limited PMC,
  * Return 1 if we might be able to put event on a limited PMC,
  * or 0 if not.
  * or 0 if not.
@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
 	return 0;
 	return 0;
 }
 }
 
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int power_pmu_event_init(struct perf_event *event)
 {
 {
 	u64 ev;
 	u64 ev;
 	unsigned long flags;
 	unsigned long flags;
@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 
 
 	if (!ppmu)
 	if (!ppmu)
-		return ERR_PTR(-ENXIO);
+		return -ENOENT;
+
 	switch (event->attr.type) {
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
 		ev = event->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-			return ERR_PTR(-EOPNOTSUPP);
+			return -EOPNOTSUPP;
 		ev = ppmu->generic_events[ev];
 		ev = ppmu->generic_events[ev];
 		break;
 		break;
 	case PERF_TYPE_HW_CACHE:
 	case PERF_TYPE_HW_CACHE:
 		err = hw_perf_cache_event(event->attr.config, &ev);
 		err = hw_perf_cache_event(event->attr.config, &ev);
 		if (err)
 		if (err)
-			return ERR_PTR(err);
+			return err;
 		break;
 		break;
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_RAW:
 		ev = event->attr.config;
 		ev = event->attr.config;
 		break;
 		break;
 	default:
 	default:
-		return ERR_PTR(-EINVAL);
+		return -ENOENT;
 	}
 	}
+
 	event->hw.config_base = ev;
 	event->hw.config_base = ev;
 	event->hw.idx = 0;
 	event->hw.idx = 0;
 
 
@@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	 * XXX we should check if the task is an idle task.
 	 * XXX we should check if the task is an idle task.
 	 */
 	 */
 	flags = 0;
 	flags = 0;
-	if (event->ctx->task)
+	if (event->attach_state & PERF_ATTACH_TASK)
 		flags |= PPMU_ONLY_COUNT_RUN;
 		flags |= PPMU_ONLY_COUNT_RUN;
 
 
 	/*
 	/*
@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 			 */
 			 */
 			ev = normal_pmc_alternative(ev, flags);
 			ev = normal_pmc_alternative(ev, flags);
 			if (!ev)
 			if (!ev)
-				return ERR_PTR(-EINVAL);
+				return -EINVAL;
 		}
 		}
 	}
 	}
 
 
@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 		n = collect_events(event->group_leader, ppmu->n_counter - 1,
 		n = collect_events(event->group_leader, ppmu->n_counter - 1,
 				   ctrs, events, cflags);
 				   ctrs, events, cflags);
 		if (n < 0)
 		if (n < 0)
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 	}
 	}
 	events[n] = ev;
 	events[n] = ev;
 	ctrs[n] = event;
 	ctrs[n] = event;
 	cflags[n] = flags;
 	cflags[n] = flags;
 	if (check_excludes(ctrs, cflags, n, 1))
 	if (check_excludes(ctrs, cflags, n, 1))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 
 	cpuhw = &get_cpu_var(cpu_hw_events);
 	cpuhw = &get_cpu_var(cpu_hw_events);
 	err = power_check_constraints(cpuhw, events, cflags, n + 1);
 	err = power_check_constraints(cpuhw, events, cflags, n + 1);
 	put_cpu_var(cpu_hw_events);
 	put_cpu_var(cpu_hw_events);
 	if (err)
 	if (err)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 
 	event->hw.config = events[n];
 	event->hw.config = events[n];
 	event->hw.event_base = cflags[n];
 	event->hw.event_base = cflags[n];
@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	}
 	}
 	event->destroy = hw_perf_event_destroy;
 	event->destroy = hw_perf_event_destroy;
 
 
-	if (err)
-		return ERR_PTR(err);
-	return &power_pmu;
+	return err;
 }
 }
 
 
+struct pmu power_pmu = {
+	.pmu_enable	= power_pmu_enable,
+	.pmu_disable	= power_pmu_disable,
+	.event_init	= power_pmu_event_init,
+	.add		= power_pmu_add,
+	.del		= power_pmu_del,
+	.start		= power_pmu_start,
+	.stop		= power_pmu_stop,
+	.read		= power_pmu_read,
+	.start_txn	= power_pmu_start_txn,
+	.cancel_txn	= power_pmu_cancel_txn,
+	.commit_txn	= power_pmu_commit_txn,
+};
+
 /*
 /*
  * A counter has overflowed; update its count and record
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
  * things if requested.  Note that interrupts are hard-disabled
@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	s64 prev, delta, left;
 	s64 prev, delta, left;
 	int record = 0;
 	int record = 0;
 
 
+	if (event->hw.state & PERF_HES_STOPPED) {
+		write_pmc(event->hw.idx, 0);
+		return;
+	}
+
 	/* we don't have to worry about interrupts here */
 	/* we don't have to worry about interrupts here */
 	prev = local64_read(&event->hw.prev_count);
 	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
 	delta = (val - prev) & 0xfffffffful;
@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			val = 0x80000000LL - left;
 			val = 0x80000000LL - left;
 	}
 	}
 
 
+	write_pmc(event->hw.idx, val);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
+	perf_event_update_userpage(event);
+
 	/*
 	/*
 	 * Finally record data if requested.
 	 * Finally record data if requested.
 	 */
 	 */
@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 			perf_get_data_addr(regs, &data.addr);
 			perf_get_data_addr(regs, &data.addr);
 
 
-		if (perf_event_overflow(event, nmi, &data, regs)) {
-			/*
-			 * Interrupts are coming too fast - throttle them
-			 * by setting the event to 0, so it will be
-			 * at least 2^30 cycles until the next interrupt
-			 * (assuming each event counts at most 2 counts
-			 * per cycle).
-			 */
-			val = 0;
-			left = ~0ULL >> 1;
-		}
+		if (perf_event_overflow(event, nmi, &data, regs))
+			power_pmu_stop(event, 0);
 	}
 	}
-
-	write_pmc(event->hw.idx, val);
-	local64_set(&event->hw.prev_count, val);
-	local64_set(&event->hw.period_left, left);
-	perf_event_update_userpage(event);
 }
 }
 
 
 /*
 /*
@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
 		freeze_events_kernel = MMCR0_FCHV;
 		freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 #endif /* CONFIG_PPC64 */
 
 
+	perf_pmu_register(&power_pmu);
 	perf_cpu_notifier(power_pmu_notifier);
 	perf_cpu_notifier(power_pmu_notifier);
 
 
 	return 0;
 	return 0;
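
The conversion above is one instance of the new pmu interface this series introduces: the per-event enable/disable/unthrottle callbacks become add/del/start/stop driven by PERF_EF_* flags and PERF_HES_* state bits, the whole-PMU hw_perf_enable/hw_perf_disable become pmu_enable/pmu_disable, and the driver self-registers with perf_pmu_register() instead of being returned from hw_perf_event_init(). Below is a minimal skeleton written directly against that interface; everything prefixed my_ is invented for illustration, while the callbacks, flags and helper calls are the ones used in the hunks above.

	/* illustrative skeleton only: my_* names are made up; the callback set,
	 * flags and registration mirror the power_pmu conversion above */
	#include <linux/perf_event.h>

	static void my_pmu_enable(struct pmu *pmu)  { /* unfreeze all counters */ }
	static void my_pmu_disable(struct pmu *pmu) { /* freeze all counters */ }
	static void my_pmu_read(struct perf_event *event) { /* fold hw count into event->count */ }

	static int my_pmu_event_init(struct perf_event *event)
	{
		if (event->attr.type != PERF_TYPE_RAW)
			return -ENOENT;		/* not ours: core tries the next pmu */
		/* decode event->attr.config and fill in event->hw here */
		return 0;
	}

	static void my_pmu_start(struct perf_event *event, int ef_flags)
	{
		if (ef_flags & PERF_EF_RELOAD)
			WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		event->hw.state = 0;
		/* reprogram and unfreeze this counter */
	}

	static void my_pmu_stop(struct perf_event *event, int ef_flags)
	{
		if (!(event->hw.state & PERF_HES_STOPPED)) {
			/* freeze this counter */
			event->hw.state |= PERF_HES_STOPPED;
		}
		if ((ef_flags & PERF_EF_UPDATE) &&
		    !(event->hw.state & PERF_HES_UPTODATE)) {
			my_pmu_read(event);
			event->hw.state |= PERF_HES_UPTODATE;
		}
	}

	static int my_pmu_add(struct perf_event *event, int ef_flags)
	{
		perf_pmu_disable(event->pmu);
		/* claim a hardware counter and record it in event->hw.idx */
		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
		if (ef_flags & PERF_EF_START)
			my_pmu_start(event, PERF_EF_RELOAD);
		perf_pmu_enable(event->pmu);
		return 0;
	}

	static void my_pmu_del(struct perf_event *event, int ef_flags)
	{
		my_pmu_stop(event, PERF_EF_UPDATE);
		/* release the hardware counter */
		perf_event_update_userpage(event);
	}

	static struct pmu my_pmu = {
		.pmu_enable	= my_pmu_enable,
		.pmu_disable	= my_pmu_disable,
		.event_init	= my_pmu_event_init,
		.add		= my_pmu_add,
		.del		= my_pmu_del,
		.start		= my_pmu_start,
		.stop		= my_pmu_stop,
		.read		= my_pmu_read,
	};

	/* at driver init, as register_power_pmu() now does: perf_pmu_register(&my_pmu); */

The start/stop split is what lets the core throttle an event by calling ->stop() and later restart it with ->start(), which is exactly how record_and_restart() above now reacts to perf_event_overflow() instead of reprogramming the counter by hand.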

+ 90 - 58
arch/powerpc/kernel/perf_event_fsl_emb.c

@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
 {
 {
 	s64 val, delta, prev;
 	s64 val, delta, prev;
 
 
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
 	/*
 	/*
 	 * Performance monitor interrupts come even when interrupts
 	 * Performance monitor interrupts come even when interrupts
 	 * are soft-disabled, as long as interrupts are hard-enabled.
 	 * are soft-disabled, as long as interrupts are hard-enabled.
@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
  * Disable all events to prevent PMU interrupts and to allow
  * Disable all events to prevent PMU interrupts and to allow
  * events to be added or removed.
  * events to be added or removed.
  */
  */
-void hw_perf_disable(void)
+static void fsl_emb_pmu_disable(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
 	unsigned long flags;
@@ -216,7 +219,7 @@ void hw_perf_disable(void)
  * If we were previously disabled and events were added, then
  * If we were previously disabled and events were added, then
  * put the new config on the PMU.
  * put the new config on the PMU.
  */
  */
-void hw_perf_enable(void)
+static void fsl_emb_pmu_enable(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
 	unsigned long flags;
@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 	return n;
 }
 }
 
 
-/* perf must be disabled, context locked on entry */
-static int fsl_emb_pmu_enable(struct perf_event *event)
+/* context locked on entry */
+static int fsl_emb_pmu_add(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	int ret = -EAGAIN;
 	int ret = -EAGAIN;
@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 	u64 val;
 	u64 val;
 	int i;
 	int i;
 
 
+	perf_pmu_disable(event->pmu);
 	cpuhw = &get_cpu_var(cpu_hw_events);
 	cpuhw = &get_cpu_var(cpu_hw_events);
 
 
 	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
 	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 			val = 0x80000000L - left;
 			val = 0x80000000L - left;
 	}
 	}
 	local64_set(&event->hw.prev_count, val);
 	local64_set(&event->hw.prev_count, val);
+
+	if (!(flags & PERF_EF_START)) {
+		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+		val = 0;
+	}
+
 	write_pmc(i, val);
 	write_pmc(i, val);
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
 
 
@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 	ret = 0;
 	ret = 0;
  out:
  out:
 	put_cpu_var(cpu_hw_events);
 	put_cpu_var(cpu_hw_events);
+	perf_pmu_enable(event->pmu);
 	return ret;
 	return ret;
 }
 }
 
 
-/* perf must be disabled, context locked on entry */
-static void fsl_emb_pmu_disable(struct perf_event *event)
+/* context locked on entry */
+static void fsl_emb_pmu_del(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuhw;
 	struct cpu_hw_events *cpuhw;
 	int i = event->hw.idx;
 	int i = event->hw.idx;
 
 
+	perf_pmu_disable(event->pmu);
 	if (i < 0)
 	if (i < 0)
 		goto out;
 		goto out;
 
 
@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
 	cpuhw->n_events--;
 	cpuhw->n_events--;
 
 
  out:
  out:
+	perf_pmu_enable(event->pmu);
 	put_cpu_var(cpu_hw_events);
 	put_cpu_var(cpu_hw_events);
 }
 }
 
 
-/*
- * Re-enable interrupts on a event after they were throttled
- * because they were coming too fast.
- *
- * Context is locked on entry, but perf is not disabled.
- */
-static void fsl_emb_pmu_unthrottle(struct perf_event *event)
+static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
 {
 {
-	s64 val, left;
 	unsigned long flags;
 	unsigned long flags;
+	s64 left;
 
 
 	if (event->hw.idx < 0 || !event->hw.sample_period)
 	if (event->hw.idx < 0 || !event->hw.sample_period)
 		return;
 		return;
+
+	if (!(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	if (ef_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
 	local_irq_save(flags);
 	local_irq_save(flags);
-	perf_disable();
-	fsl_emb_pmu_read(event);
-	left = event->hw.sample_period;
-	event->hw.last_period = left;
-	val = 0;
-	if (left < 0x80000000L)
-		val = 0x80000000L - left;
-	write_pmc(event->hw.idx, val);
-	local64_set(&event->hw.prev_count, val);
-	local64_set(&event->hw.period_left, left);
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = 0;
+	left = local64_read(&event->hw.period_left);
+	write_pmc(event->hw.idx, left);
+
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 }
 }
 
 
-static struct pmu fsl_emb_pmu = {
-	.enable		= fsl_emb_pmu_enable,
-	.disable	= fsl_emb_pmu_disable,
-	.read		= fsl_emb_pmu_read,
-	.unthrottle	= fsl_emb_pmu_unthrottle,
-};
+static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+
+	if (event->hw.idx < 0 || !event->hw.sample_period)
+		return;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	fsl_emb_pmu_read(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	write_pmc(event->hw.idx, 0);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
 
 
 /*
 /*
  * Release the PMU if this is the last perf_event.
  * Release the PMU if this is the last perf_event.
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
 	return 0;
 	return 0;
 }
 }
 
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int fsl_emb_pmu_event_init(struct perf_event *event)
 {
 {
 	u64 ev;
 	u64 ev;
 	struct perf_event *events[MAX_HWEVENTS];
 	struct perf_event *events[MAX_HWEVENTS];
@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
 		ev = event->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-			return ERR_PTR(-EOPNOTSUPP);
+			return -EOPNOTSUPP;
 		ev = ppmu->generic_events[ev];
 		ev = ppmu->generic_events[ev];
 		break;
 		break;
 
 
 	case PERF_TYPE_HW_CACHE:
 	case PERF_TYPE_HW_CACHE:
 		err = hw_perf_cache_event(event->attr.config, &ev);
 		err = hw_perf_cache_event(event->attr.config, &ev);
 		if (err)
 		if (err)
-			return ERR_PTR(err);
+			return err;
 		break;
 		break;
 
 
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_RAW:
@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 		break;
 		break;
 
 
 	default:
 	default:
-		return ERR_PTR(-EINVAL);
+		return -ENOENT;
 	}
 	}
 
 
 	event->hw.config = ppmu->xlate_event(ev);
 	event->hw.config = ppmu->xlate_event(ev);
 	if (!(event->hw.config & FSL_EMB_EVENT_VALID))
 	if (!(event->hw.config & FSL_EMB_EVENT_VALID))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 
 	/*
 	/*
 	 * If this is in a group, check if it can go on with all the
 	 * If this is in a group, check if it can go on with all the
@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 		n = collect_events(event->group_leader,
 		n = collect_events(event->group_leader,
 		                   ppmu->n_counter - 1, events);
 		                   ppmu->n_counter - 1, events);
 		if (n < 0)
 		if (n < 0)
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 	}
 	}
 
 
 	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
 	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 		}
 		}
 
 
 		if (num_restricted >= ppmu->n_restricted)
 		if (num_restricted >= ppmu->n_restricted)
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 	}
 	}
 
 
 	event->hw.idx = -1;
 	event->hw.idx = -1;
@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	if (event->attr.exclude_kernel)
 	if (event->attr.exclude_kernel)
 		event->hw.config_base |= PMLCA_FCS;
 		event->hw.config_base |= PMLCA_FCS;
 	if (event->attr.exclude_idle)
 	if (event->attr.exclude_idle)
-		return ERR_PTR(-ENOTSUPP);
+		return -ENOTSUPP;
 
 
 	event->hw.last_period = event->hw.sample_period;
 	event->hw.last_period = event->hw.sample_period;
 	local64_set(&event->hw.period_left, event->hw.last_period);
 	local64_set(&event->hw.period_left, event->hw.last_period);
@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	}
 	}
 	event->destroy = hw_perf_event_destroy;
 	event->destroy = hw_perf_event_destroy;
 
 
-	if (err)
-		return ERR_PTR(err);
-	return &fsl_emb_pmu;
+	return err;
 }
 }
 
 
+static struct pmu fsl_emb_pmu = {
+	.pmu_enable	= fsl_emb_pmu_enable,
+	.pmu_disable	= fsl_emb_pmu_disable,
+	.event_init	= fsl_emb_pmu_event_init,
+	.add		= fsl_emb_pmu_add,
+	.del		= fsl_emb_pmu_del,
+	.start		= fsl_emb_pmu_start,
+	.stop		= fsl_emb_pmu_stop,
+	.read		= fsl_emb_pmu_read,
+};
+
 /*
 /*
  * A counter has overflowed; update its count and record
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
  * things if requested.  Note that interrupts are hard-disabled
@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	s64 prev, delta, left;
 	s64 prev, delta, left;
 	int record = 0;
 	int record = 0;
 
 
+	if (event->hw.state & PERF_HES_STOPPED) {
+		write_pmc(event->hw.idx, 0);
+		return;
+	}
+
 	/* we don't have to worry about interrupts here */
 	/* we don't have to worry about interrupts here */
 	prev = local64_read(&event->hw.prev_count);
 	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
 	delta = (val - prev) & 0xfffffffful;
@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			val = 0x80000000LL - left;
 			val = 0x80000000LL - left;
 	}
 	}
 
 
+	write_pmc(event->hw.idx, val);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
+	perf_event_update_userpage(event);
+
 	/*
 	/*
 	 * Finally record data if requested.
 	 * Finally record data if requested.
 	 */
 	 */
@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		perf_sample_data_init(&data, 0);
 		perf_sample_data_init(&data, 0);
 		data.period = event->hw.last_period;
 		data.period = event->hw.last_period;
 
 
-		if (perf_event_overflow(event, nmi, &data, regs)) {
-			/*
-			 * Interrupts are coming too fast - throttle them
-			 * by setting the event to 0, so it will be
-			 * at least 2^30 cycles until the next interrupt
-			 * (assuming each event counts at most 2 counts
-			 * per cycle).
-			 */
-			val = 0;
-			left = ~0ULL >> 1;
-		}
+		if (perf_event_overflow(event, nmi, &data, regs))
+			fsl_emb_pmu_stop(event, 0);
 	}
 	}
-
-	write_pmc(event->hw.idx, val);
-	local64_set(&event->hw.prev_count, val);
-	local64_set(&event->hw.period_left, left);
-	perf_event_update_userpage(event);
 }
 }
 
 
 static void perf_event_interrupt(struct pt_regs *regs)
 static void perf_event_interrupt(struct pt_regs *regs)
@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
 	pr_info("%s performance monitor hardware support registered\n",
 	pr_info("%s performance monitor hardware support registered\n",
 		pmu->name);
 		pmu->name);
 
 
+	perf_pmu_register(&fsl_emb_pmu);
+
 	return 0;
 	return 0;
 }
 }

+ 21 - 21
arch/powerpc/kernel/time.c

@@ -53,7 +53,7 @@
 #include <linux/posix-timers.h>
 #include <linux/posix-timers.h>
 #include <linux/irq.h>
 #include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/delay.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 #include <asm/trace.h>
 #include <asm/trace.h>
 
 
 #include <asm/io.h>
 #include <asm/io.h>
@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void)
 }
 }
 #endif /* CONFIG_PPC_ISERIES */
 #endif /* CONFIG_PPC_ISERIES */
 
 
-#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_IRQ_WORK
 
 
 /*
 /*
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
  */
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
+static inline unsigned long test_irq_work_pending(void)
 {
 {
 	unsigned long x;
 	unsigned long x;
 
 
 	asm volatile("lbz %0,%1(13)"
 	asm volatile("lbz %0,%1(13)"
 		: "=r" (x)
 		: "=r" (x)
-		: "i" (offsetof(struct paca_struct, perf_event_pending)));
+		: "i" (offsetof(struct paca_struct, irq_work_pending)));
 	return x;
 	return x;
 }
 }
 
 
-static inline void set_perf_event_pending_flag(void)
+static inline void set_irq_work_pending_flag(void)
 {
 {
 	asm volatile("stb %0,%1(13)" : :
 	asm volatile("stb %0,%1(13)" : :
 		"r" (1),
 		"r" (1),
-		"i" (offsetof(struct paca_struct, perf_event_pending)));
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
 }
 }
 
 
-static inline void clear_perf_event_pending(void)
+static inline void clear_irq_work_pending(void)
 {
 {
 	asm volatile("stb %0,%1(13)" : :
 	asm volatile("stb %0,%1(13)" : :
 		"r" (0),
 		"r" (0),
-		"i" (offsetof(struct paca_struct, perf_event_pending)));
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
 }
 }
 
 
 #else /* 32-bit */
 #else /* 32-bit */
 
 
-DEFINE_PER_CPU(u8, perf_event_pending);
+DEFINE_PER_CPU(u8, irq_work_pending);
 
 
-#define set_perf_event_pending_flag()	__get_cpu_var(perf_event_pending) = 1
-#define test_perf_event_pending()	__get_cpu_var(perf_event_pending)
-#define clear_perf_event_pending()	__get_cpu_var(perf_event_pending) = 0
+#define set_irq_work_pending_flag()	__get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending()		__get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending()	__get_cpu_var(irq_work_pending) = 0
 
 
 #endif /* 32 vs 64 bit */
 #endif /* 32 vs 64 bit */
 
 
-void set_perf_event_pending(void)
+void set_irq_work_pending(void)
 {
 {
 	preempt_disable();
 	preempt_disable();
-	set_perf_event_pending_flag();
+	set_irq_work_pending_flag();
 	set_dec(1);
 	set_dec(1);
 	preempt_enable();
 	preempt_enable();
 }
 }
 
 
-#else  /* CONFIG_PERF_EVENTS */
+#else  /* CONFIG_IRQ_WORK */
 
 
-#define test_perf_event_pending()	0
-#define clear_perf_event_pending()
+#define test_irq_work_pending()	0
+#define clear_irq_work_pending()
 
 
-#endif /* CONFIG_PERF_EVENTS */
+#endif /* CONFIG_IRQ_WORK */
 
 
 /*
 /*
  * For iSeries shared processors, we have to let the hypervisor
  * For iSeries shared processors, we have to let the hypervisor
@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs)
 
 
 	calculate_steal_time();
 	calculate_steal_time();
 
 
-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
 	}
 	}
 
 
 #ifdef CONFIG_PPC_ISERIES
 #ifdef CONFIG_PPC_ISERIES
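
With this change the decrementer path above only provides raise-and-run plumbing; the callbacks themselves come from users of the generic irq_work layer added earlier in the series. Here is a sketch of such a user, assuming the interface that layer exposes (init_irq_work(), irq_work_queue(), plus the irq_work_run() call wired up above); the my_* names are invented:

	/* illustrative irq_work user; my_* names are made up */
	#include <linux/irq_work.h>

	static void my_followup(struct irq_work *work)
	{
		/* runs shortly afterwards in hardirq context (here, from the
		 * timer_interrupt() hunk above), where it is safe to wake
		 * tasks or take locks that NMI context could not */
	}

	static struct irq_work my_work;

	static void my_pmu_interrupt(void)
	{
		/* NMI-ish context: defer the heavy lifting; queueing raises a
		 * self-interrupt on this CPU (the pending flag plus set_dec(1)
		 * above; arch_irq_work_raise() in the sparc hunk further down) */
		irq_work_queue(&my_work);
	}

	static void my_setup(void)
	{
		init_irq_work(&my_work, my_followup);
	}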

+ 1 - 0
arch/s390/Kconfig

@@ -95,6 +95,7 @@ config S390
 	select HAVE_KVM if 64BIT
 	select HAVE_KVM if 64BIT
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRACEHOOK
 	select INIT_ALL_POSSIBLE
 	select INIT_ALL_POSSIBLE
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_BZIP2

+ 0 - 4
arch/s390/include/asm/hardirq.h

@@ -12,10 +12,6 @@
 #ifndef __ASM_HARDIRQ_H
 #ifndef __ASM_HARDIRQ_H
 #define __ASM_HARDIRQ_H
 #define __ASM_HARDIRQ_H
 
 
-#include <linux/threads.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
 #include <asm/lowcore.h>
 #include <asm/lowcore.h>
 
 
 #define local_softirq_pending() (S390_lowcore.softirq_pending)
 #define local_softirq_pending() (S390_lowcore.softirq_pending)

+ 1 - 2
arch/s390/include/asm/perf_event.h

@@ -4,7 +4,6 @@
  * Copyright 2009 Martin Schwidefsky, IBM Corporation.
  * Copyright 2009 Martin Schwidefsky, IBM Corporation.
  */
  */
 
 
-static inline void set_perf_event_pending(void) {}
-static inline void clear_perf_event_pending(void) {}
+/* Empty, just to avoid compiling error */
 
 
 #define PERF_EVENT_INDEX_OFFSET 0
 #define PERF_EVENT_INDEX_OFFSET 0

+ 14 - 0
arch/sh/Kconfig

@@ -16,6 +16,7 @@ config SUPERH
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_ATTRS
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select PERF_USE_VMALLOC
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_GZIP
@@ -249,6 +250,11 @@ config ARCH_SHMOBILE
 	select PM
 	select PM
 	select PM_RUNTIME
 	select PM_RUNTIME
 
 
+config CPU_HAS_PMU
+       depends on CPU_SH4 || CPU_SH4A
+       default y
+       bool
+
 if SUPERH32
 if SUPERH32
 
 
 choice
 choice
@@ -738,6 +744,14 @@ config GUSA_RB
 	  LLSC, this should be more efficient than the other alternative of
 	  LLSC, this should be more efficient than the other alternative of
 	  disabling interrupts around the atomic sequence.
 	  disabling interrupts around the atomic sequence.
 
 
+config HW_PERF_EVENTS
+	bool "Enable hardware performance counter support for perf events"
+	depends on PERF_EVENTS && CPU_HAS_PMU
+	default y
+	help
+	  Enable hardware performance counter support for perf events. If
+	  disabled, perf events will use software events only.
+
 source "drivers/sh/Kconfig"
 source "drivers/sh/Kconfig"
 
 
 endmenu
 endmenu

+ 0 - 7
arch/sh/include/asm/perf_event.h

@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *);
 extern int reserve_pmc_hardware(void);
 extern int reserve_pmc_hardware(void);
 extern void release_pmc_hardware(void);
 extern void release_pmc_hardware(void);
 
 
-static inline void set_perf_event_pending(void)
-{
-	/* Nothing to see here, move along. */
-}
-
-#define PERF_EVENT_INDEX_OFFSET	0
-
 #endif /* __ASM_SH_PERF_EVENT_H */
 #endif /* __ASM_SH_PERF_EVENT_H */

+ 4 - 46
arch/sh/kernel/perf_callchain.c

@@ -14,11 +14,6 @@
 #include <asm/unwinder.h>
 #include <asm/unwinder.h>
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 
 
-static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-	if (entry->nr < PERF_MAX_STACK_DEPTH)
-		entry->ip[entry->nr++] = ip;
-}
 
 
 static void callchain_warning(void *data, char *msg)
 static void callchain_warning(void *data, char *msg)
 {
 {
@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
 	struct perf_callchain_entry *entry = data;
 	struct perf_callchain_entry *entry = data;
 
 
 	if (reliable)
 	if (reliable)
-		callchain_store(entry, addr);
+		perf_callchain_store(entry, addr);
 }
 }
 
 
 static const struct stacktrace_ops callchain_ops = {
 static const struct stacktrace_ops callchain_ops = {
@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
 	.address	= callchain_address,
 	.address	= callchain_address,
 };
 };
 
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
-	callchain_store(entry, regs->pc);
+	perf_callchain_store(entry, regs->pc);
 
 
 	unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
 	unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
 }
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (is_user && current->state != TASK_RUNNING)
-		return;
-
-	/*
-	 * Only the kernel side is implemented for now.
-	 */
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-}
-
-/*
- * No need for separate IRQ and NMI entries.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-	entry->nr = 0;
-
-	perf_do_callchain(regs, entry);
-
-	return entry;
-}
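
The per-arch perf_callchain() wrapper and its per-cpu buffer removed here (and in the powerpc file above) now live in the generic perf code: the core owns the entry, stores the PERF_CONTEXT_KERNEL/USER markers, and calls the two remaining arch hooks. Roughly, as an approximation of the generic logic rather than its exact text:

	/* approximate shape of the generic dispatch that replaces the removed
	 * per-arch perf_callchain(); buffer handling is simplified here */
	static DEFINE_PER_CPU(struct perf_callchain_entry, sketch_entry);

	struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
	{
		struct perf_callchain_entry *entry = &__get_cpu_var(sketch_entry);

		entry->nr = 0;

		if (!user_mode(regs)) {
			perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
			perf_callchain_kernel(entry, regs);
			regs = current->mm ? task_pt_regs(current) : NULL;
		}

		if (regs) {
			perf_callchain_store(entry, PERF_CONTEXT_USER);
			perf_callchain_user(entry, regs);
		}

		return entry;
	}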

+ 112 - 47
arch/sh/kernel/perf_event.c

@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void)
 	return !!sh_pmu;
 	return !!sh_pmu;
 }
 }
 
 
+const char *perf_pmu_name(void)
+{
+	if (!sh_pmu)
+		return NULL;
+
+	return sh_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	if (!sh_pmu)
+		return 0;
+
+	return sh_pmu->num_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
 /*
 /*
  * Release the PMU if this is the last perf_event.
  * Release the PMU if this is the last perf_event.
  */
  */
@@ -206,50 +224,80 @@ again:
 	local64_add(delta, &event->count);
 	local64_add(delta, &event->count);
 }
 }
 
 
-static void sh_pmu_disable(struct perf_event *event)
+static void sh_pmu_stop(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 	int idx = hwc->idx;
 
 
-	clear_bit(idx, cpuc->active_mask);
-	sh_pmu->disable(hwc, idx);
+	if (!(event->hw.state & PERF_HES_STOPPED)) {
+		sh_pmu->disable(hwc, idx);
+		cpuc->events[idx] = NULL;
+		event->hw.state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+		sh_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+static void sh_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 
 
-	barrier();
+	cpuc->events[idx] = event;
+	event->hw.state = 0;
+	sh_pmu->enable(hwc, idx);
+}
 
 
-	sh_perf_event_update(event, &event->hw, idx);
+static void sh_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	sh_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, cpuc->used_mask);
 
 
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
 }
 }
 
 
-static int sh_pmu_enable(struct perf_event *event)
+static int sh_pmu_add(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 	int idx = hwc->idx;
+	int ret = -EAGAIN;
+
+	perf_pmu_disable(event->pmu);
 
 
-	if (test_and_set_bit(idx, cpuc->used_mask)) {
+	if (__test_and_set_bit(idx, cpuc->used_mask)) {
 		idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
 		idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
 		if (idx == sh_pmu->num_events)
 		if (idx == sh_pmu->num_events)
-			return -EAGAIN;
+			goto out;
 
 
-		set_bit(idx, cpuc->used_mask);
+		__set_bit(idx, cpuc->used_mask);
 		hwc->idx = idx;
 		hwc->idx = idx;
 	}
 	}
 
 
 	sh_pmu->disable(hwc, idx);
 	sh_pmu->disable(hwc, idx);
 
 
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
-
-	sh_pmu->enable(hwc, idx);
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		sh_pmu_start(event, PERF_EF_RELOAD);
 
 
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
-
-	return 0;
+	ret = 0;
+out:
+	perf_pmu_enable(event->pmu);
+	return ret;
 }
 }
 
 
 static void sh_pmu_read(struct perf_event *event)
 static void sh_pmu_read(struct perf_event *event)
@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event)
 	sh_perf_event_update(event, &event->hw, event->hw.idx);
 	sh_perf_event_update(event, &event->hw, event->hw.idx);
 }
 }
 
 
-static const struct pmu pmu = {
-	.enable		= sh_pmu_enable,
-	.disable	= sh_pmu_disable,
-	.read		= sh_pmu_read,
-};
-
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int sh_pmu_event_init(struct perf_event *event)
 {
 {
-	int err = __hw_perf_event_init(event);
+	int err;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HW_CACHE:
+	case PERF_TYPE_HARDWARE:
+		err = __hw_perf_event_init(event);
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
 	if (unlikely(err)) {
 	if (unlikely(err)) {
 		if (event->destroy)
 		if (event->destroy)
 			event->destroy(event);
 			event->destroy(event);
-		return ERR_PTR(err);
 	}
 	}
 
 
-	return &pmu;
+	return err;
+}
+
+static void sh_pmu_enable(struct pmu *pmu)
+{
+	if (!sh_pmu_initialized())
+		return;
+
+	sh_pmu->enable_all();
+}
+
+static void sh_pmu_disable(struct pmu *pmu)
+{
+	if (!sh_pmu_initialized())
+		return;
+
+	sh_pmu->disable_all();
 }
 }
 
 
+static struct pmu pmu = {
+	.pmu_enable	= sh_pmu_enable,
+	.pmu_disable	= sh_pmu_disable,
+	.event_init	= sh_pmu_event_init,
+	.add		= sh_pmu_add,
+	.del		= sh_pmu_del,
+	.start		= sh_pmu_start,
+	.stop		= sh_pmu_stop,
+	.read		= sh_pmu_read,
+};
+
 static void sh_pmu_setup(int cpu)
 static void sh_pmu_setup(int cpu)
 {
 {
 	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
 	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 	return NOTIFY_OK;
 }
 }
 
 
-void hw_perf_enable(void)
-{
-	if (!sh_pmu_initialized())
-		return;
-
-	sh_pmu->enable_all();
-}
-
-void hw_perf_disable(void)
-{
-	if (!sh_pmu_initialized())
-		return;
-
-	sh_pmu->disable_all();
-}
-
-int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
+int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 {
 {
 	if (sh_pmu)
 	if (sh_pmu)
 		return -EBUSY;
 		return -EBUSY;
-	sh_pmu = pmu;
+	sh_pmu = _pmu;
 
 
-	pr_info("Performance Events: %s support registered\n", pmu->name);
+	pr_info("Performance Events: %s support registered\n", _pmu->name);
 
 
-	WARN_ON(pmu->num_events > MAX_HWEVENTS);
+	WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
 
+	perf_pmu_register(&pmu);
 	perf_cpu_notifier(sh_pmu_notifier);
 	perf_cpu_notifier(sh_pmu_notifier);
 	return 0;
 	return 0;
 }
 }
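
The two exports added above give generic code a way to ask whether a hardware PMU is present and what it is called, without arch ifdefs; the oprofile glue in the following files is the intended caller. A minimal, hypothetical consumer:

	/* hypothetical consumer of perf_num_counters()/perf_pmu_name() */
	static int probe_pmu(void)
	{
		int n = perf_num_counters();

		if (!n)
			return -ENODEV;		/* no hardware PMU registered */

		pr_info("found %d counters on PMU \"%s\"\n", n, perf_pmu_name());
		return 0;
	}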

+ 4 - 0
arch/sh/oprofile/Makefile

@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		oprofilefs.o oprofile_stats.o \
 		oprofilefs.o oprofile_stats.o \
 		timer_int.o )
 		timer_int.o )
 
 
+ifeq ($(CONFIG_HW_PERF_EVENTS),y)
+DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
+endif
+
 oprofile-y	:= $(DRIVER_OBJS) common.o backtrace.o
 oprofile-y	:= $(DRIVER_OBJS) common.o backtrace.o

+ 23 - 92
arch/sh/oprofile/common.c

@@ -17,114 +17,45 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
+#include <linux/perf_event.h>
 #include <asm/processor.h>
 #include <asm/processor.h>
-#include "op_impl.h"
-
-static struct op_sh_model *model;
-
-static struct op_counter_config ctr[20];
 
 
+#ifdef CONFIG_HW_PERF_EVENTS
 extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
 extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
 
 
-static int op_sh_setup(void)
-{
-	/* Pre-compute the values to stuff in the hardware registers.  */
-	model->reg_setup(ctr);
-
-	/* Configure the registers on all cpus.  */
-	on_each_cpu(model->cpu_setup, NULL, 1);
-
-        return 0;
-}
-
-static int op_sh_create_files(struct super_block *sb, struct dentry *root)
+char *op_name_from_perf_id(void)
 {
 {
-	int i, ret = 0;
+	const char *pmu;
+	char buf[20];
+	int size;
 
 
-	for (i = 0; i < model->num_counters; i++) {
-		struct dentry *dir;
-		char buf[4];
+	pmu = perf_pmu_name();
+	if (!pmu)
+		return NULL;
 
 
-		snprintf(buf, sizeof(buf), "%d", i);
-		dir = oprofilefs_mkdir(sb, root, buf);
+	size = snprintf(buf, sizeof(buf), "sh/%s", pmu);
+	if (size > -1 && size < sizeof(buf))
+		return buf;
 
 
-		ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
-		ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
-		ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
-		ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
-
-		if (model->create_files)
-			ret |= model->create_files(sb, dir);
-		else
-			ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
-
-		/* Dummy entries */
-		ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
-	}
-
-	return ret;
+	return NULL;
 }
 }
 
 
-static int op_sh_start(void)
+int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 {
-	/* Enable performance monitoring for all counters.  */
-	on_each_cpu(model->cpu_start, NULL, 1);
+	ops->backtrace = sh_backtrace;
 
 
-	return 0;
+	return oprofile_perf_init(ops);
 }
 }
 
 
-static void op_sh_stop(void)
+void __exit oprofile_arch_exit(void)
 {
 {
-	/* Disable performance monitoring for all counters.  */
-	on_each_cpu(model->cpu_stop, NULL, 1);
+	oprofile_perf_exit();
 }
 }
-
+#else
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 {
-	struct op_sh_model *lmodel = NULL;
-	int ret;
-
-	/*
-	 * Always assign the backtrace op. If the counter initialization
-	 * fails, we fall back to the timer which will still make use of
-	 * this.
-	 */
-	ops->backtrace = sh_backtrace;
-
-	/*
-	 * XXX
-	 *
-	 * All of the SH7750/SH-4A counters have been converted to perf,
-	 * this infrastructure hook is left for other users until they've
-	 * had a chance to convert over, at which point all of this
-	 * will be deleted.
-	 */
-
-	if (!lmodel)
-		return -ENODEV;
-	if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER))
-		return -ENODEV;
-
-	ret = lmodel->init();
-	if (unlikely(ret != 0))
-		return ret;
-
-	model = lmodel;
-
-	ops->setup		= op_sh_setup;
-	ops->create_files	= op_sh_create_files;
-	ops->start		= op_sh_start;
-	ops->stop		= op_sh_stop;
-	ops->cpu_type		= lmodel->cpu_type;
-
-	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
-	       lmodel->cpu_type);
-
-	return 0;
-}
-
-void oprofile_arch_exit(void)
-{
-	if (model && model->exit)
-		model->exit();
+	pr_info("oprofile: hardware counters not available\n");
+	return -ENODEV;
 }
 }
+void __exit oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */

+ 0 - 33
arch/sh/oprofile/op_impl.h

@@ -1,33 +0,0 @@
-#ifndef __OP_IMPL_H
-#define __OP_IMPL_H
-
-/* Per-counter configuration as set via oprofilefs.  */
-struct op_counter_config {
-	unsigned long enabled;
-	unsigned long event;
-
-	unsigned long count;
-
-	/* Dummy values for userspace tool compliance */
-	unsigned long kernel;
-	unsigned long user;
-	unsigned long unit_mask;
-};
-
-/* Per-architecture configury and hooks.  */
-struct op_sh_model {
-	void (*reg_setup)(struct op_counter_config *);
-	int (*create_files)(struct super_block *sb, struct dentry *dir);
-	void (*cpu_setup)(void *dummy);
-	int (*init)(void);
-	void (*exit)(void);
-	void (*cpu_start)(void *args);
-	void (*cpu_stop)(void *args);
-	char *cpu_type;
-	unsigned char num_counters;
-};
-
-/* arch/sh/oprofile/common.c */
-extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
-
-#endif /* __OP_IMPL_H */

+ 3 - 0
arch/sparc/Kconfig

@@ -26,10 +26,12 @@ config SPARC
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select RTC_CLASS
 	select RTC_CLASS
 	select RTC_DRV_M48T59
 	select RTC_DRV_M48T59
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select PERF_USE_VMALLOC
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_API_DEBUG
+	select HAVE_ARCH_JUMP_LABEL
 
 
 config SPARC32
 config SPARC32
 	def_bool !64BIT
 	def_bool !64BIT
@@ -53,6 +55,7 @@ config SPARC64
 	select RTC_DRV_BQ4802
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
 	select RTC_DRV_SUN4V
 	select RTC_DRV_STARFIRE
 	select RTC_DRV_STARFIRE
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select PERF_USE_VMALLOC
 
 

+ 32 - 0
arch/sparc/include/asm/jump_label.h

@@ -0,0 +1,32 @@
+#ifndef _ASM_SPARC_JUMP_LABEL_H
+#define _ASM_SPARC_JUMP_LABEL_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <asm/system.h>
+
+#define JUMP_LABEL_NOP_SIZE 4
+
+#define JUMP_LABEL(key, label)					\
+	do {							\
+		asm goto("1:\n\t"				\
+			 "nop\n\t"				\
+			 "nop\n\t"				\
+			 ".pushsection __jump_table,  \"a\"\n\t"\
+			 ".word 1b, %l[" #label "], %c0\n\t"	\
+			 ".popsection \n\t"			\
+			 : :  "i" (key) :  : label);\
+	} while (0)
+
+#endif /* __KERNEL__ */
+
+typedef u32 jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif
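
The macro above is the sparc half of the jump label optimization used elsewhere in this merge (the perf scheduler hooks, tracepoints): the call site compiles to nops and is patched into a branch only while the key is enabled. Below is a sketch of a call site, patterned on how those users invoke it; the key name and slow path are invented:

	/* sketch of a JUMP_LABEL() call site; foo_trace_active is invented
	 * and is flipped at runtime by the jump label core */
	static int foo_trace_active;

	static void foo(void)
	{
		JUMP_LABEL(&foo_trace_active, do_trace);  /* nop when disabled, branch when enabled */
		return;			/* fast path falls straight through */
	do_trace:
		pr_debug("foo fired\n");	/* unlikely slow path */
	}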

+ 0 - 4
arch/sparc/include/asm/perf_event.h

@@ -1,10 +1,6 @@
 #ifndef __ASM_SPARC_PERF_EVENT_H
 #ifndef __ASM_SPARC_PERF_EVENT_H
 #define __ASM_SPARC_PERF_EVENT_H
 #define __ASM_SPARC_PERF_EVENT_H
 
 
-extern void set_perf_event_pending(void);
-
-#define	PERF_EVENT_INDEX_OFFSET	0
-
 #ifdef CONFIG_PERF_EVENTS
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 
 

+ 2 - 0
arch/sparc/kernel/Makefile

@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT)    += $(audit--y)
 
 
 pc--$(CONFIG_PERF_EVENTS) := perf_event.o
 pc--$(CONFIG_PERF_EVENTS) := perf_event.o
 obj-$(CONFIG_SPARC64)	+= $(pc--y)
 obj-$(CONFIG_SPARC64)	+= $(pc--y)
+
+obj-$(CONFIG_SPARC64)	+= jump_label.o

+ 47 - 0
arch/sparc/kernel/jump_label.c

@@ -0,0 +1,47 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+
+#include <linux/jump_label.h>
+#include <linux/memory.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	u32 val;
+	u32 *insn = (u32 *) (unsigned long) entry->code;
+
+	if (type == JUMP_LABEL_ENABLE) {
+		s32 off = (s32)entry->target - (s32)entry->code;
+
+#ifdef CONFIG_SPARC64
+		/* ba,pt %xcc, . + (off << 2) */
+		val = 0x10680000 | ((u32) off >> 2);
+#else
+		/* ba . + (off << 2) */
+		val = 0x10800000 | ((u32) off >> 2);
+#endif
+	} else {
+		val = 0x01000000;
+	}
+
+	get_online_cpus();
+	mutex_lock(&text_mutex);
+	*insn = val;
+	flushi(insn);
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
+}
+
+void arch_jump_label_text_poke_early(jump_label_t addr)
+{
+	u32 *insn_p = (u32 *) (unsigned long) addr;
+
+	*insn_p = 0x01000000;
+	flushi(insn_p);
+}
+
+#endif
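
The enable case above packs a word displacement into the low bits of the branch opcode; a quick worked example with invented addresses: a jump entry with code = 0x1000 and target = 0x1010 gives off = 0x10, so on sparc64 val = 0x10680000 | (0x10 >> 2) = 0x10680004, i.e. "ba,pt %xcc, . + 0x10". The disable case writes 0x01000000, the canonical sparc nop (sethi %hi(0), %g0).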

+ 6 - 0
arch/sparc/kernel/module.c

@@ -18,6 +18,9 @@
 #include <asm/spitfire.h>
 #include <asm/spitfire.h>
 
 
 #ifdef CONFIG_SPARC64
 #ifdef CONFIG_SPARC64
+
+#include <linux/jump_label.h>
+
 static void *module_map(unsigned long size)
 static void *module_map(unsigned long size)
 {
 {
 	struct vm_struct *area;
 	struct vm_struct *area;
@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 		    struct module *me)
 {
 {
+	/* make jump label nops */
+	jump_label_apply_nops(me);
+
 	/* Cheetah's I-cache is fully coherent.  */
 	/* Cheetah's I-cache is fully coherent.  */
 	if (tlb_type == spitfire) {
 	if (tlb_type == spitfire) {
 		unsigned long va;
 		unsigned long va;

+ 4 - 4
arch/sparc/kernel/pcr.c

@@ -7,7 +7,7 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/irq.h>
 
 
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 #include <linux/ftrace.h>
 #include <linux/ftrace.h>
 
 
 #include <asm/pil.h>
 #include <asm/pil.h>
@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
 
 
 	old_regs = set_irq_regs(regs);
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 	irq_enter();
-#ifdef CONFIG_PERF_EVENTS
-	perf_event_do_pending();
+#ifdef CONFIG_IRQ_WORK
+	irq_work_run();
 #endif
 #endif
 	irq_exit();
 	irq_exit();
 	set_irq_regs(old_regs);
 	set_irq_regs(old_regs);
 }
 }
 
 
-void set_perf_event_pending(void)
+void arch_irq_work_raise(void)
 {
 {
 	set_softint(1 << PIL_DEFERRED_PCR_WORK);
 	set_softint(1 << PIL_DEFERRED_PCR_WORK);
 }
 }

+ 123 - 117
arch/sparc/kernel/perf_event.c

@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
 
 
 		enc = perf_event_get_enc(cpuc->events[i]);
 		enc = perf_event_get_enc(cpuc->events[i]);
 		pcr &= ~mask_for_index(idx);
 		pcr &= ~mask_for_index(idx);
-		pcr |= event_encoding(enc, idx);
+		if (hwc->state & PERF_HES_STOPPED)
+			pcr |= nop_for_index(idx);
+		else
+			pcr |= event_encoding(enc, idx);
 	}
 	}
 out:
 out:
 	return pcr;
 	return pcr;
 }
 }
 
 
-void hw_perf_enable(void)
+static void sparc_pmu_enable(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	u64 pcr;
 	u64 pcr;
@@ -691,7 +694,7 @@ void hw_perf_enable(void)
 	pcr_ops->write(cpuc->pcr);
 	pcr_ops->write(cpuc->pcr);
 }
 }
 
 
-void hw_perf_disable(void)
+static void sparc_pmu_disable(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	u64 val;
 	u64 val;
@@ -710,19 +713,65 @@ void hw_perf_disable(void)
 	pcr_ops->write(cpuc->pcr);
 	pcr_ops->write(cpuc->pcr);
 }
 }
 
 
-static void sparc_pmu_disable(struct perf_event *event)
+static int active_event_index(struct cpu_hw_events *cpuc,
+			      struct perf_event *event)
+{
+	int i;
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (cpuc->event[i] == event)
+			break;
+	}
+	BUG_ON(i == cpuc->n_events);
+	return cpuc->current_idx[i];
+}
+
+static void sparc_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx = active_event_index(cpuc, event);
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		sparc_perf_event_set_period(event, &event->hw, idx);
+	}
+
+	event->hw.state = 0;
+
+	sparc_pmu_enable_event(cpuc, &event->hw, idx);
+}
+
+static void sparc_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx = active_event_index(cpuc, event);
+
+	if (!(event->hw.state & PERF_HES_STOPPED)) {
+		sparc_pmu_disable_event(cpuc, &event->hw, idx);
+		event->hw.state |= PERF_HES_STOPPED;
+	}
+
+	if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
+		sparc_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+static void sparc_pmu_del(struct perf_event *event, int _flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
 	unsigned long flags;
 	unsigned long flags;
 	int i;
 	int i;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
 
 
 	for (i = 0; i < cpuc->n_events; i++) {
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event[i]) {
 		if (event == cpuc->event[i]) {
-			int idx = cpuc->current_idx[i];
+			/* Absorb the final count and turn off the
+			 * event.
+			 */
+			sparc_pmu_stop(event, PERF_EF_UPDATE);
 
 
 			/* Shift remaining entries down into
 			/* Shift remaining entries down into
 			 * the existing slot.
 			 * the existing slot.
@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event)
 					cpuc->current_idx[i];
 					cpuc->current_idx[i];
 			}
 			}
 
 
-			/* Absorb the final count and turn off the
-			 * event.
-			 */
-			sparc_pmu_disable_event(cpuc, hwc, idx);
-			barrier();
-			sparc_perf_event_update(event, hwc, idx);
-
 			perf_event_update_userpage(event);
 			perf_event_update_userpage(event);
 
 
 			cpuc->n_events--;
 			cpuc->n_events--;
@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event)
 		}
 		}
 	}
 	}
 
 
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 }
 }
 
 
-static int active_event_index(struct cpu_hw_events *cpuc,
-			      struct perf_event *event)
-{
-	int i;
-
-	for (i = 0; i < cpuc->n_events; i++) {
-		if (cpuc->event[i] == event)
-			break;
-	}
-	BUG_ON(i == cpuc->n_events);
-	return cpuc->current_idx[i];
-}
-
 static void sparc_pmu_read(struct perf_event *event)
 static void sparc_pmu_read(struct perf_event *event)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event)
 	sparc_perf_event_update(event, hwc, idx);
 	sparc_perf_event_update(event, hwc, idx);
 }
 }
 
 
-static void sparc_pmu_unthrottle(struct perf_event *event)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	int idx = active_event_index(cpuc, event);
-	struct hw_perf_event *hwc = &event->hw;
-
-	sparc_pmu_enable_event(cpuc, hwc, idx);
-}
-
 static atomic_t active_events = ATOMIC_INIT(0);
 static atomic_t active_events = ATOMIC_INIT(0);
 static DEFINE_MUTEX(pmc_grab_mutex);
 static DEFINE_MUTEX(pmc_grab_mutex);
 
 
@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts,
 	if (!n_ev)
 	if (!n_ev)
 		return 0;
 		return 0;
 
 
-	if (n_ev > perf_max_events)
+	if (n_ev > MAX_HWEVENTS)
 		return -1;
 		return -1;
 
 
 	msk0 = perf_event_get_msk(events[0]);
 	msk0 = perf_event_get_msk(events[0]);
@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 	return n;
 }
 }
 
 
-static int sparc_pmu_enable(struct perf_event *event)
+static int sparc_pmu_add(struct perf_event *event, int ef_flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int n0, ret = -EAGAIN;
 	int n0, ret = -EAGAIN;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
 
 
 	n0 = cpuc->n_events;
 	n0 = cpuc->n_events;
-	if (n0 >= perf_max_events)
+	if (n0 >= MAX_HWEVENTS)
 		goto out;
 		goto out;
 
 
 	cpuc->event[n0] = event;
 	cpuc->event[n0] = event;
 	cpuc->events[n0] = event->hw.event_base;
 	cpuc->events[n0] = event->hw.event_base;
 	cpuc->current_idx[n0] = PIC_NO_INDEX;
 	cpuc->current_idx[n0] = PIC_NO_INDEX;
 
 
+	event->hw.state = PERF_HES_UPTODATE;
+	if (!(ef_flags & PERF_EF_START))
+		event->hw.state |= PERF_HES_STOPPED;
+
 	/*
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
@@ -1020,12 +1044,12 @@ nocheck:
 
 
 	ret = 0;
 	ret = 0;
 out:
 out:
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 	return ret;
 	return ret;
 }
 }
 
 
-static int __hw_perf_event_init(struct perf_event *event)
+static int sparc_pmu_event_init(struct perf_event *event)
 {
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct perf_event_attr *attr = &event->attr;
 	struct perf_event *evts[MAX_HWEVENTS];
 	struct perf_event *evts[MAX_HWEVENTS];
@@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (atomic_read(&nmi_active) < 0)
 	if (atomic_read(&nmi_active) < 0)
 		return -ENODEV;
 		return -ENODEV;
 
 
-	pmap = NULL;
-	if (attr->type == PERF_TYPE_HARDWARE) {
+	switch (attr->type) {
+	case PERF_TYPE_HARDWARE:
 		if (attr->config >= sparc_pmu->max_events)
 		if (attr->config >= sparc_pmu->max_events)
 			return -EINVAL;
 			return -EINVAL;
 		pmap = sparc_pmu->event_map(attr->config);
 		pmap = sparc_pmu->event_map(attr->config);
-	} else if (attr->type == PERF_TYPE_HW_CACHE) {
+		break;
+
+	case PERF_TYPE_HW_CACHE:
 		pmap = sparc_map_cache_event(attr->config);
 		pmap = sparc_map_cache_event(attr->config);
 		if (IS_ERR(pmap))
 		if (IS_ERR(pmap))
 			return PTR_ERR(pmap);
 			return PTR_ERR(pmap);
-	} else if (attr->type != PERF_TYPE_RAW)
-		return -EOPNOTSUPP;
+		break;
+
+	case PERF_TYPE_RAW:
+		pmap = NULL;
+		break;
+
+	default:
+		return -ENOENT;
+
+	}
 
 
 	if (pmap) {
 	if (pmap) {
 		hwc->event_base = perf_event_encode(pmap);
 		hwc->event_base = perf_event_encode(pmap);
 	} else {
 	} else {
-		/* User gives us "(encoding << 16) | pic_mask" for
+		/*
+		 * User gives us "(encoding << 16) | pic_mask" for
 		 * PERF_TYPE_RAW events.
 		 * PERF_TYPE_RAW events.
 		 */
 		 */
 		hwc->event_base = attr->config;
 		hwc->event_base = attr->config;
@@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	n = 0;
 	n = 0;
 	if (event->group_leader != event) {
 	if (event->group_leader != event) {
 		n = collect_events(event->group_leader,
 		n = collect_events(event->group_leader,
-				   perf_max_events - 1,
+				   MAX_HWEVENTS - 1,
 				   evts, events, current_idx_dmy);
 				   evts, events, current_idx_dmy);
 		if (n < 0)
 		if (n < 0)
 			return -EINVAL;
 			return -EINVAL;
@@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
  * schedulability test, it will be performed at commit time
  */
  */
-static void sparc_pmu_start_txn(const struct pmu *pmu)
+static void sparc_pmu_start_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
 
+	perf_pmu_disable(pmu);
 	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 }
 
 
@@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  * schedulability test.
  */
  */
-static void sparc_pmu_cancel_txn(const struct pmu *pmu)
+static void sparc_pmu_cancel_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
 
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
 }
 }
 
 
 /*
 /*
@@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Perform the group schedulability test as a whole
  * Return 0 if success
  * Return 0 if success
  */
  */
-static int sparc_pmu_commit_txn(const struct pmu *pmu)
+static int sparc_pmu_commit_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int n;
 	int n;
@@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
 		return -EAGAIN;
 		return -EAGAIN;
 
 
 	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	cpuc->group_flag &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
 	return 0;
 	return 0;
 }
 }
 
 
-static const struct pmu pmu = {
-	.enable		= sparc_pmu_enable,
-	.disable	= sparc_pmu_disable,
+static struct pmu pmu = {
+	.pmu_enable	= sparc_pmu_enable,
+	.pmu_disable	= sparc_pmu_disable,
+	.event_init	= sparc_pmu_event_init,
+	.add		= sparc_pmu_add,
+	.del		= sparc_pmu_del,
+	.start		= sparc_pmu_start,
+	.stop		= sparc_pmu_stop,
 	.read		= sparc_pmu_read,
 	.read		= sparc_pmu_read,
-	.unthrottle	= sparc_pmu_unthrottle,
 	.start_txn	= sparc_pmu_start_txn,
 	.start_txn	= sparc_pmu_start_txn,
 	.cancel_txn	= sparc_pmu_cancel_txn,
 	.cancel_txn	= sparc_pmu_cancel_txn,
 	.commit_txn	= sparc_pmu_commit_txn,
 	.commit_txn	= sparc_pmu_commit_txn,
 };
 };
 
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
-{
-	int err = __hw_perf_event_init(event);
-
-	if (err)
-		return ERR_PTR(err);
-	return &pmu;
-}
-
 void perf_event_print_debug(void)
 void perf_event_print_debug(void)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
@@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 			continue;
 			continue;
 
 
 		if (perf_event_overflow(event, 1, &data, regs))
 		if (perf_event_overflow(event, 1, &data, regs))
-			sparc_pmu_disable_event(cpuc, hwc, idx);
+			sparc_pmu_stop(event, 0);
 	}
 	}
 
 
 	return NOTIFY_STOP;
 	return NOTIFY_STOP;
@@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void)
 
 
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
 
-	/* All sparc64 PMUs currently have 2 events.  */
-	perf_max_events = 2;
-
+	perf_pmu_register(&pmu);
 	register_die_notifier(&perf_event_nmi_notifier);
 	register_die_notifier(&perf_event_nmi_notifier);
 }
 }
 
 
-static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-	if (entry->nr < PERF_MAX_STACK_DEPTH)
-		entry->ip[entry->nr++] = ip;
-}
-
-static void perf_callchain_kernel(struct pt_regs *regs,
-				  struct perf_callchain_entry *entry)
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
 {
 {
 	unsigned long ksp, fp;
 	unsigned long ksp, fp;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	int graph = 0;
 	int graph = 0;
 #endif
 #endif
 
 
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
-	callchain_store(entry, regs->tpc);
+	stack_trace_flush();
+
+	perf_callchain_store(entry, regs->tpc);
 
 
 	ksp = regs->u_regs[UREG_I6];
 	ksp = regs->u_regs[UREG_I6];
 	fp = ksp + STACK_BIAS;
 	fp = ksp + STACK_BIAS;
@@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 			pc = sf->callers_pc;
 			pc = sf->callers_pc;
 			fp = (unsigned long)sf->fp + STACK_BIAS;
 			fp = (unsigned long)sf->fp + STACK_BIAS;
 		}
 		}
-		callchain_store(entry, pc);
+		perf_callchain_store(entry, pc);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 		if ((pc + 8UL) == (unsigned long) &return_to_handler) {
 		if ((pc + 8UL) == (unsigned long) &return_to_handler) {
 			int index = current->curr_ret_stack;
 			int index = current->curr_ret_stack;
 			if (current->ret_stack && index >= graph) {
 			if (current->ret_stack && index >= graph) {
 				pc = current->ret_stack[index - graph].ret;
 				pc = current->ret_stack[index - graph].ret;
-				callchain_store(entry, pc);
+				perf_callchain_store(entry, pc);
 				graph++;
 				graph++;
 			}
 			}
 		}
 		}
@@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 }
 
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 {
 	unsigned long ufp;
 	unsigned long ufp;
 
 
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, regs->tpc);
+	perf_callchain_store(entry, regs->tpc);
 
 
 	ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
 	ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
 	do {
 	do {
@@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs,
 
 
 		pc = sf.callers_pc;
 		pc = sf.callers_pc;
 		ufp = (unsigned long)sf.fp + STACK_BIAS;
 		ufp = (unsigned long)sf.fp + STACK_BIAS;
-		callchain_store(entry, pc);
+		perf_callchain_store(entry, pc);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 }
 
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 {
 	unsigned long ufp;
 	unsigned long ufp;
 
 
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, regs->tpc);
+	perf_callchain_store(entry, regs->tpc);
 
 
 	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
 	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
 	do {
 	do {
@@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 
 
 		pc = sf.callers_pc;
 		pc = sf.callers_pc;
 		ufp = (unsigned long)sf.fp;
 		ufp = (unsigned long)sf.fp;
-		callchain_store(entry, pc);
+		perf_callchain_store(entry, pc);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 }
 
 
-/* Like powerpc we can't get PMU interrupts within the PMU handler,
- * so no need for separate NMI and IRQ chains as on x86.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
-	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-	entry->nr = 0;
-	if (!user_mode(regs)) {
-		stack_trace_flush();
-		perf_callchain_kernel(regs, entry);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-	if (regs) {
-		flushw_user();
-		if (test_thread_flag(TIF_32BIT))
-			perf_callchain_user_32(regs, entry);
-		else
-			perf_callchain_user_64(regs, entry);
-	}
-	return entry;
+	flushw_user();
+	if (test_thread_flag(TIF_32BIT))
+		perf_callchain_user_32(entry, regs);
+	else
+		perf_callchain_user_64(entry, regs);
 }
 }
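[Note: the sparc hunks above convert the driver from the old enable()/disable()/unthrottle() hooks to the new struct pmu callbacks (event_init/add/del/start/stop) and register it with perf_pmu_register(). The fragment below is only an orientation sketch of the calling convention those callbacks imply, not code from this commit; example_reschedule() is a hypothetical name.]

/* Sketch: how the core is expected to drive the new callbacks. */
static void example_reschedule(struct pmu *pmu, struct perf_event *event)
{
	/* throttle or switch out: halt the counter and fold the hw count in */
	pmu->stop(event, PERF_EF_UPDATE);	/* sets PERF_HES_STOPPED | PERF_HES_UPTODATE */

	/* resume later: reprogram the sample period and count again */
	pmu->start(event, PERF_EF_RELOAD);	/* clears event->hw.state */
}

add()/del() bracket this pair, and sparc_pmu_del() above performs a final stop(event, PERF_EF_UPDATE) before dropping the event.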

+ 8 - 0
arch/x86/Kconfig

@@ -25,6 +25,7 @@ config X86
 	select HAVE_IDE
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS if (!M386 && !M486)
 	select HAVE_PERF_EVENTS if (!M386 && !M486)
+	select HAVE_IRQ_WORK
 	select HAVE_IOREMAP_PROT
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -33,6 +34,7 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
 	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
@@ -59,6 +61,8 @@ config X86
 	select ANON_INODES
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 	select HAVE_USER_RETURN_NOTIFIER
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_TEXT_POKE_SMP
 
 
 config INSTRUCTION_DECODER
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
 	def_bool (KPROBES || PERF_EVENTS)
@@ -2125,6 +2129,10 @@ config HAVE_ATOMIC_IOMAP
 	def_bool y
 	def_bool y
 	depends on X86_32
 	depends on X86_32
 
 
+config HAVE_TEXT_POKE_SMP
+	bool
+	select STOP_MACHINE if SMP
+
 source "net/Kconfig"
 source "net/Kconfig"
 
 
 source "drivers/Kconfig"
 source "drivers/Kconfig"

+ 11 - 0
arch/x86/include/asm/alternative.h

@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <linux/stringify.h>
+#include <linux/jump_label.h>
 #include <asm/asm.h>
 #include <asm/asm.h>
 
 
 /*
 /*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 #define __parainstructions_end	NULL
 #define __parainstructions_end	NULL
 #endif
 #endif
 
 
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
 /*
 /*
  * Clear and restore the kernel write-protection flag on the local CPU.
  * Clear and restore the kernel write-protection flag on the local CPU.
  * Allows the kernel to edit read-only pages.
  * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 
 
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+#define IDEAL_NOP_SIZE_5 5
+extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+extern void arch_init_ideal_nop5(void);
+#else
+static inline void arch_init_ideal_nop5(void) {}
+#endif
+
 #endif /* _ASM_X86_ALTERNATIVE_H */
 #endif /* _ASM_X86_ALTERNATIVE_H */

+ 2 - 2
arch/x86/include/asm/entry_arch.h

@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
-#ifdef CONFIG_PERF_EVENTS
-BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
+#ifdef CONFIG_IRQ_WORK
+BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
 #endif
 
 #ifdef CONFIG_X86_THERMAL_VECTOR

+ 1 - 1
arch/x86/include/asm/hardirq.h

@@ -14,7 +14,7 @@ typedef struct {
 #endif
 	unsigned int x86_platform_ipis;	/* arch dependent */
 	unsigned int apic_perf_irqs;
-	unsigned int apic_pending_irqs;
+	unsigned int apic_irq_work_irqs;
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;

+ 1 - 1
arch/x86/include/asm/hw_irq.h

@@ -29,7 +29,7 @@
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
 extern void error_interrupt(void);
-extern void perf_pending_interrupt(void);
+extern void irq_work_interrupt(void);
 
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
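[Note: these header hunks retarget the old perf "pending work" self-IPI at the generic irq_work vector. The x86 handler behind irq_work_interrupt() is not shown on this page; a minimal sketch, assuming it mirrors the smp_perf_pending_interrupt() deleted from cpu/perf_event.c further down and that irq_work_run() is the generic entry point:]

void smp_irq_work_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_irq_work_irqs);	/* the counter added to hardirq.h above */
	irq_work_run();				/* run whatever callbacks were queued */
	irq_exit();
}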

+ 2 - 2
arch/x86/include/asm/irq_vectors.h

@@ -114,9 +114,9 @@
 #define X86_PLATFORM_IPI_VECTOR		0xed
 
 /*
- * Performance monitoring pending work vector:
+ * IRQ work vector:
  */
-#define LOCAL_PENDING_VECTOR		0xec
+#define IRQ_WORK_VECTOR			0xec
 
 #define UV_BAU_MESSAGE			0xea
 

+ 37 - 0
arch/x86/include/asm/jump_label.h

@@ -0,0 +1,37 @@
+#ifndef _ASM_X86_JUMP_LABEL_H
+#define _ASM_X86_JUMP_LABEL_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <asm/nops.h>
+
+#define JUMP_LABEL_NOP_SIZE 5
+
+# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+
+# define JUMP_LABEL(key, label)					\
+	do {							\
+		asm goto("1:"					\
+			JUMP_LABEL_INITIAL_NOP			\
+			".pushsection __jump_table,  \"a\" \n\t"\
+			_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
+			".popsection \n\t"			\
+			: :  "i" (key) :  : label);		\
+	} while (0)
+
+#endif /* __KERNEL__ */
+
+#ifdef CONFIG_X86_64
+typedef u64 jump_label_t;
+#else
+typedef u32 jump_label_t;
+#endif
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif

+ 22 - 30
arch/x86/include/asm/perf_event_p4.h

@@ -36,19 +36,6 @@
 #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
 #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
 
 
-/* Non HT mask */
-#define P4_ESCR_MASK			\
-	(P4_ESCR_EVENT_MASK	|	\
-	P4_ESCR_EVENTMASK_MASK	|	\
-	P4_ESCR_TAG_MASK	|	\
-	P4_ESCR_TAG_ENABLE	|	\
-	P4_ESCR_T0_OS		|	\
-	P4_ESCR_T0_USR)
-
-/* HT mask */
-#define P4_ESCR_MASK_HT			\
-	(P4_ESCR_MASK |	P4_ESCR_T1_OS | P4_ESCR_T1_USR)
-
 #define P4_CCCR_OVF			0x80000000U
 #define P4_CCCR_OVF			0x80000000U
 #define P4_CCCR_CASCADE			0x40000000U
 #define P4_CCCR_CASCADE			0x40000000U
 #define P4_CCCR_OVF_PMI_T0		0x04000000U
 #define P4_CCCR_OVF_PMI_T0		0x04000000U
@@ -70,23 +57,6 @@
 #define P4_CCCR_THRESHOLD(v)		((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_THRESHOLD(v)		((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)			((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 #define P4_CCCR_ESEL(v)			((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
 
-/* Non HT mask */
-#define P4_CCCR_MASK				\
-	(P4_CCCR_OVF			|	\
-	P4_CCCR_CASCADE			|	\
-	P4_CCCR_OVF_PMI_T0		|	\
-	P4_CCCR_FORCE_OVF		|	\
-	P4_CCCR_EDGE			|	\
-	P4_CCCR_THRESHOLD_MASK		|	\
-	P4_CCCR_COMPLEMENT		|	\
-	P4_CCCR_COMPARE			|	\
-	P4_CCCR_ESCR_SELECT_MASK	|	\
-	P4_CCCR_ENABLE)
-
-/* HT mask */
-#define P4_CCCR_MASK_HT				\
-	(P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
-
 #define P4_GEN_ESCR_EMASK(class, name, bit)	\
 #define P4_GEN_ESCR_EMASK(class, name, bit)	\
 	class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
 	class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_EMASK_BIT(class, name)		class##__##name
 #define P4_ESCR_EMASK_BIT(class, name)		class##__##name
@@ -127,6 +97,28 @@
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
 
 
+/*
+ * The bits we allow to pass for RAW events
+ */
+#define P4_CONFIG_MASK_ESCR		\
+	P4_ESCR_EVENT_MASK	|	\
+	P4_ESCR_EVENTMASK_MASK	|	\
+	P4_ESCR_TAG_MASK	|	\
+	P4_ESCR_TAG_ENABLE
+
+#define P4_CONFIG_MASK_CCCR		\
+	P4_CCCR_EDGE		|	\
+	P4_CCCR_THRESHOLD_MASK	|	\
+	P4_CCCR_COMPLEMENT	|	\
+	P4_CCCR_COMPARE		|	\
+	P4_CCCR_THREAD_ANY	|	\
+	P4_CCCR_RESERVED
+
+/* some dangerous bits are reserved for kernel internals */
+#define P4_CONFIG_MASK				  	  \
+	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR))	| \
+	(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
+
 static inline bool p4_is_event_cascaded(u64 config)
 static inline bool p4_is_event_cascaded(u64 config)
 {
 {
 	u32 cccr = p4_config_unpack_cccr(config);
 	u32 cccr = p4_config_unpack_cccr(config);

+ 2 - 1
arch/x86/kernel/Makefile

@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o		:= n
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
-obj-y			+= setup.o x86_init.o i8259.o irqinit.o
+obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
+obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o

+ 68 - 3
arch/x86/kernel/alternative.c

@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
-static void *text_poke_early(void *addr, const void *opcode, size_t len);
+void *text_poke_early(void *addr, const void *opcode, size_t len);
 
 
 /* Replace instructions with better alternatives for this CPU type.
 /* Replace instructions with better alternatives for this CPU type.
    This runs before SMP is initialized to avoid SMP problems with
    This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
  * instructions. And on the local CPU you need to be protected against NMI or MCE
  * handlers seeing an inconsistent instruction while you patch.
  */
-static void *__init_or_module text_poke_early(void *addr, const void *opcode,
+void *__init_or_module text_poke_early(void *addr, const void *opcode,
 					      size_t len)
 					      size_t len)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 	tpp.len = len;
 	tpp.len = len;
 	atomic_set(&stop_machine_first, 1);
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
 	wrote_text = 0;
-	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+	/* Use __stop_machine() because the caller already got online_cpus. */
+	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 	return addr;
 	return addr;
 }
 }
 
 
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+
+unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+
+void __init arch_init_ideal_nop5(void)
+{
+	extern const unsigned char ftrace_test_p6nop[];
+	extern const unsigned char ftrace_test_nop5[];
+	extern const unsigned char ftrace_test_jmp[];
+	int faulted = 0;
+
+	/*
+	 * There is no good nop for all x86 archs.
+	 * We will default to using the P6_NOP5, but first we
+	 * will test to make sure that the nop will actually
+	 * work on this CPU. If it faults, we will then
+	 * go to a less efficient 5-byte nop. If that fails
+	 * we then just use a jmp as our nop. This isn't the most
+	 * efficient nop, but we cannot use a multi-part nop
+	 * since we would then risk being preempted in the middle
+	 * of that nop, and if we enabled tracing then, it might
+	 * cause a system crash.
+	 *
+	 * TODO: check the cpuid to determine the best nop.
+	 */
+	asm volatile (
+		"ftrace_test_jmp:"
+		"jmp ftrace_test_p6nop\n"
+		"nop\n"
+		"nop\n"
+		"nop\n"  /* 2 byte jmp + 3 bytes */
+		"ftrace_test_p6nop:"
+		P6_NOP5
+		"jmp 1f\n"
+		"ftrace_test_nop5:"
+		".byte 0x66,0x66,0x66,0x66,0x90\n"
+		"1:"
+		".section .fixup, \"ax\"\n"
+		"2:	movl $1, %0\n"
+		"	jmp ftrace_test_nop5\n"
+		"3:	movl $2, %0\n"
+		"	jmp 1b\n"
+		".previous\n"
+		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
+		_ASM_EXTABLE(ftrace_test_nop5, 3b)
+		: "=r"(faulted) : "0" (faulted));
+
+	switch (faulted) {
+	case 0:
+		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
+		memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
+		break;
+	case 1:
+		pr_info("converting mcount calls to 66 66 66 66 90\n");
+		memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
+		break;
+	case 2:
+		pr_info("converting mcount calls to jmp . + 5\n");
+		memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
+		break;
+	}
+
+}
+#endif

+ 123 - 157
arch/x86/kernel/cpu/perf_event.c

@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
 /*
 /*
  * Setup the hardware configuration for a given attr_type
  * Setup the hardware configuration for a given attr_type
  */
  */
-static int __hw_perf_event_init(struct perf_event *event)
+static int __x86_pmu_event_init(struct perf_event *event)
 {
 {
 	int err;
 	int err;
 
 
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
 	}
 	}
 }
 }
 
 
-void hw_perf_disable(void)
+static void x86_pmu_disable(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
 	}
 	}
 }
 }
 
 
-static const struct pmu pmu;
+static struct pmu pmu;
 
 
 static inline int is_x86_event(struct perf_event *event)
 static inline int is_x86_event(struct perf_event *event)
 {
 {
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
 		hwc->last_tag == cpuc->tags[i];
 		hwc->last_tag == cpuc->tags[i];
 }
 }
 
 
-static int x86_pmu_start(struct perf_event *event);
-static void x86_pmu_stop(struct perf_event *event);
+static void x86_pmu_start(struct perf_event *event, int flags);
+static void x86_pmu_stop(struct perf_event *event, int flags);
 
 
-void hw_perf_enable(void)
+static void x86_pmu_enable(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *event;
 	struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
 			    match_prev_assignment(hwc, cpuc, i))
 			    match_prev_assignment(hwc, cpuc, i))
 				continue;
 				continue;
 
 
-			x86_pmu_stop(event);
+			/*
+			 * Ensure we don't accidentally enable a stopped
+			 * counter simply because we rescheduled.
+			 */
+			if (hwc->state & PERF_HES_STOPPED)
+				hwc->state |= PERF_HES_ARCH;
+
+			x86_pmu_stop(event, PERF_EF_UPDATE);
 		}
 		}
 
 
 		for (i = 0; i < cpuc->n_events; i++) {
 		for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
 			else if (i < n_running)
 			else if (i < n_running)
 				continue;
 				continue;
 
 
-			x86_pmu_start(event);
+			if (hwc->state & PERF_HES_ARCH)
+				continue;
+
+			x86_pmu_start(event, PERF_EF_RELOAD);
 		}
 		}
 		cpuc->n_added = 0;
 		cpuc->n_added = 0;
 		perf_events_lapic_init();
 		perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
 }
 }
 
 
 /*
 /*
- * activate a single event
+ * Add a single event to the PMU.
  *
  * The event is added to the group of enabled events
  * but only if it can be scheduled with existing events.
- *
- * Called with PMU disabled. If successful and return value 1,
- * then guaranteed to call perf_enable() and hw_perf_enable()
  */
  */
-static int x86_pmu_enable(struct perf_event *event)
+static int x86_pmu_add(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc;
 	struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
 
 
 	hwc = &event->hw;
 	hwc = &event->hw;
 
 
+	perf_pmu_disable(event->pmu);
 	n0 = cpuc->n_events;
 	n0 = cpuc->n_events;
-	n = collect_events(cpuc, event, false);
-	if (n < 0)
-		return n;
+	ret = n = collect_events(cpuc, event, false);
+	if (ret < 0)
+		goto out;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (!(flags & PERF_EF_START))
+		hwc->state |= PERF_HES_ARCH;
 
 	/*
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
-	 * at commit time(->commit_txn) as a whole
+	 * at commit time (->commit_txn) as a whole
 	 */
 	 */
 	if (cpuc->group_flag & PERF_EVENT_TXN)
 	if (cpuc->group_flag & PERF_EVENT_TXN)
-		goto out;
+		goto done_collect;
 
 
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 	if (ret)
-		return ret;
+		goto out;
 	/*
 	/*
 	 * copy new assignment, now we know it is possible
 	 * copy new assignment, now we know it is possible
 	 * will be used by hw_perf_enable()
 	 * will be used by hw_perf_enable()
 	 */
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
 
-out:
+done_collect:
 	cpuc->n_events = n;
 	cpuc->n_events = n;
 	cpuc->n_added += n - n0;
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
 	cpuc->n_txn += n - n0;
 
 
-	return 0;
+	ret = 0;
+out:
+	perf_pmu_enable(event->pmu);
+	return ret;
 }
 }
 
 
-static int x86_pmu_start(struct perf_event *event)
+static void x86_pmu_start(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx = event->hw.idx;
 	int idx = event->hw.idx;
 
 
-	if (idx == -1)
-		return -EAGAIN;
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		x86_perf_event_set_period(event);
+	}
+
+	event->hw.state = 0;
 
 
-	x86_perf_event_set_period(event);
 	cpuc->events[idx] = event;
 	cpuc->events[idx] = event;
 	__set_bit(idx, cpuc->active_mask);
 	__set_bit(idx, cpuc->active_mask);
 	__set_bit(idx, cpuc->running);
 	__set_bit(idx, cpuc->running);
 	x86_pmu.enable(event);
 	x86_pmu.enable(event);
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
-
-	return 0;
-}
-
-static void x86_pmu_unthrottle(struct perf_event *event)
-{
-	int ret = x86_pmu_start(event);
-	WARN_ON_ONCE(ret);
 }
 }
 
 
 void perf_event_print_debug(void)
 void perf_event_print_debug(void)
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 }
 }
 
 
-static void x86_pmu_stop(struct perf_event *event)
+static void x86_pmu_stop(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
 
 
-	if (!__test_and_clear_bit(idx, cpuc->active_mask))
-		return;
-
-	x86_pmu.disable(event);
-
-	/*
-	 * Drain the remaining delta count out of a event
-	 * that we are disabling:
-	 */
-	x86_perf_event_update(event);
+	if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+		x86_pmu.disable(event);
+		cpuc->events[hwc->idx] = NULL;
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+	}
 
 
-	cpuc->events[idx] = NULL;
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		/*
+		 * Drain the remaining delta count out of an event
+		 * that we are disabling:
+		 */
+		x86_perf_event_update(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
 }
 }
 
 
-static void x86_pmu_disable(struct perf_event *event)
+static void x86_pmu_del(struct perf_event *event, int flags)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int i;
 	int i;
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
 	if (cpuc->group_flag & PERF_EVENT_TXN)
 	if (cpuc->group_flag & PERF_EVENT_TXN)
 		return;
 		return;
 
 
-	x86_pmu_stop(event);
+	x86_pmu_stop(event, PERF_EF_UPDATE);
 
 
 	for (i = 0; i < cpuc->n_events; i++) {
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event_list[i]) {
 		if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 	struct perf_sample_data data;
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc;
 	struct cpu_hw_events *cpuc;
 	struct perf_event *event;
 	struct perf_event *event;
-	struct hw_perf_event *hwc;
 	int idx, handled = 0;
 	int idx, handled = 0;
 	u64 val;
 	u64 val;
 
 
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		}
 		}
 
 
 		event = cpuc->events[idx];
 		event = cpuc->events[idx];
-		hwc = &event->hw;
 
 
 		val = x86_perf_event_update(event);
 		val = x86_perf_event_update(event);
 		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 			continue;
 			continue;
 
 
 		if (perf_event_overflow(event, 1, &data, regs))
 		if (perf_event_overflow(event, 1, &data, regs))
-			x86_pmu_stop(event);
+			x86_pmu_stop(event, 0);
 	}
 	}
 
 
 	if (handled)
 	if (handled)
@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 	return handled;
 	return handled;
 }
 }
 
 
-void smp_perf_pending_interrupt(struct pt_regs *regs)
-{
-	irq_enter();
-	ack_APIC_irq();
-	inc_irq_stat(apic_pending_irqs);
-	perf_event_do_pending();
-	irq_exit();
-}
-
-void set_perf_event_pending(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-	if (!x86_pmu.apic || !x86_pmu_initialized())
-		return;
-
-	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
-#endif
-}
-
 void perf_events_lapic_init(void)
 void perf_events_lapic_init(void)
 {
 {
 	if (!x86_pmu.apic || !x86_pmu_initialized())
 	if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void)
 		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 	}
 	}
 	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-	perf_max_events = x86_pmu.num_counters;
 
 
 	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void)
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
 
+	perf_pmu_register(&pmu);
 	perf_cpu_notifier(x86_pmu_notifier);
 	perf_cpu_notifier(x86_pmu_notifier);
 }
 }
 
 
@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
  * schedulability test, it will be performed at commit time
  */
  */
-static void x86_pmu_start_txn(const struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 
+	perf_pmu_disable(pmu);
 	cpuc->group_flag |= PERF_EVENT_TXN;
 	cpuc->group_flag |= PERF_EVENT_TXN;
 	cpuc->n_txn = 0;
 	cpuc->n_txn = 0;
 }
 }
@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  * schedulability test.
  */
  */
-static void x86_pmu_cancel_txn(const struct pmu *pmu)
+static void x86_pmu_cancel_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 
@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 	 */
 	 */
 	cpuc->n_added -= cpuc->n_txn;
 	cpuc->n_added -= cpuc->n_txn;
 	cpuc->n_events -= cpuc->n_txn;
 	cpuc->n_events -= cpuc->n_txn;
+	perf_pmu_enable(pmu);
 }
 }
 
 
 /*
 /*
@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Perform the group schedulability test as a whole
  * Return 0 if success
  * Return 0 if success
  */
  */
-static int x86_pmu_commit_txn(const struct pmu *pmu)
+static int x86_pmu_commit_txn(struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int assign[X86_PMC_IDX_MAX];
 	int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
 
 	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	cpuc->group_flag &= ~PERF_EVENT_TXN;
-
+	perf_pmu_enable(pmu);
 	return 0;
 	return 0;
 }
 }
 
 
-static const struct pmu pmu = {
-	.enable		= x86_pmu_enable,
-	.disable	= x86_pmu_disable,
-	.start		= x86_pmu_start,
-	.stop		= x86_pmu_stop,
-	.read		= x86_pmu_read,
-	.unthrottle	= x86_pmu_unthrottle,
-	.start_txn	= x86_pmu_start_txn,
-	.cancel_txn	= x86_pmu_cancel_txn,
-	.commit_txn	= x86_pmu_commit_txn,
-};
-
 /*
 /*
  * validate that we can schedule this event
  * validate that we can schedule this event
  */
  */
@@ -1579,12 +1566,22 @@ out:
 	return ret;
 	return ret;
 }
 }
 
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+int x86_pmu_event_init(struct perf_event *event)
 {
 {
-	const struct pmu *tmp;
+	struct pmu *tmp;
 	int err;
 	int err;
 
 
-	err = __hw_perf_event_init(event);
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	err = __x86_pmu_event_init(event);
 	if (!err) {
 	if (!err) {
 		/*
 		/*
 		 * we temporarily connect event to its pmu
 		 * we temporarily connect event to its pmu
@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	if (err) {
 	if (err) {
 		if (event->destroy)
 		if (event->destroy)
 			event->destroy(event);
 			event->destroy(event);
-		return ERR_PTR(err);
 	}
 	}
 
 
-	return &pmu;
+	return err;
 }
 }
 
 
-/*
- * callchain support
- */
+static struct pmu pmu = {
+	.pmu_enable	= x86_pmu_enable,
+	.pmu_disable	= x86_pmu_disable,
 
 
-static inline
-void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-	if (entry->nr < PERF_MAX_STACK_DEPTH)
-		entry->ip[entry->nr++] = ip;
-}
+	.event_init	= x86_pmu_event_init,
 
 
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
+	.add		= x86_pmu_add,
+	.del		= x86_pmu_del,
+	.start		= x86_pmu_start,
+	.stop		= x86_pmu_stop,
+	.read		= x86_pmu_read,
 
 
+	.start_txn	= x86_pmu_start_txn,
+	.cancel_txn	= x86_pmu_cancel_txn,
+	.commit_txn	= x86_pmu_commit_txn,
+};
+
+/*
+ * callchain support
+ */
 
 
 static void
 static void
 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 {
 	struct perf_callchain_entry *entry = data;
 	struct perf_callchain_entry *entry = data;
 
 
-	callchain_store(entry, addr);
+	perf_callchain_store(entry, addr);
 }
 }
 
 
 static const struct stacktrace_ops backtrace_ops = {
 static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
 	.walk_stack		= print_context_stack_bp,
 	.walk_stack		= print_context_stack_bp,
 };
 };
 
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
-	callchain_store(entry, regs->ip);
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* TODO: We don't support guest os callchain now */
+		return;
+	}
+
+	perf_callchain_store(entry, regs->ip);
 
 
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 }
@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (fp < compat_ptr(regs->sp))
 		if (fp < compat_ptr(regs->sp))
 			break;
 			break;
 
 
-		callchain_store(entry, frame.return_address);
+		perf_callchain_store(entry, frame.return_address);
 		fp = compat_ptr(frame.next_frame);
 		fp = compat_ptr(frame.next_frame);
 	}
 	}
 	return 1;
 	return 1;
@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 }
 }
 #endif
 #endif
 
 
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 {
 	struct stack_frame frame;
 	struct stack_frame frame;
 	const void __user *fp;
 	const void __user *fp;
 
 
-	if (!user_mode(regs))
-		regs = task_pt_regs(current);
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* TODO: We don't support guest os callchain now */
+		return;
+	}
 
 
 	fp = (void __user *)regs->bp;
 	fp = (void __user *)regs->bp;
 
 
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, regs->ip);
+	perf_callchain_store(entry, regs->ip);
 
 
 	if (perf_callchain_user32(regs, entry))
 	if (perf_callchain_user32(regs, entry))
 		return;
 		return;
@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if ((unsigned long)fp < regs->sp)
 		if ((unsigned long)fp < regs->sp)
 			break;
 			break;
 
 
-		callchain_store(entry, frame.return_address);
+		perf_callchain_store(entry, frame.return_address);
 		fp = frame.next_frame;
 		fp = frame.next_frame;
 	}
 	}
 }
 }
 
 
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (is_user && current->state != TASK_RUNNING)
-		return;
-
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-
-	if (current->mm)
-		perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry;
-
-	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		/* TODO: We don't support guest os callchain now */
-		return NULL;
-	}
-
-	if (in_nmi())
-		entry = &__get_cpu_var(pmc_nmi_entry);
-	else
-		entry = &__get_cpu_var(pmc_irq_entry);
-
-	entry->nr = 0;
-
-	perf_do_callchain(regs, entry);
-
-	return entry;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
 {
 	unsigned long ip;
 	unsigned long ip;

+ 2 - 2
arch/x86/kernel/cpu/perf_event_amd.c

@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(DTLB) ] = {
  [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
 		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
+		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
 	},
 	},
 	[ C(OP_WRITE) ] = {
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
 		[ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(ITLB) ] = {
  [ C(ITLB) ] = {
 	[ C(OP_READ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
 		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
-		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
+		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
 	},
 	},
 	[ C(OP_WRITE) ] = {
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,
 		[ C(RESULT_ACCESS) ] = -1,

+ 4 - 4
arch/x86/kernel/cpu/perf_event_intel.c

@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	struct cpu_hw_events *cpuc;
 	struct cpu_hw_events *cpuc;
 	int bit, loops;
 	int bit, loops;
 	u64 status;
 	u64 status;
-	int handled = 0;
+	int handled;
 
 
 	perf_sample_data_init(&data, 0);
 	perf_sample_data_init(&data, 0);
 
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 
 	intel_pmu_disable_all();
 	intel_pmu_disable_all();
-	intel_pmu_drain_bts_buffer();
+	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
 	status = intel_pmu_get_status();
 	if (!status) {
 	if (!status) {
 		intel_pmu_enable_all(0);
 		intel_pmu_enable_all(0);
-		return 0;
+		return handled;
 	}
 	}
 
 
 	loops = 0;
 	loops = 0;
@@ -763,7 +763,7 @@ again:
 		data.period = event->hw.last_period;
 		data.period = event->hw.last_period;
 
 
 		if (perf_event_overflow(event, 1, &data, regs))
 		if (perf_event_overflow(event, 1, &data, regs))
-			x86_pmu_stop(event);
+			x86_pmu_stop(event, 0);
 	}
 	}
 
 
 	/*
 	/*

+ 7 - 6
arch/x86/kernel/cpu/perf_event_intel_ds.c

@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
 	update_debugctlmsr(debugctlmsr);
 	update_debugctlmsr(debugctlmsr);
 }
 }
 
 
-static void intel_pmu_drain_bts_buffer(void)
+static int intel_pmu_drain_bts_buffer(void)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
 	struct pt_regs regs;
 	struct pt_regs regs;
 
 
 	if (!event)
 	if (!event)
-		return;
+		return 0;
 
 
 	if (!ds)
 	if (!ds)
-		return;
+		return 0;
 
 
 	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 	top = (struct bts_record *)(unsigned long)ds->bts_index;
 	top = (struct bts_record *)(unsigned long)ds->bts_index;
 
 
 	if (top <= at)
 	if (top <= at)
-		return;
+		return 0;
 
 
 	ds->bts_index = ds->bts_buffer_base;
 	ds->bts_index = ds->bts_buffer_base;
 
 
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
 	perf_prepare_sample(&header, &data, event, &regs);
 	perf_prepare_sample(&header, &data, event, &regs);
 
 
 	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
 	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
-		return;
+		return 1;
 
 
 	for (; at < top; at++) {
 	for (; at < top; at++) {
 		data.ip		= at->from;
 		data.ip		= at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
 	/* There's new data available. */
 	/* There's new data available. */
 	event->hw.interrupts++;
 	event->hw.interrupts++;
 	event->pending_kill = POLL_IN;
 	event->pending_kill = POLL_IN;
+	return 1;
 }
 }
 
 
 /*
 /*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
 
 	if (perf_event_overflow(event, 1, &data, &regs))
 	if (perf_event_overflow(event, 1, &data, &regs))
-		x86_pmu_stop(event);
+		x86_pmu_stop(event, 0);
 }
 }
 
 
 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)

+ 268 - 24
arch/x86/kernel/cpu/perf_event_p4.c

@@ -18,6 +18,8 @@
 struct p4_event_bind {
 struct p4_event_bind {
 	unsigned int opcode;			/* Event code and ESCR selector */
 	unsigned int opcode;			/* Event code and ESCR selector */
 	unsigned int escr_msr[2];		/* ESCR MSR for this event */
 	unsigned int escr_msr[2];		/* ESCR MSR for this event */
+	unsigned int escr_emask;		/* valid ESCR EventMask bits */
+	unsigned int shared;			/* event is shared across threads */
 	char cntr[2][P4_CNTR_LIMIT];		/* counter index (offset), -1 on absence */
 };
 };
 
 
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
 	[P4_EVENT_TC_DELIVER_MODE] = {
 	[P4_EVENT_TC_DELIVER_MODE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
 		.opcode		= P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
 		.escr_msr	= { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
 		.escr_msr	= { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
+		.shared		= 1,
 		.cntr		= { {4, 5, -1}, {6, 7, -1} },
 		.cntr		= { {4, 5, -1}, {6, 7, -1} },
 	},
 	},
 	[P4_EVENT_BPU_FETCH_REQUEST] = {
 	[P4_EVENT_BPU_FETCH_REQUEST] = {
 		.opcode		= P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
 		.opcode		= P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
 		.escr_msr	= { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
 		.escr_msr	= { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	},
 	[P4_EVENT_ITLB_REFERENCE] = {
 	[P4_EVENT_ITLB_REFERENCE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
 		.opcode		= P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
 		.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
 		.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	},
 	[P4_EVENT_MEMORY_CANCEL] = {
 	[P4_EVENT_MEMORY_CANCEL] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
 		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
 		.escr_msr	= { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
 		.escr_msr	= { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	},
 	[P4_EVENT_MEMORY_COMPLETE] = {
 	[P4_EVENT_MEMORY_COMPLETE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
 		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
 		.escr_msr	= { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
 		.escr_msr	= { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	},
 	[P4_EVENT_LOAD_PORT_REPLAY] = {
 	[P4_EVENT_LOAD_PORT_REPLAY] = {
 		.opcode		= P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
 		.opcode		= P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
 		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
 		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	},
 	[P4_EVENT_STORE_PORT_REPLAY] = {
 	[P4_EVENT_STORE_PORT_REPLAY] = {
 		.opcode		= P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
 		.opcode		= P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
 		.escr_msr	= { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
 		.escr_msr	= { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	},
 	[P4_EVENT_MOB_LOAD_REPLAY] = {
 	[P4_EVENT_MOB_LOAD_REPLAY] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
 		.opcode		= P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
 		.escr_msr	= { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
 		.escr_msr	= { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	},
 	[P4_EVENT_PAGE_WALK_TYPE] = {
 	[P4_EVENT_PAGE_WALK_TYPE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
 		.opcode		= P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
 		.escr_msr	= { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
 		.escr_msr	= { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
+		.shared		= 1,
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	},
 	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
 	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
 		.opcode		= P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
 		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
 		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	},
 	[P4_EVENT_IOQ_ALLOCATION] = {
 	[P4_EVENT_IOQ_ALLOCATION] = {
 		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
 		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	},
 	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
 	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
 		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
 		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
 		.escr_msr	= { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
 		.escr_msr	= { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
 		.cntr		= { {2, -1, -1}, {3, -1, -1} },
 	},
 	[P4_EVENT_FSB_DATA_ACTIVITY] = {
 		.opcode		= P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
+		.shared		= 1,
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	[P4_EVENT_BSQ_ALLOCATION] = {		/* shared ESCR, broken CCCR1 */
 		.opcode		= P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
 		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
 		.cntr		= { {0, -1, -1}, {1, -1, -1} },
 	},
 	[P4_EVENT_BSQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
 		.opcode		= P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
 		.escr_msr	= { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
 		.cntr		= { {2, -1, -1}, {3, -1, -1} },
 	},
 	[P4_EVENT_SSE_INPUT_ASSIST] = {
 		.opcode		= P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_PACKED_SP_UOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_PACKED_DP_UOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_SCALAR_SP_UOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_SCALAR_DP_UOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_64BIT_MMX_UOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_128BIT_MMX_UOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_X87_FP_UOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_X87_FP_UOP),
 		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_TC_MISC] = {
 		.opcode		= P4_OPCODE(P4_EVENT_TC_MISC),
 		.escr_msr	= { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
 		.cntr		= { {4, 5, -1}, {6, 7, -1} },
 	},
 	[P4_EVENT_GLOBAL_POWER_EVENTS] = {
 		.opcode		= P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	[P4_EVENT_TC_MS_XFER] = {
 		.opcode		= P4_OPCODE(P4_EVENT_TC_MS_XFER),
 		.escr_msr	= { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
 		.cntr		= { {4, 5, -1}, {6, 7, -1} },
 	},
 	[P4_EVENT_UOP_QUEUE_WRITES] = {
 		.opcode		= P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
 		.escr_msr	= { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
 		.cntr		= { {4, 5, -1}, {6, 7, -1} },
 	},
 	[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
 		.escr_msr	= { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
 		.cntr		= { {4, 5, -1}, {6, 7, -1} },
 	},
 	[P4_EVENT_RETIRED_BRANCH_TYPE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
 		.escr_msr	= { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
 		.cntr		= { {4, 5, -1}, {6, 7, -1} },
 	},
 	[P4_EVENT_RESOURCE_STALL] = {
 		.opcode		= P4_OPCODE(P4_EVENT_RESOURCE_STALL),
 		.escr_msr	= { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_WC_BUFFER] = {
 		.opcode		= P4_OPCODE(P4_EVENT_WC_BUFFER),
 		.escr_msr	= { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
+		.shared		= 1,
 		.cntr		= { {8, 9, -1}, {10, 11, -1} },
 	},
 	[P4_EVENT_B2B_CYCLES] = {
 		.opcode		= P4_OPCODE(P4_EVENT_B2B_CYCLES),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.escr_emask	= 0,
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	[P4_EVENT_BNR] = {
 		.opcode		= P4_OPCODE(P4_EVENT_BNR),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.escr_emask	= 0,
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	[P4_EVENT_SNOOP] = {
 		.opcode		= P4_OPCODE(P4_EVENT_SNOOP),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.escr_emask	= 0,
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	[P4_EVENT_RESPONSE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_RESPONSE),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.escr_emask	= 0,
 		.cntr		= { {0, -1, -1}, {2, -1, -1} },
 	},
 	[P4_EVENT_FRONT_END_EVENT] = {
 		.opcode		= P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
 		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_EXECUTION_EVENT] = {
 		.opcode		= P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
 		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_REPLAY_EVENT] = {
 		.opcode		= P4_OPCODE(P4_EVENT_REPLAY_EVENT),
 		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_INSTR_RETIRED] = {
 		.opcode		= P4_OPCODE(P4_EVENT_INSTR_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_UOPS_RETIRED] = {
 		.opcode		= P4_OPCODE(P4_EVENT_UOPS_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_UOP_TYPE] = {
 		.opcode		= P4_OPCODE(P4_EVENT_UOP_TYPE),
 		.escr_msr	= { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_BRANCH_RETIRED] = {
 		.opcode		= P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+		.escr_emask	=
+		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_X87_ASSIST] = {
 		.opcode		= P4_OPCODE(P4_EVENT_X87_ASSIST),
 		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU)			|
+			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_MACHINE_CLEAR] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
 		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_INSTR_COMPLETED] = {
 		.opcode		= P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+		.escr_emask	=
+			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS)		|
+			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 };
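
The initializers above populate one binding entry per P4 event. Inferred from the fields they set, the entry type looks roughly like the sketch below; this is an illustration only, the authoritative definition lives in arch/x86/include/asm/perf_event_p4.h.

struct p4_event_bind {
	unsigned int	opcode;		/* ESCR event select / event mask template   */
	unsigned int	escr_msr[2];	/* ESCR MSR for logical thread 0 / 1          */
	unsigned int	escr_emask;	/* whitelist of valid ESCR EventMask bits     */
	unsigned int	shared;		/* event counts shared between HT threads     */
	char		cntr[2][3];	/* candidate counters per thread, -1 = unused */
};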
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
 	return config;
 }
 
+/* check cpu model specifics */
+static bool p4_event_match_cpu_model(unsigned int event_idx)
+{
+	/* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
+	if (event_idx == P4_EVENT_INSTR_COMPLETED) {
+		if (boot_cpu_data.x86_model != 3 &&
+			boot_cpu_data.x86_model != 4 &&
+			boot_cpu_data.x86_model != 6)
+			return false;
+	}
+
+	/*
+	 * For info
+	 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
+	 */
+
+	return true;
+}
+
 static int p4_validate_raw_event(struct perf_event *event)
 {
-	unsigned int v;
+	unsigned int v, emask;
 
 
-	/* user data may have out-of-bound event index */
+	/* User data may have out-of-bound event index */
 	v = p4_config_unpack_event(event->attr.config);
-	if (v >= ARRAY_SIZE(p4_event_bind_map)) {
-		pr_warning("P4 PMU: Unknown event code: %d\n", v);
+	if (v >= ARRAY_SIZE(p4_event_bind_map))
+		return -EINVAL;
+
+	/* It may be unsupported: */
+	if (!p4_event_match_cpu_model(v))
 		return -EINVAL;
+
+	/*
+	 * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
+	 * in Architectural Performance Monitoring, it means not
+	 * on _which_ logical cpu to count but rather _when_, ie it
+	 * depends on logical cpu state -- count event if one cpu active,
+	 * none, both or any, so we just allow user to pass any value
+	 * desired.
+	 *
+	 * In turn we always set Tx_OS/Tx_USR bits bound to logical
+	 * cpu without their propagation to another cpu
+	 */
+
+	/*
+	 * if an event is shared across the logical threads
+	 * the user needs special permissions to be able to use it
+	 */
+	if (p4_event_bind_map[v].shared) {
+		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 	}
 
+	/* ESCR EventMask bits may be invalid */
+	emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
+	if (emask & ~p4_event_bind_map[v].escr_emask)
+		return -EINVAL;
+
 	/*
-	 * it may have some screwed PEBS bits
+	 * it may have some invalid PEBS bits
 	 */
-	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
-		pr_warning("P4 PMU: PEBS are not supported yet\n");
+	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
 		return -EINVAL;
-	}
+
 	v = p4_config_unpack_metric(event->attr.config);
-	if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
-		pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+	if (v >= ARRAY_SIZE(p4_pebs_bind_map))
 		return -EINVAL;
-	}
 
 
 	return 0;
 }
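
What this function vets is an ordinary PERF_TYPE_RAW event coming in from user space. A minimal sketch of such an open follows; the raw config value is left symbolic, since its packing is defined by perf_event_p4.h.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Open a P4 raw event on the current task, any CPU.  The raw config may only
 * carry EventMask bits whitelisted in p4_event_bind_map, otherwise
 * p4_validate_raw_event() above rejects it with -EINVAL. */
static int open_p4_raw_event(unsigned long long raw_config)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_RAW;
	attr.config = raw_config;

	/* pid = 0 (self), cpu = -1 (any), no group leader, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}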
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
 
 
 	if (event->attr.type == PERF_TYPE_RAW) {
 
+		/*
+		 * Clear bits we reserve to be managed by kernel itself
+		 * and never allowed from a user space
+		 */
+		 event->attr.config &= P4_CONFIG_MASK;
+
 		rc = p4_validate_raw_event(event);
 		if (rc)
 			goto out;
 
 		/*
-		 * We don't control raw events so it's up to the caller
-		 * to pass sane values (and we don't count the thread number
-		 * on HT machine but allow HT-compatible specifics to be
-		 * passed on)
-		 *
 		 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
 		 * bits since we keep additional info here (for cache events and etc)
-		 *
-		 * XXX: HT wide things should check perf_paranoid_cpu() &&
-		 *      CAP_SYS_ADMIN
 		 */
-		event->hw.config |= event->attr.config &
-			(p4_config_pack_escr(P4_ESCR_MASK_HT) |
-			 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
-
-		event->hw.config &= ~P4_CCCR_FORCE_OVF;
+		event->hw.config |= event->attr.config;
 	}
 
 	rc = x86_setup_perfctr(event);

+ 3 - 3
arch/x86/kernel/entry_64.S

@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
-#ifdef CONFIG_PERF_EVENTS
-apicinterrupt LOCAL_PENDING_VECTOR \
-	perf_pending_interrupt smp_perf_pending_interrupt
+#ifdef CONFIG_IRQ_WORK
+apicinterrupt IRQ_WORK_VECTOR \
+	irq_work_interrupt smp_irq_work_interrupt
 #endif
 
 /*

+ 1 - 62
arch/x86/kernel/ftrace.c

@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	return mod_code_status;
 }
 
-
-
-
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
-
 static unsigned char *ftrace_nop_replace(void)
 {
-	return ftrace_nop;
+	return ideal_nop5;
 }
 
 static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-	extern const unsigned char ftrace_test_p6nop[];
-	extern const unsigned char ftrace_test_nop5[];
-	extern const unsigned char ftrace_test_jmp[];
-	int faulted = 0;
-
-	/*
-	 * There is no good nop for all x86 archs.
-	 * We will default to using the P6_NOP5, but first we
-	 * will test to make sure that the nop will actually
-	 * work on this CPU. If it faults, we will then
-	 * go to a lesser efficient 5 byte nop. If that fails
-	 * we then just use a jmp as our nop. This isn't the most
-	 * efficient nop, but we can not use a multi part nop
-	 * since we would then risk being preempted in the middle
-	 * of that nop, and if we enabled tracing then, it might
-	 * cause a system crash.
-	 *
-	 * TODO: check the cpuid to determine the best nop.
-	 */
-	asm volatile (
-		"ftrace_test_jmp:"
-		"jmp ftrace_test_p6nop\n"
-		"nop\n"
-		"nop\n"
-		"nop\n"  /* 2 byte jmp + 3 bytes */
-		"ftrace_test_p6nop:"
-		P6_NOP5
-		"jmp 1f\n"
-		"ftrace_test_nop5:"
-		".byte 0x66,0x66,0x66,0x66,0x90\n"
-		"1:"
-		".section .fixup, \"ax\"\n"
-		"2:	movl $1, %0\n"
-		"	jmp ftrace_test_nop5\n"
-		"3:	movl $2, %0\n"
-		"	jmp 1b\n"
-		".previous\n"
-		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
-		_ASM_EXTABLE(ftrace_test_nop5, 3b)
-		: "=r"(faulted) : "0" (faulted));
-
-	switch (faulted) {
-	case 0:
-		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
-		memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
-		break;
-	case 1:
-		pr_info("converting mcount calls to 66 66 66 66 90\n");
-		memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
-		break;
-	case 2:
-		pr_info("converting mcount calls to jmp . + 5\n");
-		memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
-		break;
-	}
-
 	/* The return code is retured via data */
 	*(unsigned long *)data = 0;
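
With the boot-time probing above removed, ftrace_nop_replace() simply hands back ideal_nop5, which arch_init_ideal_nop5() (called from setup_arch() later in this merge) selects once at boot. For reference, the two 5-byte NOP encodings named in the deleted messages, written out as plain byte arrays (illustrative, not taken from a kernel header):

/* "0f 1f 44 00 00" - P6 NOPL, preferred when the CPU accepts it */
static const unsigned char p6_nop5[] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
/* "66 66 66 66 90" - prefixed single-byte NOP fallback */
static const unsigned char alt_nop5[] = { 0x66, 0x66, 0x66, 0x66, 0x90 };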
 
 

+ 4 - 4
arch/x86/kernel/irq.c

@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
 	seq_printf(p, "  Performance monitoring interrupts\n");
-	seq_printf(p, "%*s: ", prec, "PND");
+	seq_printf(p, "%*s: ", prec, "IWI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
-	seq_printf(p, "  Performance pending work\n");
+		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+	seq_printf(p, "  IRQ work interrupts\n");
 #endif
 	if (x86_platform_ipi_callback) {
 		seq_printf(p, "%*s: ", prec, "PLT");
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
 	sum += irq_stats(cpu)->apic_perf_irqs;
-	sum += irq_stats(cpu)->apic_pending_irqs;
+	sum += irq_stats(cpu)->apic_irq_work_irqs;
 #endif
 	if (x86_platform_ipi_callback)
 		sum += irq_stats(cpu)->x86_platform_ipis;

+ 30 - 0
arch/x86/kernel/irq_work.c

@@ -0,0 +1,30 @@
+/*
+ * x86 specific code for irq_work
+ *
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+#include <asm/apic.h>
+
+void smp_irq_work_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	ack_APIC_irq();
+	inc_irq_stat(apic_irq_work_irqs);
+	irq_work_run();
+	irq_exit();
+}
+
+void arch_irq_work_raise(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (!cpu_has_apic)
+		return;
+
+	apic->send_IPI_self(IRQ_WORK_VECTOR);
+	apic_wait_icr_idle();
+#endif
+}
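
The handler above only drains work that somebody queued; the producer side uses the generic irq_work API introduced by this series. A sketch, assuming the init_irq_work()/irq_work_queue() helpers from include/linux/irq_work.h of this tree:

#include <linux/irq_work.h>

static void my_callback(struct irq_work *work)
{
	/* Runs in hardirq context once the self-IPI above is taken. */
}

static struct irq_work my_work;

static void queue_from_nmi(void)
{
	init_irq_work(&my_work, my_callback);
	irq_work_queue(&my_work);	/* ends up in arch_irq_work_raise() */
}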

+ 3 - 3
arch/x86/kernel/irqinit.c

@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 
-	/* Performance monitoring interrupts: */
-# ifdef CONFIG_PERF_EVENTS
-	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+	/* IRQ work interrupts: */
+# ifdef CONFIG_IRQ_WORK
+	alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
 # endif
 
 #endif

+ 50 - 0
arch/x86/kernel/jump_label.c

@@ -0,0 +1,50 @@
+/*
+ * jump label x86 support
+ *
+ * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
+ *
+ */
+#include <linux/jump_label.h>
+#include <linux/memory.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/cpu.h>
+#include <asm/kprobes.h>
+#include <asm/alternative.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+union jump_code_union {
+	char code[JUMP_LABEL_NOP_SIZE];
+	struct {
+		char jump;
+		int offset;
+	} __attribute__((packed));
+};
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	union jump_code_union code;
+
+	if (type == JUMP_LABEL_ENABLE) {
+		code.jump = 0xe9;
+		code.offset = entry->target -
+				(entry->code + JUMP_LABEL_NOP_SIZE);
+	} else
+		memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+	get_online_cpus();
+	mutex_lock(&text_mutex);
+	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
+}
+
+void arch_jump_label_text_poke_early(jump_label_t addr)
+{
+	text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+}
+
+#endif
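
A consumer of this patching looks like the dynamic_debug conversion later in this merge: while the key is disabled the site is a 5-byte NOP and execution falls through; enabling the key through the jump_label API rewrites the site into a jump to the unlikely block. A sketch of the call-site pattern (the key name and the slow-path helper are illustrative, not from the kernel):

#include <linux/jump_label.h>

static char my_key;			/* flipped via the jump_label enable/disable API */

static void slow_path_work(void) { }	/* hypothetical out-of-line work */

static void hot_path(void)
{
	JUMP_LABEL(&my_key, do_slow);	/* compiles to ideal_nop5 while disabled */
	return;

do_slow:
	slow_path_work();
}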

+ 5 - 9
arch/x86/kernel/kprobes.c

@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
 	return 0;
 }
 
-/* Dummy buffers for kallsyms_lookup */
-static char __dummy_buf[KSYM_NAME_LEN];
-
 /* Check if paddr is at an instruction boundary */
 static int __kprobes can_probe(unsigned long paddr)
 {
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
 
-	if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+	if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
 		return 0;
 
 	/* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
 	*(unsigned long *)addr = val;
 }
 
-void __kprobes kprobes_optinsn_template_holder(void)
+static void __used __kprobes kprobes_optinsn_template_holder(void)
 {
 	asm volatile (
 			".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
 	}
 	/* Check whether the address range is reserved */
 	if (ftrace_text_reserved(src, src + len - 1) ||
-	    alternatives_text_reserved(src, src + len - 1))
+	    alternatives_text_reserved(src, src + len - 1) ||
+	    jump_label_text_reserved(src, src + len - 1))
 		return -EBUSY;
 
 	return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
 	unsigned long addr, size = 0, offset = 0;
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
-	/* Dummy buffers for lookup_symbol_attrs */
-	static char __dummy_buf[KSYM_NAME_LEN];
 
 
 	/* Lookup symbol including addr */
-	if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
+	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
 		return 0;
 
 	/* Check there is enough space for a relative jump. */

+ 3 - 0
arch/x86/kernel/module.c

@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
 		apply_paravirt(pseg, pseg + para->sh_size);
 	}
 
+	/* make jump label nops */
+	jump_label_apply_nops(me);
+
 	return 0;
 }
 

+ 6 - 0
arch/x86/kernel/setup.c

@@ -112,6 +112,7 @@
 #include <asm/numa_64.h>
 #endif
 #include <asm/mce.h>
+#include <asm/alternative.h>
 
 
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p)
 {
 	int acpi = 0;
 	int k8 = 0;
+	unsigned long flags;
 
 
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p)
 	x86_init.oem.banner();
 
 	mcheck_init();
+
+	local_irq_save(flags);
+	arch_init_ideal_nop5();
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_X86_32

+ 4 - 0
arch/x86/mm/fault.c

@@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;
 
+	WARN_ON_ONCE(in_nmi());
+
 	/*
 	 * Synchronize this task's top level page-table
 	 * with the 'reference' page table.
@@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;
 
+	WARN_ON_ONCE(in_nmi());
+
 	/*
 	 * Copy kernel mappings over when needed. This can also
 	 * happen within a race in page table update. In the later

+ 2 - 0
arch/x86/mm/kmemcheck/kmemcheck.c

@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
 	if (!pte)
 		return false;
 
+	WARN_ON_ONCE(in_nmi());
+
 	if (error_code & 2)
 		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
 	else

+ 59 - 11
arch/x86/oprofile/backtrace.c

@@ -14,6 +14,7 @@
 #include <asm/ptrace.h>
 #include <asm/uaccess.h>
 #include <asm/stacktrace.h>
+#include <linux/compat.h>
 
 
 static void backtrace_warning_symbol(void *data, char *msg,
 				     unsigned long symbol)
@@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = {
 	.walk_stack	= print_context_stack,
 };
 
-struct frame_head {
-	struct frame_head *bp;
-	unsigned long ret;
-} __attribute__((packed));
-
-static struct frame_head *dump_user_backtrace(struct frame_head *head)
+#ifdef CONFIG_COMPAT
+static struct stack_frame_ia32 *
+dump_user_backtrace_32(struct stack_frame_ia32 *head)
 {
-	struct frame_head bufhead[2];
+	struct stack_frame_ia32 bufhead[2];
+	struct stack_frame_ia32 *fp;
 
 
 	/* Also check accessibility of one struct frame_head beyond */
 	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
@@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head)
 	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
 		return NULL;
 
-	oprofile_add_trace(bufhead[0].ret);
+	fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
+
+	oprofile_add_trace(bufhead[0].return_address);
+
+	/* frame pointers should strictly progress back up the stack
+	* (towards higher addresses) */
+	if (head >= fp)
+		return NULL;
+
+	return fp;
+}
+
+static inline int
+x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
+{
+	struct stack_frame_ia32 *head;
+
+	/* User process is 32-bit */
+	if (!current || !test_thread_flag(TIF_IA32))
+		return 0;
+
+	head = (struct stack_frame_ia32 *) regs->bp;
+	while (depth-- && head)
+		head = dump_user_backtrace_32(head);
+
+	return 1;
+}
+
+#else
+static inline int
+x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
+{
+	return 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
+{
+	struct stack_frame bufhead[2];
+
+	/* Also check accessibility of one struct stack_frame beyond */
+	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
+		return NULL;
+	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
+		return NULL;
+
+	oprofile_add_trace(bufhead[0].return_address);
 
 
 	/* frame pointers should strictly progress back up the stack
 	 * (towards higher addresses) */
-	if (head >= bufhead[0].bp)
+	if (head >= bufhead[0].next_frame)
 		return NULL;
 
-	return bufhead[0].bp;
+	return bufhead[0].next_frame;
 }
 
 void
 x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
-	struct frame_head *head = (struct frame_head *)frame_pointer(regs);
+	struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
 
 	if (!user_mode_vm(regs)) {
 		unsigned long stack = kernel_stack_pointer(regs);
@@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 		return;
 	}
 
+	if (x86_backtrace_32(regs, depth))
+		return;
+
 	while (depth-- && head)
 		head = dump_user_backtrace(head);
 }
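
The rewritten walkers rely on the generic frame layouts from asm/stacktrace.h instead of the local frame_head. Judging by the fields referenced above (next_frame, return_address, and compat_ptr() on the 32-bit side), those are presumably laid out roughly like this:

struct stack_frame {
	struct stack_frame	*next_frame;
	unsigned long		return_address;
};

struct stack_frame_ia32 {
	u32	next_frame;		/* 32-bit user frame pointer */
	u32	return_address;
};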

+ 1 - 8
arch/x86/oprofile/nmi_int.c

@@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type)
 	return 1;
 }
 
-/* in order to get sysfs right */
-static int using_nmi;
-
 int __init op_nmi_init(struct oprofile_operations *ops)
 {
 	__u8 vendor = boot_cpu_data.x86_vendor;
@@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 	char *cpu_type = NULL;
 	int ret = 0;
 
-	using_nmi = 0;
-
 	if (!cpu_has_apic)
 		return -ENODEV;
 
@@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 	if (ret)
 		return ret;
 
-	using_nmi = 1;
 	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
 	return 0;
 }
 
 
 void op_nmi_exit(void)
 {
-	if (using_nmi)
-		exit_sysfs();
+	exit_sysfs();
 }

+ 8 - 24
drivers/oprofile/oprof.c

@@ -225,26 +225,17 @@ post_sync:
 	mutex_unlock(&start_mutex);
 }
 
-int oprofile_set_backtrace(unsigned long val)
+int oprofile_set_ulong(unsigned long *addr, unsigned long val)
 {
-	int err = 0;
+	int err = -EBUSY;
 
 
 	mutex_lock(&start_mutex);
-
-	if (oprofile_started) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	if (!oprofile_ops.backtrace) {
-		err = -EINVAL;
-		goto out;
+	if (!oprofile_started) {
+		*addr = val;
+		err = 0;
 	}
-
-	oprofile_backtrace_depth = val;
-
-out:
 	mutex_unlock(&start_mutex);
+
 	return err;
 }
 
@@ -257,16 +248,9 @@ static int __init oprofile_init(void)
 		printk(KERN_INFO "oprofile: using timer interrupt.\n");
 		err = oprofile_timer_init(&oprofile_ops);
 		if (err)
-			goto out_arch;
+			return err;
 	}
-	err = oprofilefs_register();
-	if (err)
-		goto out_arch;
-	return 0;
-
-out_arch:
-	oprofile_arch_exit();
-	return err;
+	return oprofilefs_register();
 }
 
 

+ 1 - 1
drivers/oprofile/oprof.h

@@ -37,7 +37,7 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root);
 int oprofile_timer_init(struct oprofile_operations *ops);
 void oprofile_timer_exit(void);
 
-int oprofile_set_backtrace(unsigned long depth);
+int oprofile_set_ulong(unsigned long *addr, unsigned long val);
 int oprofile_set_timeout(unsigned long time);
 
 #endif /* OPROF_H */

+ 5 - 2
drivers/oprofile/oprofile_files.c

@@ -79,14 +79,17 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou
 	if (*offset)
 		return -EINVAL;
 
+	if (!oprofile_ops.backtrace)
+		return -EINVAL;
+
 	retval = oprofilefs_ulong_from_user(&val, buf, count);
 	if (retval)
 		return retval;
 
 
-	retval = oprofile_set_backtrace(val);
-
+	retval = oprofile_set_ulong(&oprofile_backtrace_depth, val);
 	if (retval)
 		return retval;
+
 	return count;
 }
 

+ 328 - 0
drivers/oprofile/oprofile_perf.c

@@ -0,0 +1,328 @@
+/*
+ * Copyright 2010 ARM Ltd.
+ *
+ * Perf-events backend for OProfile.
+ */
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/oprofile.h>
+#include <linux/slab.h>
+
+/*
+ * Per performance monitor configuration as set via oprofilefs.
+ */
+struct op_counter_config {
+	unsigned long count;
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long unit_mask;
+	unsigned long kernel;
+	unsigned long user;
+	struct perf_event_attr attr;
+};
+
+static int oprofile_perf_enabled;
+static DEFINE_MUTEX(oprofile_perf_mutex);
+
+static struct op_counter_config *counter_config;
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int num_counters;
+
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+			struct perf_sample_data *data, struct pt_regs *regs)
+{
+	int id;
+	u32 cpu = smp_processor_id();
+
+	for (id = 0; id < num_counters; ++id)
+		if (perf_events[cpu][id] == event)
+			break;
+
+	if (id != num_counters)
+		oprofile_add_sample(regs, id);
+	else
+		pr_warning("oprofile: ignoring spurious overflow "
+				"on cpu %u\n", cpu);
+}
+
+/*
+ * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile
+ * settings in counter_config. Attributes are created as `pinned' events and
+ * so are permanently scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+	int i;
+	u32 size = sizeof(struct perf_event_attr);
+	struct perf_event_attr *attr;
+
+	for (i = 0; i < num_counters; ++i) {
+		attr = &counter_config[i].attr;
+		memset(attr, 0, size);
+		attr->type		= PERF_TYPE_RAW;
+		attr->size		= size;
+		attr->config		= counter_config[i].event;
+		attr->sample_period	= counter_config[i].count;
+		attr->pinned		= 1;
+	}
+}
+
+static int op_create_counter(int cpu, int event)
+{
+	struct perf_event *pevent;
+
+	if (!counter_config[event].enabled || perf_events[cpu][event])
+		return 0;
+
+	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
+						  cpu, NULL,
+						  op_overflow_handler);
+
+	if (IS_ERR(pevent))
+		return PTR_ERR(pevent);
+
+	if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		perf_event_release_kernel(pevent);
+		pr_warning("oprofile: failed to enable event %d "
+				"on CPU %d\n", event, cpu);
+		return -EBUSY;
+	}
+
+	perf_events[cpu][event] = pevent;
+
+	return 0;
+}
+
+static void op_destroy_counter(int cpu, int event)
+{
+	struct perf_event *pevent = perf_events[cpu][event];
+
+	if (pevent) {
+		perf_event_release_kernel(pevent);
+		perf_events[cpu][event] = NULL;
+	}
+}
+
+/*
+ * Called by oprofile_perf_start to create active perf events based on the
+ * previously configured attributes.
+ */
+static int op_perf_start(void)
+{
+	int cpu, event, ret = 0;
+
+	for_each_online_cpu(cpu) {
+		for (event = 0; event < num_counters; ++event) {
+			ret = op_create_counter(cpu, event);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Called by oprofile_perf_stop at the end of a profiling run.
+ */
+static void op_perf_stop(void)
+{
+	int cpu, event;
+
+	for_each_online_cpu(cpu)
+		for (event = 0; event < num_counters; ++event)
+			op_destroy_counter(cpu, event);
+}
+
+static int oprofile_perf_create_files(struct super_block *sb, struct dentry *root)
+{
+	unsigned int i;
+
+	for (i = 0; i < num_counters; i++) {
+		struct dentry *dir;
+		char buf[4];
+
+		snprintf(buf, sizeof buf, "%d", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+	}
+
+	return 0;
+}
+
+static int oprofile_perf_setup(void)
+{
+	spin_lock(&oprofilefs_lock);
+	op_perf_setup();
+	spin_unlock(&oprofilefs_lock);
+	return 0;
+}
+
+static int oprofile_perf_start(void)
+{
+	int ret = -EBUSY;
+
+	mutex_lock(&oprofile_perf_mutex);
+	if (!oprofile_perf_enabled) {
+		ret = 0;
+		op_perf_start();
+		oprofile_perf_enabled = 1;
+	}
+	mutex_unlock(&oprofile_perf_mutex);
+	return ret;
+}
+
+static void oprofile_perf_stop(void)
+{
+	mutex_lock(&oprofile_perf_mutex);
+	if (oprofile_perf_enabled)
+		op_perf_stop();
+	oprofile_perf_enabled = 0;
+	mutex_unlock(&oprofile_perf_mutex);
+}
+
+#ifdef CONFIG_PM
+
+static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state)
+{
+	mutex_lock(&oprofile_perf_mutex);
+	if (oprofile_perf_enabled)
+		op_perf_stop();
+	mutex_unlock(&oprofile_perf_mutex);
+	return 0;
+}
+
+static int oprofile_perf_resume(struct platform_device *dev)
+{
+	mutex_lock(&oprofile_perf_mutex);
+	if (oprofile_perf_enabled && op_perf_start())
+		oprofile_perf_enabled = 0;
+	mutex_unlock(&oprofile_perf_mutex);
+	return 0;
+}
+
+static struct platform_driver oprofile_driver = {
+	.driver		= {
+		.name		= "oprofile-perf",
+	},
+	.resume		= oprofile_perf_resume,
+	.suspend	= oprofile_perf_suspend,
+};
+
+static struct platform_device *oprofile_pdev;
+
+static int __init init_driverfs(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&oprofile_driver);
+	if (ret)
+		return ret;
+
+	oprofile_pdev =	platform_device_register_simple(
+				oprofile_driver.driver.name, 0, NULL, 0);
+	if (IS_ERR(oprofile_pdev)) {
+		ret = PTR_ERR(oprofile_pdev);
+		platform_driver_unregister(&oprofile_driver);
+	}
+
+	return ret;
+}
+
+static void exit_driverfs(void)
+{
+	platform_device_unregister(oprofile_pdev);
+	platform_driver_unregister(&oprofile_driver);
+}
+
+#else
+
+static inline int  init_driverfs(void) { return 0; }
+static inline void exit_driverfs(void) { }
+
+#endif /* CONFIG_PM */
+
+void oprofile_perf_exit(void)
+{
+	int cpu, id;
+	struct perf_event *event;
+
+	for_each_possible_cpu(cpu) {
+		for (id = 0; id < num_counters; ++id) {
+			event = perf_events[cpu][id];
+			if (event)
+				perf_event_release_kernel(event);
+		}
+
+		kfree(perf_events[cpu]);
+	}
+
+	kfree(counter_config);
+	exit_driverfs();
+}
+
+int __init oprofile_perf_init(struct oprofile_operations *ops)
+{
+	int cpu, ret = 0;
+
+	ret = init_driverfs();
+	if (ret)
+		return ret;
+
+	memset(&perf_events, 0, sizeof(perf_events));
+
+	num_counters = perf_num_counters();
+	if (num_counters <= 0) {
+		pr_info("oprofile: no performance counters\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	counter_config = kcalloc(num_counters,
+			sizeof(struct op_counter_config), GFP_KERNEL);
+
+	if (!counter_config) {
+		pr_info("oprofile: failed to allocate %d "
+				"counters\n", num_counters);
+		ret = -ENOMEM;
+		num_counters = 0;
+		goto out;
+	}
+
+	for_each_possible_cpu(cpu) {
+		perf_events[cpu] = kcalloc(num_counters,
+				sizeof(struct perf_event *), GFP_KERNEL);
+		if (!perf_events[cpu]) {
+			pr_info("oprofile: failed to allocate %d perf events "
+					"for cpu %d\n", num_counters, cpu);
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	ops->create_files	= oprofile_perf_create_files;
+	ops->setup		= oprofile_perf_setup;
+	ops->start		= oprofile_perf_start;
+	ops->stop		= oprofile_perf_stop;
+	ops->shutdown		= oprofile_perf_stop;
+	ops->cpu_type		= op_name_from_perf_id();
+
+	if (!ops->cpu_type)
+		ret = -ENODEV;
+	else
+		pr_info("oprofile: using %s\n", ops->cpu_type);
+
+out:
+	if (ret)
+		oprofile_perf_exit();
+
+	return ret;
+}
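
An architecture switches to this backend by forwarding its oprofile hooks to oprofile_perf_init()/oprofile_perf_exit() and providing op_name_from_perf_id(). A minimal sketch of that glue (the cpu_type string returned here is illustrative):

#include <linux/oprofile.h>
#include <linux/perf_event.h>

char *op_name_from_perf_id(void)
{
	return "arch/generic";			/* illustrative cpu_type */
}

int __init oprofile_arch_init(struct oprofile_operations *ops)
{
	return oprofile_perf_init(ops);
}

void oprofile_arch_exit(void)
{
	oprofile_perf_exit();
}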

+ 20 - 34
drivers/oprofile/oprofilefs.c

@@ -91,16 +91,20 @@ static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count
 
 
 static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
 {
-	unsigned long *value = file->private_data;
+	unsigned long value;
 	int retval;
 
 	if (*offset)
 		return -EINVAL;
 
-	retval = oprofilefs_ulong_from_user(value, buf, count);
+	retval = oprofilefs_ulong_from_user(&value, buf, count);
+	if (retval)
+		return retval;
 
 
+	retval = oprofile_set_ulong(file->private_data, value);
 	if (retval)
 		return retval;
+
 	return count;
 }
 
@@ -126,50 +130,41 @@ static const struct file_operations ulong_ro_fops = {
 };
 
 
-static struct dentry *__oprofilefs_create_file(struct super_block *sb,
+static int __oprofilefs_create_file(struct super_block *sb,
 	struct dentry *root, char const *name, const struct file_operations *fops,
-	int perm)
+	int perm, void *priv)
 {
 	struct dentry *dentry;
 	struct inode *inode;
 
 	dentry = d_alloc_name(root, name);
 	if (!dentry)
-		return NULL;
+		return -ENOMEM;
 	inode = oprofilefs_get_inode(sb, S_IFREG | perm);
 	if (!inode) {
 		dput(dentry);
-		return NULL;
+		return -ENOMEM;
 	}
 	inode->i_fop = fops;
 	d_add(dentry, inode);
-	return dentry;
+	dentry->d_inode->i_private = priv;
+	return 0;
 }
 
 
 int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root,
 	char const *name, unsigned long *val)
 {
-	struct dentry *d = __oprofilefs_create_file(sb, root, name,
-						     &ulong_fops, 0644);
-	if (!d)
-		return -EFAULT;
-
-	d->d_inode->i_private = val;
-	return 0;
+	return __oprofilefs_create_file(sb, root, name,
+					&ulong_fops, 0644, val);
 }
 
 
 int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root,
 	char const *name, unsigned long *val)
 {
-	struct dentry *d = __oprofilefs_create_file(sb, root, name,
-						     &ulong_ro_fops, 0444);
-	if (!d)
-		return -EFAULT;
-
-	d->d_inode->i_private = val;
-	return 0;
+	return __oprofilefs_create_file(sb, root, name,
+					&ulong_ro_fops, 0444, val);
 }
 
 
@@ -189,31 +184,22 @@ static const struct file_operations atomic_ro_fops = {
 int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root,
 	char const *name, atomic_t *val)
 {
-	struct dentry *d = __oprofilefs_create_file(sb, root, name,
-						     &atomic_ro_fops, 0444);
-	if (!d)
-		return -EFAULT;
-
-	d->d_inode->i_private = val;
-	return 0;
+	return __oprofilefs_create_file(sb, root, name,
+					&atomic_ro_fops, 0444, val);
 }
 
 
 int oprofilefs_create_file(struct super_block *sb, struct dentry *root,
 	char const *name, const struct file_operations *fops)
 {
-	if (!__oprofilefs_create_file(sb, root, name, fops, 0644))
-		return -EFAULT;
-	return 0;
+	return __oprofilefs_create_file(sb, root, name, fops, 0644, NULL);
 }
 
 
 int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root,
 	char const *name, const struct file_operations *fops, int perm)
 {
-	if (!__oprofilefs_create_file(sb, root, name, fops, perm))
-		return -EFAULT;
-	return 0;
+	return __oprofilefs_create_file(sb, root, name, fops, perm, NULL);
 }
 
 

+ 1 - 1
include/asm-generic/hardirq.h

@@ -3,13 +3,13 @@
 
 
 #include <linux/cache.h>
 #include <linux/threads.h>
-#include <linux/irq.h>
 
 
 typedef struct {
 	unsigned int __softirq_pending;
 } ____cacheline_aligned irq_cpustat_t;
 
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+#include <linux/irq.h>
 
 
 #ifndef ack_bad_irq
 static inline void ack_bad_irq(unsigned int irq)

+ 10 - 0
include/asm-generic/vmlinux.lds.h

@@ -220,6 +220,8 @@
 									\
 	BUG_TABLE							\
 									\
+	JUMP_TABLE							\
+									\
 	/* PCI quirks */						\
 	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start_pci_fixups_early) = .;		\
@@ -563,6 +565,14 @@
 #define BUG_TABLE
 #endif
 
+#define JUMP_TABLE							\
+	. = ALIGN(8);							\
+	__jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__start___jump_table) = .;		\
+		*(__jump_table)						\
+		VMLINUX_SYMBOL(__stop___jump_table) = .;		\
+	}
+
 #ifdef CONFIG_PM_TRACE
 #define TRACEDATA							\
 	. = ALIGN(4);							\

+ 21 - 18
include/linux/dynamic_debug.h

@@ -1,6 +1,8 @@
 #ifndef _DYNAMIC_DEBUG_H
 #define _DYNAMIC_DEBUG_H
 
 
+#include <linux/jump_label.h>
+
 /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
  * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
  * use independent hash functions, to reduce the chance of false positives.
@@ -22,8 +24,6 @@ struct _ddebug {
 	const char *function;
 	const char *filename;
 	const char *format;
-	char primary_hash;
-	char secondary_hash;
 	unsigned int lineno:24;
 	/*
 	 * The flags field controls the behaviour at the callsite.
@@ -33,6 +33,7 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_PRINT   (1<<0)  /* printk() a message using the format */
 #define _DPRINTK_FLAGS_PRINT   (1<<0)  /* printk() a message using the format */
 #define _DPRINTK_FLAGS_DEFAULT 0
 #define _DPRINTK_FLAGS_DEFAULT 0
 	unsigned int flags:8;
 	unsigned int flags:8;
+	char enabled;
 } __attribute__((aligned(8)));
 } __attribute__((aligned(8)));
 
 
 
 
@@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
 #if defined(CONFIG_DYNAMIC_DEBUG)
 #if defined(CONFIG_DYNAMIC_DEBUG)
 extern int ddebug_remove_module(const char *mod_name);
 extern int ddebug_remove_module(const char *mod_name);
 
 
-#define __dynamic_dbg_enabled(dd)  ({	     \
-	int __ret = 0;							     \
-	if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) &&	     \
-			(dynamic_debug_enabled2 & (1LL << DEBUG_HASH2))))   \
-				if (unlikely(dd.flags))			     \
-					__ret = 1;			     \
-	__ret; })
-
 #define dynamic_pr_debug(fmt, ...) do {					\
 #define dynamic_pr_debug(fmt, ...) do {					\
+	__label__ do_printk;						\
+	__label__ out;							\
 	static struct _ddebug descriptor				\
 	static struct _ddebug descriptor				\
 	__used								\
 	__used								\
 	__attribute__((section("__verbose"), aligned(8))) =		\
 	__attribute__((section("__verbose"), aligned(8))) =		\
-	{ KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH,	\
-		DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT };	\
-	if (__dynamic_dbg_enabled(descriptor))				\
-		printk(KERN_DEBUG pr_fmt(fmt),	##__VA_ARGS__);		\
+	{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,		\
+		_DPRINTK_FLAGS_DEFAULT };				\
+	JUMP_LABEL(&descriptor.enabled, do_printk);			\
+	goto out;							\
+do_printk:								\
+	printk(KERN_DEBUG pr_fmt(fmt),	##__VA_ARGS__);			\
+out:	;								\
 	} while (0)
 	} while (0)
 
 
 
 
 #define dynamic_dev_dbg(dev, fmt, ...) do {				\
 #define dynamic_dev_dbg(dev, fmt, ...) do {				\
+	__label__ do_printk;						\
+	__label__ out;							\
 	static struct _ddebug descriptor				\
 	static struct _ddebug descriptor				\
 	__used								\
 	__used								\
 	__attribute__((section("__verbose"), aligned(8))) =		\
 	__attribute__((section("__verbose"), aligned(8))) =		\
-	{ KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH,	\
-		DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT };	\
-	if (__dynamic_dbg_enabled(descriptor))				\
-		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
+	{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,		\
+		_DPRINTK_FLAGS_DEFAULT };				\
+	JUMP_LABEL(&descriptor.enabled, do_printk);			\
+	goto out;							\
+do_printk:								\
+	dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);		\
+out:	;								\
 	} while (0)
 	} while (0)
 
 
 #else
 #else
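
The rewritten dynamic_pr_debug()/dynamic_dev_dbg() above jump straight to an out: label unless the per-callsite "enabled" byte says otherwise. A minimal user-space sketch of the same __label__/JUMP_LABEL pattern (assuming GCC; the key, macro and message names here are illustrative, and the generic non-asm-goto fallback is used):

/*
 * User-space sketch, not kernel code: a local label is declared, the
 * "enabled" key decides whether to jump to it, and the print is skipped
 * otherwise.
 */
#include <stdio.h>

#define JUMP_LABEL(key, label)		\
do {					\
	if (*(key))			\
		goto label;		\
} while (0)

static char debug_enabled;	/* stands in for descriptor.enabled */

#define my_pr_debug(fmt, ...) do {			\
	__label__ do_printk;				\
	__label__ out;					\
	JUMP_LABEL(&debug_enabled, do_printk);		\
	goto out;					\
do_printk:						\
	printf(fmt, ##__VA_ARGS__);			\
out:	;						\
} while (0)

int main(void)
{
	my_pr_debug("skipped: %d\n", 1);	/* key is 0, jumps to out */
	debug_enabled = 1;
	my_pr_debug("printed: %d\n", 2);	/* key set, falls into do_printk */
	return 0;
}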

+ 4 - 4
include/linux/ftrace_event.h

@@ -191,8 +191,8 @@ struct ftrace_event_call {
 	unsigned int		flags;
 	unsigned int		flags;
 
 
 #ifdef CONFIG_PERF_EVENTS
 #ifdef CONFIG_PERF_EVENTS
-	int			perf_refcount;
-	struct hlist_head	*perf_events;
+	int				perf_refcount;
+	struct hlist_head __percpu	*perf_events;
 #endif
 #endif
 };
 };
 
 
@@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
 
 
 extern int  perf_trace_init(struct perf_event *event);
 extern int  perf_trace_init(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
-extern int  perf_trace_enable(struct perf_event *event);
-extern void perf_trace_disable(struct perf_event *event);
+extern int  perf_trace_add(struct perf_event *event, int flags);
+extern void perf_trace_del(struct perf_event *event, int flags);
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
 				     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
 extern void ftrace_profile_free_filter(struct perf_event *event);

+ 7 - 1
include/linux/interrupt.h

@@ -18,6 +18,7 @@
 #include <asm/atomic.h>
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
 #include <asm/system.h>
+#include <trace/events/irq.h>
 
 
 /*
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * These correspond to the IORESOURCE_IRQ_* defines in
@@ -407,7 +408,12 @@ asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 asmlinkage void __do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 extern void softirq_init(void);
-#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
+static inline void __raise_softirq_irqoff(unsigned int nr)
+{
+	trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
+	or_softirq_pending(1UL << nr);
+}
+
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 extern void wakeup_softirqd(void);
 extern void wakeup_softirqd(void);

+ 20 - 0
include/linux/irq_work.h

@@ -0,0 +1,20 @@
+#ifndef _LINUX_IRQ_WORK_H
+#define _LINUX_IRQ_WORK_H
+
+struct irq_work {
+	struct irq_work *next;
+	void (*func)(struct irq_work *);
+};
+
+static inline
+void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
+{
+	entry->next = NULL;
+	entry->func = func;
+}
+
+bool irq_work_queue(struct irq_work *entry);
+void irq_work_run(void);
+void irq_work_sync(struct irq_work *entry);
+
+#endif /* _LINUX_IRQ_WORK_H */
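
A hedged usage sketch of the new irq_work interface declared above; the work item, callback and call-site names are hypothetical. The callback runs later in hard interrupt context, while irq_work_queue() itself is safe to call from NMI.

#include <linux/kernel.h>
#include <linux/irq_work.h>

static void wakeup_func(struct irq_work *work)
{
	/* called later from hard interrupt context, IRQs disabled */
	pr_info("deferred irq_work ran\n");
}

static struct irq_work wakeup_work;

static void my_setup(void)
{
	init_irq_work(&wakeup_work, wakeup_func);
}

static void my_nmi_path(void)
{
	/* NMI-safe; returns false if the entry was already queued */
	irq_work_queue(&wakeup_work);
}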

+ 74 - 0
include/linux/jump_label.h

@@ -0,0 +1,74 @@
+#ifndef _LINUX_JUMP_LABEL_H
+#define _LINUX_JUMP_LABEL_H
+
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
+# include <asm/jump_label.h>
+# define HAVE_JUMP_LABEL
+#endif
+
+enum jump_label_type {
+	JUMP_LABEL_ENABLE,
+	JUMP_LABEL_DISABLE
+};
+
+struct module;
+
+#ifdef HAVE_JUMP_LABEL
+
+extern struct jump_entry __start___jump_table[];
+extern struct jump_entry __stop___jump_table[];
+
+extern void arch_jump_label_transform(struct jump_entry *entry,
+				 enum jump_label_type type);
+extern void arch_jump_label_text_poke_early(jump_label_t addr);
+extern void jump_label_update(unsigned long key, enum jump_label_type type);
+extern void jump_label_apply_nops(struct module *mod);
+extern int jump_label_text_reserved(void *start, void *end);
+
+#define jump_label_enable(key) \
+	jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
+
+#define jump_label_disable(key) \
+	jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
+
+#else
+
+#define JUMP_LABEL(key, label)			\
+do {						\
+	if (unlikely(*key))			\
+		goto label;			\
+} while (0)
+
+#define jump_label_enable(cond_var)	\
+do {					\
+       *(cond_var) = 1;			\
+} while (0)
+
+#define jump_label_disable(cond_var)	\
+do {					\
+       *(cond_var) = 0;			\
+} while (0)
+
+static inline int jump_label_apply_nops(struct module *mod)
+{
+	return 0;
+}
+
+static inline int jump_label_text_reserved(void *start, void *end)
+{
+	return 0;
+}
+
+#endif
+
+#define COND_STMT(key, stmt)					\
+do {								\
+	__label__ jl_enabled;					\
+	JUMP_LABEL(key, jl_enabled);				\
+	if (0) {						\
+jl_enabled:							\
+		stmt;						\
+	}							\
+} while (0)
+
+#endif
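
A sketch of how a caller might use this interface (key and function names are hypothetical): COND_STMT() keeps the slow path out of line, and jump_label_enable()/jump_label_disable() flip the key, either by patching the branch when asm goto is available or by writing the variable in the fallback shown above.

#include <linux/kernel.h>
#include <linux/jump_label.h>

static int my_feature_key;		/* the "key": 0 = disabled */

static void my_slow_path(void)
{
	/* only reached while the key is enabled */
}

static inline void my_hook(void)
{
	COND_STMT(&my_feature_key, my_slow_path());
}

static void my_feature_set(int on)
{
	if (on)
		jump_label_enable(&my_feature_key);
	else
		jump_label_disable(&my_feature_key);
}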

+ 44 - 0
include/linux/jump_label_ref.h

@@ -0,0 +1,44 @@
+#ifndef _LINUX_JUMP_LABEL_REF_H
+#define _LINUX_JUMP_LABEL_REF_H
+
+#include <linux/jump_label.h>
+#include <asm/atomic.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+static inline void jump_label_inc(atomic_t *key)
+{
+	if (atomic_add_return(1, key) == 1)
+		jump_label_enable(key);
+}
+
+static inline void jump_label_dec(atomic_t *key)
+{
+	if (atomic_dec_and_test(key))
+		jump_label_disable(key);
+}
+
+#else /* !HAVE_JUMP_LABEL */
+
+static inline void jump_label_inc(atomic_t *key)
+{
+	atomic_inc(key);
+}
+
+static inline void jump_label_dec(atomic_t *key)
+{
+	atomic_dec(key);
+}
+
+#undef JUMP_LABEL
+#define JUMP_LABEL(key, label)						\
+do {									\
+	if (unlikely(__builtin_choose_expr(				\
+	      __builtin_types_compatible_p(typeof(key), atomic_t *),	\
+	      atomic_read((atomic_t *)(key)), *(key))))			\
+		goto label;						\
+} while (0)
+
+#endif /* HAVE_JUMP_LABEL */
+
+#endif /* _LINUX_JUMP_LABEL_REF_H */
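
The atomic_t flavour adds reference counting on top of the plain enable/disable, which is what perf uses for perf_task_events and perf_swevent_enabled[]. A short hypothetical sketch (the key name is illustrative):

#include <linux/jump_label_ref.h>
#include <asm/atomic.h>

static atomic_t my_users = ATOMIC_INIT(0);

static void my_event_init(void)
{
	jump_label_inc(&my_users);	/* 0 -> 1 patches the branch in */
}

static void my_event_destroy(void)
{
	jump_label_dec(&my_users);	/* last user patches it back out */
}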

+ 4 - 1
include/linux/module.h

@@ -350,7 +350,10 @@ struct module
 	struct tracepoint *tracepoints;
 	struct tracepoint *tracepoints;
 	unsigned int num_tracepoints;
 	unsigned int num_tracepoints;
 #endif
 #endif
-
+#ifdef HAVE_JUMP_LABEL
+	struct jump_entry *jump_entries;
+	unsigned int num_jump_entries;
+#endif
 #ifdef CONFIG_TRACING
 #ifdef CONFIG_TRACING
 	const char **trace_bprintk_fmt_start;
 	const char **trace_bprintk_fmt_start;
 	unsigned int num_trace_bprintk_fmt;
 	unsigned int num_trace_bprintk_fmt;

+ 7 - 0
include/linux/oprofile.h

@@ -15,6 +15,7 @@
 
 
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/atomic.h>
 #include <asm/atomic.h>
  
  
 /* Each escaped entry is prefixed by ESCAPE_CODE
 /* Each escaped entry is prefixed by ESCAPE_CODE
@@ -185,4 +186,10 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val);
 int oprofile_add_data64(struct op_entry *entry, u64 val);
 int oprofile_add_data64(struct op_entry *entry, u64 val);
 int oprofile_write_commit(struct op_entry *entry);
 int oprofile_write_commit(struct op_entry *entry);
 
 
+#ifdef CONFIG_PERF_EVENTS
+int __init oprofile_perf_init(struct oprofile_operations *ops);
+void oprofile_perf_exit(void);
+char *op_name_from_perf_id(void);
+#endif /* CONFIG_PERF_EVENTS */
+
 #endif /* OPROFILE_H */
 #endif /* OPROFILE_H */

+ 9 - 0
include/linux/percpu.h

@@ -39,6 +39,15 @@
 	preempt_enable();				\
 } while (0)
 
+#define get_cpu_ptr(var) ({				\
+	preempt_disable();				\
+	this_cpu_ptr(var); })
+
+#define put_cpu_ptr(var) do {				\
+	(void)(var);					\
+	preempt_enable();				\
+} while (0)
+
 #ifdef CONFIG_SMP
 
 /* minimum unit size, also is the maximum supported allocation size */
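
get_cpu_ptr()/put_cpu_ptr() are to dynamically allocated per-cpu data what get_cpu_var()/put_cpu_var() are to static per-cpu variables: they keep preemption off while the pointer is in use. A small hypothetical sketch (struct and function names are illustrative; the alloc_percpu() call is only indicated in a comment):

#include <linux/percpu.h>

struct my_stats {
	unsigned long hits;
};

static struct my_stats __percpu *stats;	/* stats = alloc_percpu(struct my_stats); */

static void my_account_hit(void)
{
	struct my_stats *s = get_cpu_ptr(stats);	/* disables preemption */

	s->hits++;
	put_cpu_ptr(stats);				/* re-enables preemption */
}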

+ 138 - 74
include/linux/perf_event.h

@@ -486,6 +486,8 @@ struct perf_guest_info_callbacks {
 #include <linux/workqueue.h>
 #include <linux/workqueue.h>
 #include <linux/ftrace.h>
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/cpu.h>
+#include <linux/irq_work.h>
+#include <linux/jump_label_ref.h>
 #include <asm/atomic.h>
 #include <asm/atomic.h>
 #include <asm/local.h>
 #include <asm/local.h>
 
 
@@ -529,16 +531,22 @@ struct hw_perf_event {
 			int		last_cpu;
 			int		last_cpu;
 		};
 		};
 		struct { /* software */
 		struct { /* software */
-			s64		remaining;
 			struct hrtimer	hrtimer;
 			struct hrtimer	hrtimer;
 		};
 		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
 		struct { /* breakpoint */
 			struct arch_hw_breakpoint	info;
 			struct arch_hw_breakpoint	info;
 			struct list_head		bp_list;
 			struct list_head		bp_list;
+			/*
+			 * Crufty hack to avoid the chicken and egg
+			 * problem hw_breakpoint has with context
+			 * creation and event initialization.
+			 */
+			struct task_struct		*bp_target;
 		};
 		};
 #endif
 #endif
 	};
 	};
+	int				state;
 	local64_t			prev_count;
 	local64_t			prev_count;
 	u64				sample_period;
 	u64				sample_period;
 	u64				last_period;
 	u64				last_period;
@@ -550,6 +558,13 @@ struct hw_perf_event {
 #endif
 #endif
 };
 };
 
 
+/*
+ * hw_perf_event::state flags
+ */
+#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
+#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
+#define PERF_HES_ARCH		0x04
+
 struct perf_event;
 struct perf_event;
 
 
 /*
 /*
@@ -561,36 +576,70 @@ struct perf_event;
  * struct pmu - generic performance monitoring unit
  * struct pmu - generic performance monitoring unit
  */
  */
 struct pmu {
 struct pmu {
-	int (*enable)			(struct perf_event *event);
-	void (*disable)			(struct perf_event *event);
-	int (*start)			(struct perf_event *event);
-	void (*stop)			(struct perf_event *event);
-	void (*read)			(struct perf_event *event);
-	void (*unthrottle)		(struct perf_event *event);
+	struct list_head		entry;
+
+	int * __percpu			pmu_disable_count;
+	struct perf_cpu_context * __percpu pmu_cpu_context;
+	int				task_ctx_nr;
+
+	/*
+	 * Fully disable/enable this PMU, can be used to protect from the PMI
+	 * as well as for lazy/batch writing of the MSRs.
+	 */
+	void (*pmu_enable)		(struct pmu *pmu); /* optional */
+	void (*pmu_disable)		(struct pmu *pmu); /* optional */
 
 
 	/*
 	/*
-	 * Group events scheduling is treated as a transaction, add group
-	 * events as a whole and perform one schedulability test. If the test
-	 * fails, roll back the whole group
+	 * Try and initialize the event for this PMU.
+	 * Should return -ENOENT when the @event doesn't match this PMU.
 	 */
 	 */
+	int (*event_init)		(struct perf_event *event);
+
+#define PERF_EF_START	0x01		/* start the counter when adding    */
+#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
+#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
 
 
 	/*
 	/*
-	 * Start the transaction, after this ->enable() doesn't need
-	 * to do schedulability tests.
+	 * Adds/Removes a counter to/from the PMU, can be done inside
+	 * a transaction, see the ->*_txn() methods.
 	 */
 	 */
-	void (*start_txn)	(const struct pmu *pmu);
+	int  (*add)			(struct perf_event *event, int flags);
+	void (*del)			(struct perf_event *event, int flags);
+
 	/*
 	/*
-	 * If ->start_txn() disabled the ->enable() schedulability test
+	 * Starts/Stops a counter present on the PMU. The PMI handler
+	 * should stop the counter when perf_event_overflow() returns
+	 * !0. ->start() will be used to continue.
+	 */
+	void (*start)			(struct perf_event *event, int flags);
+	void (*stop)			(struct perf_event *event, int flags);
+
+	/*
+	 * Updates the counter value of the event.
+	 */
+	void (*read)			(struct perf_event *event);
+
+	/*
+	 * Group events scheduling is treated as a transaction, add
+	 * group events as a whole and perform one schedulability test.
+	 * If the test fails, roll back the whole group
+	 *
+	 * Start the transaction, after this ->add() doesn't need to
+	 * do schedulability tests.
+	 */
+	void (*start_txn)	(struct pmu *pmu); /* optional */
+	/*
+	 * If ->start_txn() disabled the ->add() schedulability test
 	 * then ->commit_txn() is required to perform one. On success
 	 * then ->commit_txn() is required to perform one. On success
 	 * the transaction is closed. On error the transaction is kept
 	 * the transaction is closed. On error the transaction is kept
 	 * open until ->cancel_txn() is called.
 	 * open until ->cancel_txn() is called.
 	 */
 	 */
-	int  (*commit_txn)	(const struct pmu *pmu);
+	int  (*commit_txn)	(struct pmu *pmu); /* optional */
 	/*
 	/*
-	 * Will cancel the transaction, assumes ->disable() is called for
-	 * each successfull ->enable() during the transaction.
+	 * Will cancel the transaction, assumes ->del() is called
+	 * for each successfull ->add() during the transaction.
 	 */
 	 */
-	void (*cancel_txn)	(const struct pmu *pmu);
+	void (*cancel_txn)	(struct pmu *pmu); /* optional */
 };
 };
 
 
 /**
 /**
@@ -631,11 +680,6 @@ struct perf_buffer {
 	void				*data_pages[0];
 	void				*data_pages[0];
 };
 };
 
 
-struct perf_pending_entry {
-	struct perf_pending_entry *next;
-	void (*func)(struct perf_pending_entry *);
-};
-
 struct perf_sample_data;
 struct perf_sample_data;
 
 
 typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
 typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
@@ -656,6 +700,7 @@ struct swevent_hlist {
 
 
 #define PERF_ATTACH_CONTEXT	0x01
 #define PERF_ATTACH_CONTEXT	0x01
 #define PERF_ATTACH_GROUP	0x02
 #define PERF_ATTACH_GROUP	0x02
+#define PERF_ATTACH_TASK	0x04
 
 
 /**
 /**
  * struct perf_event - performance event kernel representation:
  * struct perf_event - performance event kernel representation:
@@ -669,7 +714,7 @@ struct perf_event {
 	int				nr_siblings;
 	int				nr_siblings;
 	int				group_flags;
 	int				group_flags;
 	struct perf_event		*group_leader;
 	struct perf_event		*group_leader;
-	const struct pmu		*pmu;
+	struct pmu			*pmu;
 
 
 	enum perf_event_active_state	state;
 	enum perf_event_active_state	state;
 	unsigned int			attach_state;
 	unsigned int			attach_state;
@@ -743,7 +788,7 @@ struct perf_event {
 	int				pending_wakeup;
 	int				pending_wakeup;
 	int				pending_kill;
 	int				pending_kill;
 	int				pending_disable;
 	int				pending_disable;
-	struct perf_pending_entry	pending;
+	struct irq_work			pending;
 
 
 	atomic_t			event_limit;
 	atomic_t			event_limit;
 
 
@@ -763,12 +808,19 @@ struct perf_event {
 #endif /* CONFIG_PERF_EVENTS */
 #endif /* CONFIG_PERF_EVENTS */
 };
 };
 
 
+enum perf_event_context_type {
+	task_context,
+	cpu_context,
+};
+
 /**
 /**
  * struct perf_event_context - event context structure
  * struct perf_event_context - event context structure
  *
  *
  * Used as a container for task events and CPU events as well:
  * Used as a container for task events and CPU events as well:
  */
  */
 struct perf_event_context {
 struct perf_event_context {
+	enum perf_event_context_type	type;
+	struct pmu			*pmu;
 	/*
 	/*
 	 * Protect the states of the events in the list,
 	 * Protect the states of the events in the list,
 	 * nr_active, and the list:
 	 * nr_active, and the list:
@@ -808,6 +860,12 @@ struct perf_event_context {
 	struct rcu_head			rcu_head;
 	struct rcu_head			rcu_head;
 };
 };
 
 
+/*
+ * Number of contexts where an event can trigger:
+ * 	task, softirq, hardirq, nmi.
+ */
+#define PERF_NR_CONTEXTS	4
+
 /**
 /**
  * struct perf_event_cpu_context - per cpu event context structure
  * struct perf_event_cpu_context - per cpu event context structure
  */
  */
@@ -815,18 +873,9 @@ struct perf_cpu_context {
 	struct perf_event_context	ctx;
 	struct perf_event_context	ctx;
 	struct perf_event_context	*task_ctx;
 	struct perf_event_context	*task_ctx;
 	int				active_oncpu;
 	int				active_oncpu;
-	int				max_pertask;
 	int				exclusive;
 	int				exclusive;
-	struct swevent_hlist		*swevent_hlist;
-	struct mutex			hlist_mutex;
-	int				hlist_refcount;
-
-	/*
-	 * Recursion avoidance:
-	 *
-	 * task, softirq, irq, nmi context
-	 */
-	int				recursion[4];
+	struct list_head		rotation_list;
+	int				jiffies_interval;
 };
 };
 
 
 struct perf_output_handle {
 struct perf_output_handle {
@@ -842,26 +891,34 @@ struct perf_output_handle {
 
 
 #ifdef CONFIG_PERF_EVENTS
 #ifdef CONFIG_PERF_EVENTS
 
 
-/*
- * Set by architecture code:
- */
-extern int perf_max_events;
+extern int perf_pmu_register(struct pmu *pmu);
+extern void perf_pmu_unregister(struct pmu *pmu);
+
+extern int perf_num_counters(void);
+extern const char *perf_pmu_name(void);
+extern void __perf_event_task_sched_in(struct task_struct *task);
+extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
 
 
-extern const struct pmu *hw_perf_event_init(struct perf_event *event);
+extern atomic_t perf_task_events;
+
+static inline void perf_event_task_sched_in(struct task_struct *task)
+{
+	COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
+}
+
+static inline
+void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
+{
+	COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
+}
 
 
-extern void perf_event_task_sched_in(struct task_struct *task);
-extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
-extern void perf_event_task_tick(struct task_struct *task);
 extern int perf_event_init_task(struct task_struct *child);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_free_task(struct task_struct *task);
-extern void set_perf_event_pending(void);
-extern void perf_event_do_pending(void);
+extern void perf_event_delayed_put(struct task_struct *task);
 extern void perf_event_print_debug(void);
 extern void perf_event_print_debug(void);
-extern void __perf_disable(void);
-extern bool __perf_enable(void);
-extern void perf_disable(void);
-extern void perf_enable(void);
+extern void perf_pmu_disable(struct pmu *pmu);
+extern void perf_pmu_enable(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int perf_event_task_enable(void);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern void perf_event_update_userpage(struct perf_event *event);
@@ -869,7 +926,7 @@ extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
 extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				int cpu,
 				int cpu,
-				pid_t pid,
+				struct task_struct *task,
 				perf_overflow_handler_t callback);
 				perf_overflow_handler_t callback);
 extern u64 perf_event_read_value(struct perf_event *event,
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 				 u64 *enabled, u64 *running);
@@ -920,14 +977,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
  */
  */
 static inline int is_software_event(struct perf_event *event)
 static inline int is_software_event(struct perf_event *event)
 {
 {
-	switch (event->attr.type) {
-	case PERF_TYPE_SOFTWARE:
-	case PERF_TYPE_TRACEPOINT:
-	/* for now the breakpoint stuff also works as software event */
-	case PERF_TYPE_BREAKPOINT:
-		return 1;
-	}
-	return 0;
+	return event->pmu->task_ctx_nr == perf_sw_context;
 }
 }
 
 
 extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -954,18 +1004,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
 	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
 }
 }
 
 
-static inline void
+static __always_inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
 {
-	if (atomic_read(&perf_swevent_enabled[event_id])) {
-		struct pt_regs hot_regs;
-
-		if (!regs) {
-			perf_fetch_caller_regs(&hot_regs);
-			regs = &hot_regs;
-		}
-		__perf_sw_event(event_id, nr, nmi, regs, addr);
+	struct pt_regs hot_regs;
+
+	JUMP_LABEL(&perf_swevent_enabled[event_id], have_event);
+	return;
+
+have_event:
+	if (!regs) {
+		perf_fetch_caller_regs(&hot_regs);
+		regs = &hot_regs;
 	}
 	}
+	__perf_sw_event(event_id, nr, nmi, regs, addr);
 }
 }
 
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
 extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -976,7 +1028,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
 
-extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+/* Callchains */
+DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
+
+extern void perf_callchain_user(struct perf_callchain_entry *entry,
+				struct pt_regs *regs);
+extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
+				  struct pt_regs *regs);
+
+
+static inline void
+perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
+{
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
+		entry->ip[entry->nr++] = ip;
+}
 
 
 extern int sysctl_perf_event_paranoid;
 extern int sysctl_perf_event_paranoid;
 extern int sysctl_perf_event_mlock;
 extern int sysctl_perf_event_mlock;
@@ -1019,21 +1085,18 @@ extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
 extern void perf_swevent_put_recursion_context(int rctx);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
+extern void perf_event_task_tick(void);
 #else
 #else
 static inline void
 static inline void
 perf_event_task_sched_in(struct task_struct *task)			{ }
 perf_event_task_sched_in(struct task_struct *task)			{ }
 static inline void
 static inline void
 perf_event_task_sched_out(struct task_struct *task,
 perf_event_task_sched_out(struct task_struct *task,
 			    struct task_struct *next)			{ }
 			    struct task_struct *next)			{ }
-static inline void
-perf_event_task_tick(struct task_struct *task)				{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
-static inline void perf_event_do_pending(void)				{ }
+static inline void perf_event_delayed_put(struct task_struct *task)	{ }
 static inline void perf_event_print_debug(void)				{ }
 static inline void perf_event_print_debug(void)				{ }
-static inline void perf_disable(void)					{ }
-static inline void perf_enable(void)					{ }
 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
 
 
@@ -1056,6 +1119,7 @@ static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)		{ }
+static inline void perf_event_task_tick(void)				{ }
 #endif
 #endif
 
 
 #define perf_output_put(handle, x) \
 #define perf_output_put(handle, x) \
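
The comments above describe the reworked PMU interface: event_init() returns -ENOENT for events that belong to another PMU, add()/del() install and remove counters (optionally inside a transaction), and start()/stop() drive the PERF_HES_* state bits. A hedged, minimal skeleton of a PMU written against this interface, mirroring the breakpoint PMU added later in this merge; all my_* names are hypothetical and the actual counter programming is elided.

#include <linux/perf_event.h>

static int my_pmu_event_init(struct perf_event *event)
{
	if (event->attr.type != PERF_TYPE_SOFTWARE)
		return -ENOENT;	/* not ours, the core tries the next pmu */
	return 0;
}

static int my_pmu_add(struct perf_event *event, int flags)
{
	event->hw.state = (flags & PERF_EF_START) ? 0 : PERF_HES_STOPPED;
	/* program the counter here */
	return 0;
}

static void my_pmu_del(struct perf_event *event, int flags)
{
	/* unprogram the counter, fold the final value into event->count */
}

static void my_pmu_start(struct perf_event *event, int flags)
{
	event->hw.state = 0;
}

static void my_pmu_stop(struct perf_event *event, int flags)
{
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static void my_pmu_read(struct perf_event *event)
{
	/* update event->count from the hardware */
}

static struct pmu my_pmu = {
	.task_ctx_nr	= perf_sw_context,
	.event_init	= my_pmu_event_init,
	.add		= my_pmu_add,
	.del		= my_pmu_del,
	.start		= my_pmu_start,
	.stop		= my_pmu_stop,
	.read		= my_pmu_read,
};

/* registered once at init time: perf_pmu_register(&my_pmu); */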

+ 8 - 1
include/linux/sched.h

@@ -1160,6 +1160,13 @@ struct sched_rt_entity {
 
 
 struct rcu_node;
 struct rcu_node;
 
 
+enum perf_event_task_context {
+	perf_invalid_context = -1,
+	perf_hw_context = 0,
+	perf_sw_context,
+	perf_nr_task_contexts,
+};
+
 struct task_struct {
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	void *stack;
 	void *stack;
@@ -1433,7 +1440,7 @@ struct task_struct {
 	struct futex_pi_state *pi_state_cache;
 	struct futex_pi_state *pi_state_cache;
 #endif
 #endif
 #ifdef CONFIG_PERF_EVENTS
 #ifdef CONFIG_PERF_EVENTS
-	struct perf_event_context *perf_event_ctxp;
+	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
 	struct mutex perf_event_mutex;
 	struct mutex perf_event_mutex;
 	struct list_head perf_event_list;
 	struct list_head perf_event_list;
 #endif
 #endif

+ 8 - 2
include/linux/stop_machine.h

@@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
 
 #else	 /* CONFIG_STOP_MACHINE && CONFIG_SMP */
 #else	 /* CONFIG_STOP_MACHINE && CONFIG_SMP */
 
 
-static inline int stop_machine(int (*fn)(void *), void *data,
-			       const struct cpumask *cpus)
+static inline int __stop_machine(int (*fn)(void *), void *data,
+				 const struct cpumask *cpus)
 {
 {
 	int ret;
 	int ret;
 	local_irq_disable();
 	local_irq_disable();
@@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	return ret;
 	return ret;
 }
 }
 
 
+static inline int stop_machine(int (*fn)(void *), void *data,
+			       const struct cpumask *cpus)
+{
+	return __stop_machine(fn, data, cpus);
+}
+
 #endif	/* CONFIG_STOP_MACHINE && CONFIG_SMP */
 #endif	/* CONFIG_STOP_MACHINE && CONFIG_SMP */
 #endif	/* _LINUX_STOP_MACHINE */
 #endif	/* _LINUX_STOP_MACHINE */
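
The UP stub now also provides __stop_machine() with the same interrupts-off semantics as the SMP version, so code that calls it directly keeps building without CONFIG_SMP. A brief hypothetical usage sketch of the common entry point:

#include <linux/stop_machine.h>

/*
 * Hypothetical callback: runs with IRQs off and, on SMP, with every
 * other CPU parked in the stop_machine loop.
 */
static int patch_site(void *arg)
{
	/* ... modify whatever must not be observed half-way ... */
	return 0;
}

static int do_patch(void *site)
{
	/* NULL cpumask: the callback may run on any one CPU */
	return stop_machine(patch_site, site, NULL);
}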

+ 4 - 1
include/linux/tracepoint.h

@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/rcupdate.h>
 #include <linux/rcupdate.h>
+#include <linux/jump_label.h>
 
 
 struct module;
 struct module;
 struct tracepoint;
 struct tracepoint;
@@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 	extern struct tracepoint __tracepoint_##name;			\
 	extern struct tracepoint __tracepoint_##name;			\
 	static inline void trace_##name(proto)				\
 	static inline void trace_##name(proto)				\
 	{								\
 	{								\
-		if (unlikely(__tracepoint_##name.state))		\
+		JUMP_LABEL(&__tracepoint_##name.state, do_trace);	\
+		return;							\
+do_trace:								\
 			__DO_TRACE(&__tracepoint_##name,		\
 			__DO_TRACE(&__tracepoint_##name,		\
 				TP_PROTO(data_proto),			\
 				TP_PROTO(data_proto),			\
 				TP_ARGS(data_args));			\
 				TP_ARGS(data_args));			\

+ 24 - 2
include/trace/events/irq.h

@@ -5,7 +5,9 @@
 #define _TRACE_IRQ_H
 #define _TRACE_IRQ_H
 
 
 #include <linux/tracepoint.h>
 #include <linux/tracepoint.h>
-#include <linux/interrupt.h>
+
+struct irqaction;
+struct softirq_action;
 
 
 #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
 #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
 #define show_softirq_name(val)				\
 #define show_softirq_name(val)				\
@@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq,
 	),
 	),
 
 
 	TP_fast_assign(
 	TP_fast_assign(
-		__entry->vec = (int)(h - vec);
+		if (vec)
+			__entry->vec = (int)(h - vec);
+		else
+			__entry->vec = (int)(long)h;
 	),
 	),
 
 
 	TP_printk("vec=%d [action=%s]", __entry->vec,
 	TP_printk("vec=%d [action=%s]", __entry->vec,
@@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit,
 	TP_ARGS(h, vec)
 	TP_ARGS(h, vec)
 );
 );
 
 
+/**
+ * softirq_raise - called immediately when a softirq is raised
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter contains a pointer to the softirq vector number which is
+ * raised. @vec is NULL and it means @h includes vector number not
+ * softirq_action. When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq raise latency.
+ */
+DEFINE_EVENT(softirq, softirq_raise,
+
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+	TP_ARGS(h, vec)
+);
+
 #endif /*  _TRACE_IRQ_H */
 #endif /*  _TRACE_IRQ_H */
 
 
 /* This part must be outside protection */
 /* This part must be outside protection */
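
The softirq_raise docstring above relies on a dual encoding: softirq_entry/exit pass a pointer into softirq_vec[] plus the array base, while __raise_softirq_irqoff() passes the vector number cast to a pointer and a NULL base. A tiny user-space illustration of how TP_fast_assign() recovers the same integer in both cases (the struct and array here are stand-ins, not kernel code):

#include <stdio.h>

struct softirq_action { void *action; };

static struct softirq_action softirq_vec[10];

static int decode_vec(struct softirq_action *h, struct softirq_action *vec)
{
	return vec ? (int)(h - vec) : (int)(long)h;
}

int main(void)
{
	/* softirq_entry/exit style: pointer into the array */
	printf("%d\n", decode_vec(&softirq_vec[3], softirq_vec));		/* 3 */
	/* softirq_raise style: vector number smuggled in the pointer */
	printf("%d\n", decode_vec((struct softirq_action *)(long)3, NULL));	/* 3 */
	return 0;
}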

+ 23 - 2
include/trace/events/napi.h

@@ -6,10 +6,31 @@
 
 
 #include <linux/netdevice.h>
 #include <linux/netdevice.h>
 #include <linux/tracepoint.h>
 #include <linux/tracepoint.h>
+#include <linux/ftrace.h>
+
+#define NO_DEV "(no_device)"
+
+TRACE_EVENT(napi_poll,
 
 
-DECLARE_TRACE(napi_poll,
 	TP_PROTO(struct napi_struct *napi),
 	TP_PROTO(struct napi_struct *napi),
-	TP_ARGS(napi));
+
+	TP_ARGS(napi),
+
+	TP_STRUCT__entry(
+		__field(	struct napi_struct *,	napi)
+		__string(	dev_name, napi->dev ? napi->dev->name : NO_DEV)
+	),
+
+	TP_fast_assign(
+		__entry->napi = napi;
+		__assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
+	),
+
+	TP_printk("napi poll on napi struct %p for device %s",
+		__entry->napi, __get_str(dev_name))
+);
+
+#undef NO_DEV
 
 
 #endif /* _TRACE_NAPI_H_ */
 #endif /* _TRACE_NAPI_H_ */
 
 

+ 82 - 0
include/trace/events/net.h

@@ -0,0 +1,82 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM net
+
+#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NET_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(net_dev_xmit,
+
+	TP_PROTO(struct sk_buff *skb,
+		 int rc),
+
+	TP_ARGS(skb, rc),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned int,	len		)
+		__field(	int,		rc		)
+		__string(	name,		skb->dev->name	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		__entry->len = skb->len;
+		__entry->rc = rc;
+		__assign_str(name, skb->dev->name);
+	),
+
+	TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
+		__get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
+);
+
+DECLARE_EVENT_CLASS(net_dev_template,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned int,	len		)
+		__string(	name,		skb->dev->name	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		__entry->len = skb->len;
+		__assign_str(name, skb->dev->name);
+	),
+
+	TP_printk("dev=%s skbaddr=%p len=%u",
+		__get_str(name), __entry->skbaddr, __entry->len)
+)
+
+DEFINE_EVENT(net_dev_template, net_dev_queue,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb)
+);
+
+DEFINE_EVENT(net_dev_template, netif_receive_skb,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb)
+);
+
+DEFINE_EVENT(net_dev_template, netif_rx,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb)
+);
+#endif /* _TRACE_NET_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

+ 87 - 3
include/trace/events/power.h

@@ -10,12 +10,17 @@
 #ifndef _TRACE_POWER_ENUM_
 #ifndef _TRACE_POWER_ENUM_
 #define _TRACE_POWER_ENUM_
 #define _TRACE_POWER_ENUM_
 enum {
 enum {
-	POWER_NONE = 0,
-	POWER_CSTATE = 1,
-	POWER_PSTATE = 2,
+	POWER_NONE	= 0,
+	POWER_CSTATE	= 1,	/* C-State */
+	POWER_PSTATE	= 2,	/* Frequency change or DVFS */
+	POWER_SSTATE	= 3,	/* Suspend */
 };
 };
 #endif
 #endif
 
 
+/*
+ * The power events are used for cpuidle & suspend (power_start, power_end)
+ *  and for cpufreq (power_frequency)
+ */
 DECLARE_EVENT_CLASS(power,
 DECLARE_EVENT_CLASS(power,
 
 
 	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 	TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
@@ -70,6 +75,85 @@ TRACE_EVENT(power_end,
 
 
 );
 );
 
 
+/*
+ * The clock events are used for clock enable/disable and for
+ *  clock rate change
+ */
+DECLARE_EVENT_CLASS(clock,
+
+	TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
+
+	TP_ARGS(name, state, cpu_id),
+
+	TP_STRUCT__entry(
+		__string(       name,           name            )
+		__field(        u64,            state           )
+		__field(        u64,            cpu_id          )
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->state = state;
+		__entry->cpu_id = cpu_id;
+	),
+
+	TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
+		(unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
+);
+
+DEFINE_EVENT(clock, clock_enable,
+
+	TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
+
+	TP_ARGS(name, state, cpu_id)
+);
+
+DEFINE_EVENT(clock, clock_disable,
+
+	TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
+
+	TP_ARGS(name, state, cpu_id)
+);
+
+DEFINE_EVENT(clock, clock_set_rate,
+
+	TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
+
+	TP_ARGS(name, state, cpu_id)
+);
+
+/*
+ * The power domain events are used for power domains transitions
+ */
+DECLARE_EVENT_CLASS(power_domain,
+
+	TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
+
+	TP_ARGS(name, state, cpu_id),
+
+	TP_STRUCT__entry(
+		__string(       name,           name            )
+		__field(        u64,            state           )
+		__field(        u64,            cpu_id          )
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->state = state;
+		__entry->cpu_id = cpu_id;
+),
+
+	TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
+		(unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
+);
+
+DEFINE_EVENT(power_domain, power_domain_target,
+
+	TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
+
+	TP_ARGS(name, state, cpu_id)
+);
+
 #endif /* _TRACE_POWER_H */
 #endif /* _TRACE_POWER_H */
 
 
 /* This part must be outside protection */
 /* This part must be outside protection */
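
A hypothetical sketch of a clock implementation emitting the new clock_* events (the trace_clock_enable()/trace_clock_set_rate() calls are generated from the DEFINE_EVENT()s above; the surrounding driver functions are illustrative only):

#include <linux/smp.h>
#include <trace/events/power.h>

static void my_clk_enable(const char *name)
{
	trace_clock_enable(name, 1, smp_processor_id());
	/* ... touch the enable register ... */
}

static void my_clk_set_rate(const char *name, unsigned int rate)
{
	trace_clock_set_rate(name, rate, smp_processor_id());
	/* ... program the divider ... */
}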

+ 17 - 0
include/trace/events/skb.h

@@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb,
 		__entry->skbaddr, __entry->protocol, __entry->location)
 		__entry->skbaddr, __entry->protocol, __entry->location)
 );
 );
 
 
+TRACE_EVENT(consume_skb,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb),
+
+	TP_STRUCT__entry(
+		__field(	void *,	skbaddr	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+	),
+
+	TP_printk("skbaddr=%p", __entry->skbaddr)
+);
+
 TRACE_EVENT(skb_copy_datagram_iovec,
 TRACE_EVENT(skb_copy_datagram_iovec,
 
 
 	TP_PROTO(const struct sk_buff *skb, int len),
 	TP_PROTO(const struct sk_buff *skb, int len),

+ 8 - 0
init/Kconfig

@@ -21,6 +21,13 @@ config CONSTRUCTORS
 	depends on !UML
 	depends on !UML
 	default y
 	default y
 
 
+config HAVE_IRQ_WORK
+	bool
+
+config IRQ_WORK
+	bool
+	depends on HAVE_IRQ_WORK
+
 menu "General setup"
 menu "General setup"
 
 
 config EXPERIMENTAL
 config EXPERIMENTAL
@@ -1005,6 +1012,7 @@ config PERF_EVENTS
 	default y if (PROFILING || PERF_COUNTERS)
 	default y if (PROFILING || PERF_COUNTERS)
 	depends on HAVE_PERF_EVENTS
 	depends on HAVE_PERF_EVENTS
 	select ANON_INODES
 	select ANON_INODES
+	select IRQ_WORK
 	help
 	help
 	  Enable kernel support for various performance events provided
 	  Enable kernel support for various performance events provided
 	  by software and hardware.
 	  by software and hardware.

+ 3 - 1
kernel/Makefile

@@ -10,7 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
-	    async.o range.o
+	    async.o range.o jump_label.o
 obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
 obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
 obj-y += groups.o
 obj-y += groups.o
 
 
@@ -23,6 +23,7 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_perf_event.o = -pg
 CFLAGS_REMOVE_perf_event.o = -pg
+CFLAGS_REMOVE_irq_work.o = -pg
 endif
 endif
 
 
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -101,6 +102,7 @@ obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SMP) += sched_cpupri.o
+obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o

+ 1 - 3
kernel/exit.c

@@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
-#ifdef CONFIG_PERF_EVENTS
-	WARN_ON_ONCE(tsk->perf_event_ctxp);
-#endif
+	perf_event_delayed_put(tsk);
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }

+ 63 - 12
kernel/hw_breakpoint.c

@@ -113,12 +113,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
  */
  */
 static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
 static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
 {
 {
-	struct perf_event_context *ctx = bp->ctx;
+	struct task_struct *tsk = bp->hw.bp_target;
 	struct perf_event *iter;
 	struct perf_event *iter;
 	int count = 0;
 	int count = 0;
 
 
 	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
 	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
-		if (iter->ctx == ctx && find_slot_idx(iter) == type)
+		if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type)
 			count += hw_breakpoint_weight(iter);
 			count += hw_breakpoint_weight(iter);
 	}
 	}
 
 
@@ -134,7 +134,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		    enum bp_type_idx type)
 		    enum bp_type_idx type)
 {
 {
 	int cpu = bp->cpu;
 	int cpu = bp->cpu;
-	struct task_struct *tsk = bp->ctx->task;
+	struct task_struct *tsk = bp->hw.bp_target;
 
 
 	if (cpu >= 0) {
 	if (cpu >= 0) {
 		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
@@ -213,7 +213,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 	       int weight)
 	       int weight)
 {
 {
 	int cpu = bp->cpu;
 	int cpu = bp->cpu;
-	struct task_struct *tsk = bp->ctx->task;
+	struct task_struct *tsk = bp->hw.bp_target;
 
 
 	/* Pinned counter cpu profiling */
 	/* Pinned counter cpu profiling */
 	if (!tsk) {
 	if (!tsk) {
@@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered,
 			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)
 			    struct task_struct *tsk)
 {
 {
-	return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk),
-						triggered);
+	return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
 }
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
 
@@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 	get_online_cpus();
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 	for_each_online_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
 		pevent = per_cpu_ptr(cpu_events, cpu);
-		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
+		bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
 
 
 		*pevent = bp;
 		*pevent = bp;
 
 
@@ -566,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 	.priority = 0x7fffffff
 	.priority = 0x7fffffff
 };
 };
 
 
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static int hw_breakpoint_event_init(struct perf_event *bp)
+{
+	int err;
+
+	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
+		return -ENOENT;
+
+	err = register_perf_hw_breakpoint(bp);
+	if (err)
+		return err;
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return 0;
+}
+
+static int hw_breakpoint_add(struct perf_event *bp, int flags)
+{
+	if (!(flags & PERF_EF_START))
+		bp->hw.state = PERF_HES_STOPPED;
+
+	return arch_install_hw_breakpoint(bp);
+}
+
+static void hw_breakpoint_del(struct perf_event *bp, int flags)
+{
+	arch_uninstall_hw_breakpoint(bp);
+}
+
+static void hw_breakpoint_start(struct perf_event *bp, int flags)
+{
+	bp->hw.state = 0;
+}
+
+static void hw_breakpoint_stop(struct perf_event *bp, int flags)
+{
+	bp->hw.state = PERF_HES_STOPPED;
+}
+
+static struct pmu perf_breakpoint = {
+	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
+
+	.event_init	= hw_breakpoint_event_init,
+	.add		= hw_breakpoint_add,
+	.del		= hw_breakpoint_del,
+	.start		= hw_breakpoint_start,
+	.stop		= hw_breakpoint_stop,
+	.read		= hw_breakpoint_pmu_read,
+};
+
 static int __init init_hw_breakpoint(void)
 static int __init init_hw_breakpoint(void)
 {
 {
 	unsigned int **task_bp_pinned;
 	unsigned int **task_bp_pinned;
@@ -587,6 +641,8 @@ static int __init init_hw_breakpoint(void)
 
 
 	constraints_initialized = 1;
 	constraints_initialized = 1;
 
 
+	perf_pmu_register(&perf_breakpoint);
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
 
 
  err_alloc:
  err_alloc:
@@ -602,8 +658,3 @@ static int __init init_hw_breakpoint(void)
 core_initcall(init_hw_breakpoint);
 core_initcall(init_hw_breakpoint);
 
 
 
 
-struct pmu perf_ops_bp = {
-	.enable		= arch_install_hw_breakpoint,
-	.disable	= arch_uninstall_hw_breakpoint,
-	.read		= hw_breakpoint_pmu_read,
-};
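
With this conversion, breakpoints are ordinary perf events backed by the perf_breakpoint PMU, and in-kernel users still go through the hw_breakpoint wrappers, which now hand a task_struct pointer (or NULL for per-cpu events) to perf_event_create_kernel_counter(). A hedged usage sketch modelled on samples/hw_breakpoint/data_breakpoint.c; the watched symbol and handler name are illustrative:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static struct perf_event * __percpu *wp;

static void wp_handler(struct perf_event *bp, int nmi,
		       struct perf_sample_data *data, struct pt_regs *regs)
{
	dump_stack();	/* someone wrote to the watched location */
}

static int __init wp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = kallsyms_lookup_name("pid_max");
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	/* one breakpoint event per online CPU, through the pmu above */
	wp = register_wide_hw_breakpoint(&attr, wp_handler);
	if (IS_ERR((void __force *)wp))
		return PTR_ERR((void __force *)wp);
	return 0;
}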

+ 164 - 0
kernel/irq_work.c

@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Provides a framework for enqueueing and running callbacks from hardirq
+ * context. The enqueueing is NMI-safe.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+
+/*
+ * An entry can be in one of four states:
+ *
+ * free	     NULL, 0 -> {claimed}       : free to be used
+ * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+ * pending   next, 3 -> {busy}          : queued, pending callback
+ * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ *
+ * We use the lower two bits of the next pointer to keep PENDING and BUSY
+ * flags.
+ */
+
+#define IRQ_WORK_PENDING	1UL
+#define IRQ_WORK_BUSY		2UL
+#define IRQ_WORK_FLAGS		3UL
+
+static inline bool irq_work_is_set(struct irq_work *entry, int flags)
+{
+	return (unsigned long)entry->next & flags;
+}
+
+static inline struct irq_work *irq_work_next(struct irq_work *entry)
+{
+	unsigned long next = (unsigned long)entry->next;
+	next &= ~IRQ_WORK_FLAGS;
+	return (struct irq_work *)next;
+}
+
+static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
+{
+	unsigned long next = (unsigned long)entry;
+	next |= flags;
+	return (struct irq_work *)next;
+}
+
+static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
+
+/*
+ * Claim the entry so that no one else will poke at it.
+ */
+static bool irq_work_claim(struct irq_work *entry)
+{
+	struct irq_work *next, *nflags;
+
+	do {
+		next = entry->next;
+		if ((unsigned long)next & IRQ_WORK_PENDING)
+			return false;
+		nflags = next_flags(next, IRQ_WORK_FLAGS);
+	} while (cmpxchg(&entry->next, next, nflags) != next);
+
+	return true;
+}
+
+
+void __weak arch_irq_work_raise(void)
+{
+	/*
+	 * Lame architectures will get the timer tick callback
+	 */
+}
+
+/*
+ * Queue the entry and raise the IPI if needed.
+ */
+static void __irq_work_queue(struct irq_work *entry)
+{
+	struct irq_work **head, *next;
+
+	head = &get_cpu_var(irq_work_list);
+
+	do {
+		next = *head;
+		/* Can assign non-atomic because we keep the flags set. */
+		entry->next = next_flags(next, IRQ_WORK_FLAGS);
+	} while (cmpxchg(head, next, entry) != next);
+
+	/* The list was empty, raise self-interrupt to start processing. */
+	if (!irq_work_next(entry))
+		arch_irq_work_raise();
+
+	put_cpu_var(irq_work_list);
+}
+
+/*
+ * Enqueue the irq_work @entry, returns true on success, failure when the
+ * @entry was already enqueued by someone else.
+ *
+ * Can be re-enqueued while the callback is still in progress.
+ */
+bool irq_work_queue(struct irq_work *entry)
+{
+	if (!irq_work_claim(entry)) {
+		/*
+		 * Already enqueued, can't do!
+		 */
+		return false;
+	}
+
+	__irq_work_queue(entry);
+	return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue);
+
+/*
+ * Run the irq_work entries on this cpu. Requires to be ran from hardirq
+ * context with local IRQs disabled.
+ */
+void irq_work_run(void)
+{
+	struct irq_work *list, **head;
+
+	head = &__get_cpu_var(irq_work_list);
+	if (*head == NULL)
+		return;
+
+	BUG_ON(!in_irq());
+	BUG_ON(!irqs_disabled());
+
+	list = xchg(head, NULL);
+	while (list != NULL) {
+		struct irq_work *entry = list;
+
+		list = irq_work_next(list);
+
+		/*
+		 * Clear the PENDING bit, after this point the @entry
+		 * can be re-used.
+		 */
+		entry->next = next_flags(NULL, IRQ_WORK_BUSY);
+		entry->func(entry);
+		/*
+		 * Clear the BUSY bit and return to the free state if
+		 * no-one else claimed it meanwhile.
+		 */
+		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+	}
+}
+EXPORT_SYMBOL_GPL(irq_work_run);
+
+/*
+ * Synchronize against the irq_work @entry, ensures the entry is not
+ * currently in use.
+ */
+void irq_work_sync(struct irq_work *entry)
+{
+	WARN_ON_ONCE(irqs_disabled());
+
+	while (irq_work_is_set(entry, IRQ_WORK_BUSY))
+		cpu_relax();
+}
+EXPORT_SYMBOL_GPL(irq_work_sync);
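
arch_irq_work_raise() is deliberately weak: an architecture that selects HAVE_IRQ_WORK overrides it with a self-interrupt and calls irq_work_run() from the corresponding handler, otherwise queued work only runs from the timer tick. A hypothetical sketch of that per-arch glue (the IPI helper, vector and handler names are all illustrative, not real arch code):

#include <linux/irq_work.h>
#include <linux/hardirq.h>

/* illustrative arch hooks, not real functions */
extern void my_arch_send_self_ipi(unsigned int vector);
#define MY_IRQ_WORK_VECTOR	0xf6	/* illustrative vector number */

void arch_irq_work_raise(void)
{
	my_arch_send_self_ipi(MY_IRQ_WORK_VECTOR);
}

/* wired up as the handler for MY_IRQ_WORK_VECTOR */
void my_irq_work_interrupt(void)
{
	irq_enter();
	irq_work_run();
	irq_exit();
}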

Some files were not shown because too many files changed in this diff.