12 years ago · 8f55cea410
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -0,0 +1,62 @@
 
				+What:		/sys/devices/cpu/events/
			
 
				+		/sys/devices/cpu/events/branch-misses
			
 
				+		/sys/devices/cpu/events/cache-references
			
 
				+		/sys/devices/cpu/events/cache-misses
			
 
				+		/sys/devices/cpu/events/stalled-cycles-frontend
			
 
				+		/sys/devices/cpu/events/branch-instructions
			
 
				+		/sys/devices/cpu/events/stalled-cycles-backend
			
 
				+		/sys/devices/cpu/events/instructions
			
 
				+		/sys/devices/cpu/events/cpu-cycles
			
 
				+
			
 
				+Date:		2013/01/08
			
 
				+
			
 
				+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
			
 
				+
			
 
				+Description:	Generic performance monitoring events
			
 
				+
			
 
				+		A collection of performance monitoring events that may be
			
 
				+		supported by many/most CPUs. These events can be monitored
			
 
				+		using the 'perf(1)' tool.
			
 
				+
			
 
				+		The contents of each file would look like:
			
 
				+
			
 
				+			event=0xNNNN
			
 
				+
			
 
				+		where 'N' is a hex digit and the number '0xNNNN' shows the
			
 
				+		"raw code" for the perf event identified by the file's
			
 
				+		"basename".
			
 
				+
			
 
				+
			
 
				+What: 		/sys/devices/cpu/events/PM_LD_MISS_L1
			
 
				+		/sys/devices/cpu/events/PM_LD_REF_L1
			
 
				+		/sys/devices/cpu/events/PM_CYC
			
 
				+		/sys/devices/cpu/events/PM_BRU_FIN
			
 
				+		/sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
			
 
				+		/sys/devices/cpu/events/PM_BRU_MPRED
			
 
				+		/sys/devices/cpu/events/PM_INST_CMPL
			
 
				+		/sys/devices/cpu/events/PM_CMPLU_STALL
			
 
				+
			
 
				+Date:		2013/01/08
			
 
				+
			
 
				+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
			
 
				+		Linux Powerpc mailing list <linuxppc-dev@ozlabs.org>
			
 
				+
			
 
				+Description:	POWER-systems specific performance monitoring events
			
 
				+
			
 
				+		A collection of performance monitoring events that may be
			
 
				+		supported by the POWER CPU. These events can be monitored
			
 
				+		using the 'perf(1)' tool.
			
 
				+
			
 
				+		These events may not be supported by other CPUs.
			
 
				+
			
 
				+		The contents of each file would look like:
			
 
				+
			
 
				+			event=0xNNNN
			
 
				+
			
 
				+		where 'N' is a hex digit and the number '0xNNNN' shows the
			
 
				+		"raw code" for the perf event identified by the file's
			
 
				+		"basename".
			
 
				+
			
 
				+		Further, multiple terms like 'event=0xNNNN' can be specified
			
 
				+		and separated with commas. All available terms are defined in
			
 
				+		the /sys/bus/event_source/devices/<dev>/format file.
			
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1842,6 +1842,89 @@ an error.
 
				  # cat buffer_size_kb
			
 
				 85
			
 
				 
			
 
				+Snapshot
			
 
				+--------
			
 
				+CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature
			
 
				+available to all non latency tracers. (Latency tracers which
			
 
				+record max latency, such as "irqsoff" or "wakeup", can't use
			
 
				+this feature, since those are already using the snapshot
			
 
				+mechanism internally.)
			
 
				+
			
 
				+Snapshot preserves a current trace buffer at a particular point
			
 
				+in time without stopping tracing. Ftrace swaps the current
			
 
				+buffer with a spare buffer, and tracing continues in the new
			
 
				+current (=previous spare) buffer.
			
 
				+
			
 
				+The following debugfs files in "tracing" are related to this
			
 
				+feature:
			
 
				+
			
 
				+  snapshot:
			
 
				+
			
 
				+	This is used to take a snapshot and to read the output
			
 
				+	of the snapshot. Echo 1 into this file to allocate a
			
 
				+	spare buffer and to take a snapshot (swap), then read
			
 
				+	the snapshot from this file in the same format as
			
 
				+	"trace" (described above in the section "The File
			
 
				+	System"). Reading the snapshot and tracing can both be done
			
 
				+	in parallel. When the spare buffer is allocated, echoing
			
 
				+	0 frees it, and echoing any other (positive) value clears the
			
 
				+	snapshot contents.
			
 
				+	More details are shown in the table below.
			
 
				+
			
 
				+	status\input  |     0      |     1      |    else    |
			
 
				+	--------------+------------+------------+------------+
			
 
				+	not allocated |(do nothing)| alloc+swap |   EINVAL   |
			
 
				+	--------------+------------+------------+------------+
			
 
				+	allocated     |    free    |    swap    |   clear    |
			
 
				+	--------------+------------+------------+------------+
			
 
				+
			
 
				+Here is an example of using the snapshot feature.
			
 
				+
			
 
				+ # echo 1 > events/sched/enable
			
 
				+ # echo 1 > snapshot
			
 
				+ # cat snapshot
			
 
				+# tracer: nop
			
 
				+#
			
 
				+# entries-in-buffer/entries-written: 71/71   #P:8
			
 
				+#
			
 
				+#                              _-----=> irqs-off
			
 
				+#                             / _----=> need-resched
			
 
				+#                            | / _---=> hardirq/softirq
			
 
				+#                            || / _--=> preempt-depth
			
 
				+#                            ||| /     delay
			
 
				+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
			
 
				+#              | |       |   ||||       |         |
			
 
				+          <idle>-0     [005] d...  2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120
			
 
				+           sleep-2242  [005] d...  2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120
			
 
				+[...]
			
 
				+          <idle>-0     [002] d...  2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120
			
 
				+
			
 
				+ # cat trace
			
 
				+# tracer: nop
			
 
				+#
			
 
				+# entries-in-buffer/entries-written: 77/77   #P:8
			
 
				+#
			
 
				+#                              _-----=> irqs-off
			
 
				+#                             / _----=> need-resched
			
 
				+#                            | / _---=> hardirq/softirq
			
 
				+#                            || / _--=> preempt-depth
			
 
				+#                            ||| /     delay
			
 
				+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
			
 
				+#              | |       |   ||||       |         |
			
 
				+          <idle>-0     [007] d...  2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120
			
 
				+ snapshot-test-2-2229  [002] d...  2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120
			
 
				+[...]
			
 
				+
			
 
				+
			
 
				+If you try to use this snapshot feature when the current tracer is
			
 
				+one of the latency tracers, you will get the following results.
			
 
				+
			
 
				+ # echo wakeup > current_tracer
			
 
				+ # echo 1 > snapshot
			
 
				+bash: echo: write error: Device or resource busy
			
 
				+ # cat snapshot
			
 
				+cat: snapshot: Device or resource busy
			
 
				+
			
 
				 -----------
			
 
				 
			
 
				 More details can be found in the source code, in the
			
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -76,6 +76,15 @@ config OPTPROBES
 
				 	depends on KPROBES && HAVE_OPTPROBES
			
 
				 	depends on !PREEMPT
			
 
				 
			
 
				+config KPROBES_ON_FTRACE
			
 
				+	def_bool y
			
 
				+	depends on KPROBES && HAVE_KPROBES_ON_FTRACE
			
 
				+	depends on DYNAMIC_FTRACE_WITH_REGS
			
 
				+	help
			
 
				+	 If function tracer is enabled and the arch supports full
			
 
				+	 passing of pt_regs to function tracing, then kprobes can
			
 
				+	 optimize on top of function tracing.
			
 
				+
			
 
				 config UPROBES
			
 
				 	bool "Transparent user-space probes (EXPERIMENTAL)"
			
 
				 	depends on UPROBE_EVENT && PERF_EVENTS
			
@@ -158,6 +167,9 @@ config HAVE_KRETPROBES
 
				 config HAVE_OPTPROBES
			
 
				 	bool
			
 
				 
			
 
				+config HAVE_KPROBES_ON_FTRACE
			
 
				+	bool
			
 
				+
			
 
				 config HAVE_NMI_WATCHDOG
			
 
				 	bool
			
 
				 #
			
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -11,6 +11,7 @@
 
				 
			
 
				 #include <linux/types.h>
			
 
				 #include <asm/hw_irq.h>
			
 
				+#include <linux/device.h>
			
 
				 
			
 
				 #define MAX_HWEVENTS		8
			
 
				 #define MAX_EVENT_ALTERNATIVES	8
			
@@ -35,6 +36,7 @@ struct power_pmu {
 
				 	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
			
 
				 	int		(*limited_pmc_event)(u64 event_id);
			
 
				 	u32		flags;
			
 
				+	const struct attribute_group	**attr_groups;
			
 
				 	int		n_generic;
			
 
				 	int		*generic_events;
			
 
				 	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
			
@@ -109,3 +111,27 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 
				  * If an event_id is not subject to the constraint expressed by a particular
			
 
				  * field, then it will have 0 in both the mask and value for that field.
			
 
				  */
			
 
				+
			
 
				+extern ssize_t power_events_sysfs_show(struct device *dev,
			
 
				+				struct device_attribute *attr, char *page);
			
 
				+
			
 
				+/*
			
 
				+ * EVENT_VAR() is the same as PMU_EVENT_VAR with a suffix.
			
 
				+ *
			
 
				+ * Having a suffix allows us to have aliases in sysfs - eg: the generic
			
 
				+ * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
			
 
				+ * 'PM_CYC' where the latter is the name by which the event is known in
			
 
				+ * POWER CPU specification.
			
 
				+ */
			
 
				+#define	EVENT_VAR(_id, _suffix)		event_attr_##_id##_suffix
			
 
				+#define	EVENT_PTR(_id, _suffix)		&EVENT_VAR(_id, _suffix).attr.attr
			
 
				+
			
 
				+#define	EVENT_ATTR(_name, _id, _suffix)					\
			
 
				+	PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id,	\
			
 
				+			power_events_sysfs_show)
			
 
				+
			
 
				+#define	GENERIC_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _g)
			
 
				+#define	GENERIC_EVENT_PTR(_id)		EVENT_PTR(_id, _g)
			
 
				+
			
 
				+#define	POWER_EVENT_ATTR(_name, _id)	EVENT_ATTR(PM_##_name, _id, _p)
			
 
				+#define	POWER_EVENT_PTR(_id)		EVENT_PTR(_id, _p)
			
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1305,6 +1305,16 @@ static int power_pmu_event_idx(struct perf_event *event)
 
				 	return event->hw.idx;
			
 
				 }
			
 
				 
			
 
				+ssize_t power_events_sysfs_show(struct device *dev,
			
 
				+				struct device_attribute *attr, char *page)
			
 
				+{
			
 
				+	struct perf_pmu_events_attr *pmu_attr;
			
 
				+
			
 
				+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
			
 
				+
			
 
				+	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
			
 
				+}
			
 
				+
			
 
				 struct pmu power_pmu = {
			
 
				 	.pmu_enable	= power_pmu_enable,
			
 
				 	.pmu_disable	= power_pmu_disable,
			
@@ -1537,6 +1547,8 @@ int __cpuinit register_power_pmu(struct power_pmu *pmu)
 
				 	pr_info("%s performance monitor hardware support registered\n",
			
 
				 		pmu->name);
			
 
				 
			
 
				+	power_pmu.attr_groups = ppmu->attr_groups;
			
 
				+
			
 
				 #ifdef MSR_HV
			
 
				 	/*
			
 
				 	 * Use FCHV to ignore kernel events if MSR.HV is set.
			
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -50,6 +50,18 @@
 
				 #define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
			
 
				 #define MMCR1_PMCSEL_MSK	0xff
			
 
				 
			
 
				+/*
			
 
				+ * Power7 event codes.
			
 
				+ */
			
 
				+#define	PME_PM_CYC			0x1e
			
 
				+#define	PME_PM_GCT_NOSLOT_CYC		0x100f8
			
 
				+#define	PME_PM_CMPLU_STALL		0x4000a
			
 
				+#define	PME_PM_INST_CMPL		0x2
			
 
				+#define	PME_PM_LD_REF_L1		0xc880
			
 
				+#define	PME_PM_LD_MISS_L1		0x400f0
			
 
				+#define	PME_PM_BRU_FIN			0x10068
			
 
				+#define	PME_PM_BRU_MPRED		0x400f6
			
 
				+
			
 
				 /*
			
 
				  * Layout of constraint bits:
			
 
				  * 6666555555555544444444443333333333222222222211111111110000000000
			
@@ -307,14 +319,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 
				 }
			
 
				 
			
 
				 static int power7_generic_events[] = {
			
 
				-	[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
			
 
				-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */
			
 
				-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a,  /* CMPLU_STALL */
			
 
				-	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
			
 
				-	[PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880,	/* LD_REF_L1_LSU*/
			
 
				-	[PERF_COUNT_HW_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1	*/
			
 
				-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN	*/
			
 
				-	[PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6,	/* BR_MPRED	*/
			
 
				+	[PERF_COUNT_HW_CPU_CYCLES] =			PME_PM_CYC,
			
 
				+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PME_PM_GCT_NOSLOT_CYC,
			
 
				+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PME_PM_CMPLU_STALL,
			
 
				+	[PERF_COUNT_HW_INSTRUCTIONS] =			PME_PM_INST_CMPL,
			
 
				+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PME_PM_LD_REF_L1,
			
 
				+	[PERF_COUNT_HW_CACHE_MISSES] =			PME_PM_LD_MISS_L1,
			
 
				+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PME_PM_BRU_FIN,
			
 
				+	[PERF_COUNT_HW_BRANCH_MISSES] =			PME_PM_BRU_MPRED,
			
 
				 };
			
 
				 
			
 
				 #define C(x)	PERF_COUNT_HW_CACHE_##x
			
@@ -362,6 +374,57 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
				 	},
			
 
				 };
			
 
				 
			
 
				+
			
 
				+GENERIC_EVENT_ATTR(cpu-cycles,			CYC);
			
 
				+GENERIC_EVENT_ATTR(stalled-cycles-frontend,	GCT_NOSLOT_CYC);
			
 
				+GENERIC_EVENT_ATTR(stalled-cycles-backend,	CMPLU_STALL);
			
 
				+GENERIC_EVENT_ATTR(instructions,		INST_CMPL);
			
 
				+GENERIC_EVENT_ATTR(cache-references,		LD_REF_L1);
			
 
				+GENERIC_EVENT_ATTR(cache-misses,		LD_MISS_L1);
			
 
				+GENERIC_EVENT_ATTR(branch-instructions,		BRU_FIN);
			
 
				+GENERIC_EVENT_ATTR(branch-misses,		BRU_MPRED);
			
 
				+
			
 
				+POWER_EVENT_ATTR(CYC,				CYC);
			
 
				+POWER_EVENT_ATTR(GCT_NOSLOT_CYC,		GCT_NOSLOT_CYC);
			
 
				+POWER_EVENT_ATTR(CMPLU_STALL,			CMPLU_STALL);
			
 
				+POWER_EVENT_ATTR(INST_CMPL,			INST_CMPL);
			
 
				+POWER_EVENT_ATTR(LD_REF_L1,			LD_REF_L1);
			
 
				+POWER_EVENT_ATTR(LD_MISS_L1,			LD_MISS_L1);
			
 
				+POWER_EVENT_ATTR(BRU_FIN,			BRU_FIN);
			
 
				+POWER_EVENT_ATTR(BRU_MPRED,			BRU_MPRED);
			
 
				+
			
 
				+static struct attribute *power7_events_attr[] = {
			
 
				+	GENERIC_EVENT_PTR(CYC),
			
 
				+	GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
			
 
				+	GENERIC_EVENT_PTR(CMPLU_STALL),
			
 
				+	GENERIC_EVENT_PTR(INST_CMPL),
			
 
				+	GENERIC_EVENT_PTR(LD_REF_L1),
			
 
				+	GENERIC_EVENT_PTR(LD_MISS_L1),
			
 
				+	GENERIC_EVENT_PTR(BRU_FIN),
			
 
				+	GENERIC_EVENT_PTR(BRU_MPRED),
			
 
				+
			
 
				+	POWER_EVENT_PTR(CYC),
			
 
				+	POWER_EVENT_PTR(GCT_NOSLOT_CYC),
			
 
				+	POWER_EVENT_PTR(CMPLU_STALL),
			
 
				+	POWER_EVENT_PTR(INST_CMPL),
			
 
				+	POWER_EVENT_PTR(LD_REF_L1),
			
 
				+	POWER_EVENT_PTR(LD_MISS_L1),
			
 
				+	POWER_EVENT_PTR(BRU_FIN),
			
 
				+	POWER_EVENT_PTR(BRU_MPRED),
			
 
				+	NULL
			
 
				+};
			
 
				+
			
 
				+
			
 
				+static struct attribute_group power7_pmu_events_group = {
			
 
				+	.name = "events",
			
 
				+	.attrs = power7_events_attr,
			
 
				+};
			
 
				+
			
 
				+static const struct attribute_group *power7_pmu_attr_groups[] = {
			
 
				+	&power7_pmu_events_group,
			
 
				+	NULL,
			
 
				+};
			
 
				+
			
 
				 static struct power_pmu power7_pmu = {
			
 
				 	.name			= "POWER7",
			
 
				 	.n_counter		= 6,
			
@@ -373,6 +436,7 @@ static struct power_pmu power7_pmu = {
 
				 	.get_alternatives	= power7_get_alternatives,
			
 
				 	.disable_pmc		= power7_disable_pmc,
			
 
				 	.flags			= PPMU_ALT_SIPR,
			
 
				+	.attr_groups		= power7_pmu_attr_groups,
			
 
				 	.n_generic		= ARRAY_SIZE(power7_generic_events),
			
 
				 	.generic_events		= power7_generic_events,
			
 
				 	.cache_events		= &power7_cache_events,
			
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -39,10 +39,12 @@ config X86
 
				 	select HAVE_DMA_CONTIGUOUS if !SWIOTLB
			
 
				 	select HAVE_KRETPROBES
			
 
				 	select HAVE_OPTPROBES
			
 
				+	select HAVE_KPROBES_ON_FTRACE
			
 
				 	select HAVE_FTRACE_MCOUNT_RECORD
			
 
				 	select HAVE_FENTRY if X86_64
			
 
				 	select HAVE_C_RECORDMCOUNT
			
 
				 	select HAVE_DYNAMIC_FTRACE
			
 
				+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
			
 
				 	select HAVE_FUNCTION_TRACER
			
 
				 	select HAVE_FUNCTION_GRAPH_TRACER
			
 
				 	select HAVE_FUNCTION_GRAPH_FP_TEST
			
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
 
				 #define X86_FEATURE_TBM		(6*32+21) /* trailing bit manipulations */
			
 
				 #define X86_FEATURE_TOPOEXT	(6*32+22) /* topology extensions CPUID leafs */
			
 
				 #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
			
 
				+#define X86_FEATURE_PERFCTR_NB  (6*32+24) /* NB performance counter extensions */
			
 
				 
			
 
				 /*
			
 
				  * Auxiliary flags: Linux defined - For features scattered in various
			
@@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32];
 
				 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
			
 
				 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
			
 
				 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
			
 
				+#define cpu_has_perfctr_nb	boot_cpu_has(X86_FEATURE_PERFCTR_NB)
			
 
				 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
			
 
				 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
			
 
				 #define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
			
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
 
				 
			
 
				 #ifdef CONFIG_DYNAMIC_FTRACE
			
 
				 #define ARCH_SUPPORTS_FTRACE_OPS 1
			
 
				-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				 #endif
			
 
				 
			
 
				 #ifndef __ASSEMBLY__
			
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
 
				 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
			
 
				 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
			
 
				 
			
 
				-#define AMD_PERFMON_EVENTSEL_GUESTONLY			(1ULL << 40)
			
 
				-#define AMD_PERFMON_EVENTSEL_HOSTONLY			(1ULL << 41)
			
 
				+#define AMD64_EVENTSEL_INT_CORE_ENABLE			(1ULL << 36)
			
 
				+#define AMD64_EVENTSEL_GUESTONLY			(1ULL << 40)
			
 
				+#define AMD64_EVENTSEL_HOSTONLY				(1ULL << 41)
			
 
				+
			
 
				+#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT		37
			
 
				+#define AMD64_EVENTSEL_INT_CORE_SEL_MASK		\
			
 
				+	(0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
			
 
				 
			
 
				 #define AMD64_EVENTSEL_EVENT	\
			
 
				 	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
			
@@ -46,8 +51,12 @@
 
				 #define AMD64_RAW_EVENT_MASK		\
			
 
				 	(X86_RAW_EVENT_MASK          |  \
			
 
				 	 AMD64_EVENTSEL_EVENT)
			
 
				+#define AMD64_RAW_EVENT_MASK_NB		\
			
 
				+	(AMD64_EVENTSEL_EVENT        |  \
			
 
				+	 ARCH_PERFMON_EVENTSEL_UMASK)
			
 
				 #define AMD64_NUM_COUNTERS				4
			
 
				 #define AMD64_NUM_COUNTERS_CORE				6
			
 
				+#define AMD64_NUM_COUNTERS_NB				4
			
 
				 
			
 
				 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
			
 
				 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
			
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -194,6 +194,8 @@
 
				 /* Fam 15h MSRs */
			
 
				 #define MSR_F15H_PERF_CTL		0xc0010200
			
 
				 #define MSR_F15H_PERF_CTR		0xc0010201
			
 
				+#define MSR_F15H_NB_PERF_CTL		0xc0010240
			
 
				+#define MSR_F15H_NB_PERF_CTR		0xc0010241
			
 
				 
			
 
				 /* Fam 10h MSRs */
			
 
				 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
			
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC)		+= trace_clock.o
 
				 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
			
 
				 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
			
 
				 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
			
 
				-obj-$(CONFIG_KPROBES)		+= kprobes.o
			
 
				-obj-$(CONFIG_OPTPROBES)		+= kprobes-opt.o
			
 
				+obj-y				+= kprobes/
			
 
				 obj-$(CONFIG_MODULES)		+= module.o
			
 
				 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
			
 
				 obj-$(CONFIG_KGDB)		+= kgdb.o
			
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 
				 	} else {
			
 
				 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
			
 
				 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
			
 
				-		hwc->event_base_rdpmc = hwc->idx;
			
 
				+		hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
 
				 	.attrs = NULL,
			
 
				 };
			
 
				 
			
 
				-struct perf_pmu_events_attr {
			
 
				-	struct device_attribute attr;
			
 
				-	u64 id;
			
 
				-};
			
 
				-
			
 
				 /*
			
 
				  * Remove all undefined events (x86_pmu.event_map(id) == 0)
			
 
				  * out of events_attr attributes.
			
@@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
 
				 #define EVENT_VAR(_id)  event_attr_##_id
			
 
				 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
			
 
				 
			
 
				-#define EVENT_ATTR(_name, _id)					\
			
 
				-static struct perf_pmu_events_attr EVENT_VAR(_id) = {		\
			
 
				-	.attr = __ATTR(_name, 0444, events_sysfs_show, NULL),	\
			
 
				-	.id   =  PERF_COUNT_HW_##_id,				\
			
 
				-};
			
 
				+#define EVENT_ATTR(_name, _id)						\
			
 
				+	PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id,	\
			
 
				+			events_sysfs_show)
			
 
				 
			
 
				 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
			
 
				 EVENT_ATTR(instructions,		INSTRUCTIONS		);
			
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
 
				 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
			
 
				 	unsigned	eventsel;
			
 
				 	unsigned	perfctr;
			
 
				+	int		(*addr_offset)(int index, bool eventsel);
			
 
				+	int		(*rdpmc_index)(int index);
			
 
				 	u64		(*event_map)(int);
			
 
				 	int		max_events;
			
 
				 	int		num_counters;
			
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
 
				 
			
 
				 u64 x86_perf_event_update(struct perf_event *event);
			
 
				 
			
 
				-static inline int x86_pmu_addr_offset(int index)
			
 
				+static inline unsigned int x86_pmu_config_addr(int index)
			
 
				 {
			
 
				-	int offset;
			
 
				-
			
 
				-	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
			
 
				-	alternative_io(ASM_NOP2,
			
 
				-		       "shll $1, %%eax",
			
 
				-		       X86_FEATURE_PERFCTR_CORE,
			
 
				-		       "=a" (offset),
			
 
				-		       "a"  (index));
			
 
				-
			
 
				-	return offset;
			
 
				+	return x86_pmu.eventsel + (x86_pmu.addr_offset ?
			
 
				+				   x86_pmu.addr_offset(index, true) : index);
			
 
				 }
			
 
				 
			
 
				-static inline unsigned int x86_pmu_config_addr(int index)
			
 
				+static inline unsigned int x86_pmu_event_addr(int index)
			
 
				 {
			
 
				-	return x86_pmu.eventsel + x86_pmu_addr_offset(index);
			
 
				+	return x86_pmu.perfctr + (x86_pmu.addr_offset ?
			
 
				+				  x86_pmu.addr_offset(index, false) : index);
			
 
				 }
			
 
				 
			
 
				-static inline unsigned int x86_pmu_event_addr(int index)
			
 
				+static inline int x86_pmu_rdpmc_index(int index)
			
 
				 {
			
 
				-	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
			
 
				+	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
			
 
				 }
			
 
				 
			
 
				 int x86_setup_perfctr(struct perf_event *event);
			
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
 
				 	return amd_perfmon_event_map[hw_event];
			
 
				 }
			
 
				 
			
 
				-static int amd_pmu_hw_config(struct perf_event *event)
			
 
				+static struct event_constraint *amd_nb_event_constraint;
			
 
				+
			
 
				+/*
			
 
				+ * Previously calculated offsets
			
 
				+ */
			
 
				+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
			
 
				+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
			
 
				+static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
			
 
				+
			
 
				+/*
			
 
				+ * Legacy CPUs:
			
 
				+ *   4 counters starting at 0xc0010000 each offset by 1
			
 
				+ *
			
 
				+ * CPUs with core performance counter extensions:
			
 
				+ *   6 counters starting at 0xc0010200 each offset by 2
			
 
				+ *
			
 
				+ * CPUs with north bridge performance counter extensions:
			
 
				+ *   4 additional counters starting at 0xc0010240 each offset by 2
			
 
				+ *   (indexed right above either one of the above core counters)
			
 
				+ */
			
 
				+static inline int amd_pmu_addr_offset(int index, bool eventsel)
			
 
				 {
			
 
				-	int ret;
			
 
				+	int offset, first, base;
			
 
				 
			
 
				-	/* pass precise event sampling to ibs: */
			
 
				-	if (event->attr.precise_ip && get_ibs_caps())
			
 
				-		return -ENOENT;
			
 
				+	if (!index)
			
 
				+		return index;
			
 
				+
			
 
				+	if (eventsel)
			
 
				+		offset = event_offsets[index];
			
 
				+	else
			
 
				+		offset = count_offsets[index];
			
 
				+
			
 
				+	if (offset)
			
 
				+		return offset;
			
 
				+
			
 
				+	if (amd_nb_event_constraint &&
			
 
				+	    test_bit(index, amd_nb_event_constraint->idxmsk)) {
			
 
				+		/*
			
 
				+		 * calculate the offset of NB counters with respect to
			
 
				+		 * base eventsel or perfctr
			
 
				+		 */
			
 
				+
			
 
				+		first = find_first_bit(amd_nb_event_constraint->idxmsk,
			
 
				+				       X86_PMC_IDX_MAX);
			
 
				+
			
 
				+		if (eventsel)
			
 
				+			base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
			
 
				+		else
			
 
				+			base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
			
 
				+
			
 
				+		offset = base + ((index - first) << 1);
			
 
				+	} else if (!cpu_has_perfctr_core)
			
 
				+		offset = index;
			
 
				+	else
			
 
				+		offset = index << 1;
			
 
				+
			
 
				+	if (eventsel)
			
 
				+		event_offsets[index] = offset;
			
 
				+	else
			
 
				+		count_offsets[index] = offset;
			
 
				+
			
 
				+	return offset;
			
 
				+}
			
 
				+
			
 
				+static inline int amd_pmu_rdpmc_index(int index)
			
 
				+{
			
 
				+	int ret, first;
			
 
				+
			
 
				+	if (!index)
			
 
				+		return index;
			
 
				+
			
 
				+	ret = rdpmc_indexes[index];
			
 
				 
			
 
				-	ret = x86_pmu_hw_config(event);
			
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	if (has_branch_stack(event))
			
 
				-		return -EOPNOTSUPP;
			
 
				+	if (amd_nb_event_constraint &&
			
 
				+	    test_bit(index, amd_nb_event_constraint->idxmsk)) {
			
 
				+		/*
			
 
				+		 * according to the manual, the ECX value of the NB counters is
			
 
				+		 * the index of the NB counter (0, 1, 2 or 3) plus 6
			
 
				+		 */
			
 
				+
			
 
				+		first = find_first_bit(amd_nb_event_constraint->idxmsk,
			
 
				+				       X86_PMC_IDX_MAX);
			
 
				+		ret = index - first + 6;
			
 
				+	} else
			
 
				+		ret = index;
			
 
				+
			
 
				+	rdpmc_indexes[index] = ret;
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				 
			
 
				+static int amd_core_hw_config(struct perf_event *event)
			
 
				+{
			
 
				 	if (event->attr.exclude_host && event->attr.exclude_guest)
			
 
				 		/*
			
 
				 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
			
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
 
				 		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
			
 
				 				      ARCH_PERFMON_EVENTSEL_OS);
			
 
				 	else if (event->attr.exclude_host)
			
 
				-		event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
			
 
				+		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
			
 
				 	else if (event->attr.exclude_guest)
			
 
				-		event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
			
 
				+		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * NB counters do not support the following event select bits:
			
 
				+ *   Host/Guest only
			
 
				+ *   Counter mask
			
 
				+ *   Invert counter mask
			
 
				+ *   Edge detect
			
 
				+ *   OS/User mode
			
 
				+ */
			
 
				+static int amd_nb_hw_config(struct perf_event *event)
			
 
				+{
			
 
				+	/* for NB, we only allow system wide counting mode */
			
 
				+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
			
 
				+	    event->attr.exclude_host || event->attr.exclude_guest)
			
 
				+		return -EINVAL;
			
 
				 
			
 
				-	if (event->attr.type != PERF_TYPE_RAW)
			
 
				-		return 0;
			
 
				+	event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
			
 
				+			      ARCH_PERFMON_EVENTSEL_OS);
			
 
				 
			
 
				-	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
			
 
				+	if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
			
 
				+				 ARCH_PERFMON_EVENTSEL_INT))
			
 
				+		return -EINVAL;
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 
				 	return (hwc->config & 0xe0) == 0xe0;
			
 
				 }
			
 
				 
			
 
				+static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
			
 
				+{
			
 
				+	return amd_nb_event_constraint && amd_is_nb_event(hwc);
			
 
				+}
			
 
				+
			
 
				 static inline int amd_has_nb(struct cpu_hw_events *cpuc)
			
 
				 {
			
 
				 	struct amd_nb *nb = cpuc->amd_nb;
			
@@ -188,19 +297,36 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
 
				 	return nb && nb->nb_id != -1;
			
 
				 }
			
 
				 
			
 
				-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
			
 
				-				      struct perf_event *event)
			
 
				+static int amd_pmu_hw_config(struct perf_event *event)
			
 
				+{
			
 
				+	int ret;
			
 
				+
			
 
				+	/* pass precise event sampling to ibs: */
			
 
				+	if (event->attr.precise_ip && get_ibs_caps())
			
 
				+		return -ENOENT;
			
 
				+
			
 
				+	if (has_branch_stack(event))
			
 
				+		return -EOPNOTSUPP;
			
 
				+
			
 
				+	ret = x86_pmu_hw_config(event);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	if (event->attr.type == PERF_TYPE_RAW)
			
 
				+		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
			
 
				+
			
 
				+	if (amd_is_perfctr_nb_event(&event->hw))
			
 
				+		return amd_nb_hw_config(event);
			
 
				+
			
 
				+	return amd_core_hw_config(event);
			
 
				+}
			
 
				+
			
 
				+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
			
 
				+					   struct perf_event *event)
			
 
				 {
			
 
				-	struct hw_perf_event *hwc = &event->hw;
			
 
				 	struct amd_nb *nb = cpuc->amd_nb;
			
 
				 	int i;
			
 
				 
			
 
				-	/*
			
 
				-	 * only care about NB events
			
 
				-	 */
			
 
				-	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
			
 
				-		return;
			
 
				-
			
 
				 	/*
			
 
				 	 * need to scan whole list because event may not have
			
 
				 	 * been assigned during scheduling
			
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
			
 
				+{
			
 
				+	int core_id = cpu_data(smp_processor_id()).cpu_core_id;
			
 
				+
			
 
				+	/* deliver interrupts only to this core */
			
 
				+	if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
			
 
				+		hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
			
 
				+		hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
			
 
				+		hwc->config |= (u64)(core_id) <<
			
 
				+			AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				  /*
			
 
				   * AMD64 NorthBridge events need special treatment because
			
 
				   * counter access needs to be synchronized across all cores
			
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 
				   *
			
 
				   * Given that resources are allocated (cmpxchg), they must be
			
 
				   * eventually freed for others to use. This is accomplished by
			
 
				-  * calling amd_put_event_constraints().
			
 
				+  * calling __amd_put_nb_event_constraints().
			
 
				   *
			
 
				   * Non NB events are not impacted by this restriction.
			
 
				   */
			
 
				 static struct event_constraint *
			
 
				-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
			
 
				+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			
 
				+			       struct event_constraint *c)
			
 
				 {
			
 
				 	struct hw_perf_event *hwc = &event->hw;
			
 
				 	struct amd_nb *nb = cpuc->amd_nb;
			
 
				-	struct perf_event *old = NULL;
			
 
				-	int max = x86_pmu.num_counters;
			
 
				-	int i, j, k = -1;
			
 
				+	struct perf_event *old;
			
 
				+	int idx, new = -1;
			
 
				 
			
 
				-	/*
			
 
				-	 * if not NB event or no NB, then no constraints
			
 
				-	 */
			
 
				-	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
			
 
				-		return &unconstrained;
			
 
				+	if (!c)
			
 
				+		c = &unconstrained;
			
 
				+
			
 
				+	if (cpuc->is_fake)
			
 
				+		return c;
			
 
				 
			
 
				 	/*
			
 
				 	 * detect if already present, if so reuse
			
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 
				 	 * because of successive calls to x86_schedule_events() from
			
 
				 	 * hw_perf_group_sched_in() without hw_perf_enable()
			
 
				 	 */
			
 
				-	for (i = 0; i < max; i++) {
			
 
				-		/*
			
 
				-		 * keep track of first free slot
			
 
				-		 */
			
 
				-		if (k == -1 && !nb->owners[i])
			
 
				-			k = i;
			
 
				+	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
			
 
				+		if (new == -1 || hwc->idx == idx)
			
 
				+			/* assign free slot, prefer hwc->idx */
			
 
				+			old = cmpxchg(nb->owners + idx, NULL, event);
			
 
				+		else if (nb->owners[idx] == event)
			
 
				+			/* event already present */
			
 
				+			old = event;
			
 
				+		else
			
 
				+			continue;
			
 
				+
			
 
				+		if (old && old != event)
			
 
				+			continue;
			
 
				+
			
 
				+		/* reassign to this slot */
			
 
				+		if (new != -1)
			
 
				+			cmpxchg(nb->owners + new, event, NULL);
			
 
				+		new = idx;
			
 
				 
			
 
				 		/* already present, reuse */
			
 
				-		if (nb->owners[i] == event)
			
 
				-			goto done;
			
 
				-	}
			
 
				-	/*
			
 
				-	 * not present, so grab a new slot
			
 
				-	 * starting either at:
			
 
				-	 */
			
 
				-	if (hwc->idx != -1) {
			
 
				-		/* previous assignment */
			
 
				-		i = hwc->idx;
			
 
				-	} else if (k != -1) {
			
 
				-		/* start from free slot found */
			
 
				-		i = k;
			
 
				-	} else {
			
 
				-		/*
			
 
				-		 * event not found, no slot found in
			
 
				-		 * first pass, try again from the
			
 
				-		 * beginning
			
 
				-		 */
			
 
				-		i = 0;
			
 
				-	}
			
 
				-	j = i;
			
 
				-	do {
			
 
				-		old = cmpxchg(nb->owners+i, NULL, event);
			
 
				-		if (!old)
			
 
				+		if (old == event)
			
 
				 			break;
			
 
				-		if (++i == max)
			
 
				-			i = 0;
			
 
				-	} while (i != j);
			
 
				-done:
			
 
				-	if (!old)
			
 
				-		return &nb->event_constraints[i];
			
 
				-
			
 
				-	return &emptyconstraint;
			
 
				+	}
			
 
				+
			
 
				+	if (new == -1)
			
 
				+		return &emptyconstraint;
			
 
				+
			
 
				+	if (amd_is_perfctr_nb_event(hwc))
			
 
				+		amd_nb_interrupt_hw_config(hwc);
			
 
				+
			
 
				+	return &nb->event_constraints[new];
			
 
				 }
			
 
				 
			
 
				 static struct amd_nb *amd_alloc_nb(int cpu)
			
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
 
				 	struct amd_nb *nb;
			
 
				 	int i, nb_id;
			
 
				 
			
 
				-	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
			
 
				+	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
			
 
				 
			
 
				 	if (boot_cpu_data.x86_max_cores < 2)
			
 
				 		return;
			
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static struct event_constraint *
			
 
				+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
			
 
				+{
			
 
				+	/*
			
 
				+	 * if not NB event or no NB, then no constraints
			
 
				+	 */
			
 
				+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
			
 
				+		return &unconstrained;
			
 
				+
			
 
				+	return __amd_get_nb_event_constraints(cpuc, event,
			
 
				+					      amd_nb_event_constraint);
			
 
				+}
			
 
				+
			
 
				+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
			
 
				+				      struct perf_event *event)
			
 
				+{
			
 
				+	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
			
 
				+		__amd_put_nb_event_constraints(cpuc, event);
			
 
				+}
			
 
				+
			
 
				 PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
			
 
				 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
			
 
				 PMU_FORMAT_ATTR(edge,	"config:18"	);
			
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
 
				 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
			
 
				 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
			
 
				 
			
 
				+static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
			
 
				+static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
			
 
				+
			
 
				 static struct event_constraint *
			
 
				 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
			
 
				 {
			
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
 
				 			return &amd_f15_PMC20;
			
 
				 		}
			
 
				 	case AMD_EVENT_NB:
			
 
				-		/* not yet implemented */
			
 
				-		return &emptyconstraint;
			
 
				+		return __amd_get_nb_event_constraints(cpuc, event,
			
 
				+						      amd_nb_event_constraint);
			
 
				 	default:
			
 
				 		return &emptyconstraint;
			
 
				 	}
			
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
 
				 	.schedule_events	= x86_schedule_events,
			
 
				 	.eventsel		= MSR_K7_EVNTSEL0,
			
 
				 	.perfctr		= MSR_K7_PERFCTR0,
			
 
				+	.addr_offset            = amd_pmu_addr_offset,
			
 
				+	.rdpmc_index		= amd_pmu_rdpmc_index,
			
 
				 	.event_map		= amd_pmu_event_map,
			
 
				 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
			
 
				 	.num_counters		= AMD64_NUM_COUNTERS,
			
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 
				 
			
 
				 static int setup_event_constraints(void)
			
 
				 {
			
 
				-	if (boot_cpu_data.x86 >= 0x15)
			
 
				+	if (boot_cpu_data.x86 == 0x15)
			
 
				 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
			
 
				 	return 0;
			
 
				 }
			
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int setup_perfctr_nb(void)
			
 
				+{
			
 
				+	if (!cpu_has_perfctr_nb)
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
			
 
				+
			
 
				+	if (cpu_has_perfctr_core)
			
 
				+		amd_nb_event_constraint = &amd_NBPMC96;
			
 
				+	else
			
 
				+		amd_nb_event_constraint = &amd_NBPMC74;
			
 
				+
			
 
				+	printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 __init int amd_pmu_init(void)
			
 
				 {
			
 
				 	/* Performance-monitoring supported from K7 and later: */
			
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void)
 
				 
			
 
				 	setup_event_constraints();
			
 
				 	setup_perfctr_core();
			
 
				+	setup_perfctr_nb();
			
 
				 
			
 
				 	/* Events are common for all AMDs */
			
 
				 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
			
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void)
 
				 	 * SVM is disabled the Guest-only bits still gets set and the counter
			
 
				 	 * will not count anything.
			
 
				 	 */
			
 
				-	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
			
 
				+	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
			
 
				 
			
 
				 	/* Reload all events */
			
 
				 	x86_pmu_disable_all();
			
--- a/arch/x86/kernel/kprobes/Makefile
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
 
				+#
			
 
				+# Makefile for kernel probes
			
 
				+#
			
 
				+
			
 
				+obj-$(CONFIG_KPROBES)		+= core.o
			
 
				+obj-$(CONFIG_OPTPROBES)		+= opt.o
			
 
				+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
			
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
 
				 	return addr;
			
 
				 }
			
 
				 #endif
			
 
				+
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE
			
 
				+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+			   struct kprobe_ctlblk *kcb);
			
 
				+#else
			
 
				+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+				  struct kprobe_ctlblk *kcb)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
 
				 #endif
			
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
 
				 #include <asm/insn.h>
			
 
				 #include <asm/debugreg.h>
			
 
				 
			
 
				-#include "kprobes-common.h"
			
 
				+#include "common.h"
			
 
				 
			
 
				 void jprobe_return_end(void);
			
 
				 
			
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
				 	 * Groups, and some special opcodes can not boost.
			
 
				 	 * This is non-const and volatile to keep gcc from statically
			
 
				 	 * optimizing it out, as variable_test_bit makes gcc think only
			
 
				-	 * *(unsigned long*) is used. 
			
 
				+	 * *(unsigned long*) is used.
			
 
				 	 */
			
 
				 static volatile u32 twobyte_is_boostable[256 / 32] = {
			
 
				 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
			
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
 
				 	struct __arch_relative_insn {
			
 
				 		u8 op;
			
 
				 		s32 raddr;
			
 
				-	} __attribute__((packed)) *insn;
			
 
				+	} __packed *insn;
			
 
				 
			
 
				 	insn = (struct __arch_relative_insn *)from;
			
 
				 	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
			
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 
				 	return 1;
			
 
				 }
			
 
				 
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				-				      struct kprobe_ctlblk *kcb)
			
 
				-{
			
 
				-	/*
			
 
				-	 * Emulate singlestep (and also recover regs->ip)
			
 
				-	 * as if there is a 5byte nop
			
 
				-	 */
			
 
				-	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
			
 
				-	if (unlikely(p->post_handler)) {
			
 
				-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
			
 
				-		p->post_handler(p, regs, 0);
			
 
				-	}
			
 
				-	__this_cpu_write(current_kprobe, NULL);
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 /*
			
 
				  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
			
 
				  * remain disabled throughout this function.
			
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 
				 	} else if (kprobe_running()) {
			
 
				 		p = __this_cpu_read(current_kprobe);
			
 
				 		if (p->break_handler && p->break_handler(p, regs)) {
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				-			if (kprobe_ftrace(p)) {
			
 
				-				skip_singlestep(p, regs, kcb);
			
 
				-				return 1;
			
 
				-			}
			
 
				-#endif
			
 
				-			setup_singlestep(p, regs, kcb, 0);
			
 
				+			if (!skip_singlestep(p, regs, kcb))
			
 
				+				setup_singlestep(p, regs, kcb, 0);
			
 
				 			return 1;
			
 
				 		}
			
 
				 	} /* else: not a kprobe fault; let the kernel handle it */
			
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				-/* Ftrace callback handler for kprobes */
			
 
				-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
			
 
				-				     struct ftrace_ops *ops, struct pt_regs *regs)
			
 
				-{
			
 
				-	struct kprobe *p;
			
 
				-	struct kprobe_ctlblk *kcb;
			
 
				-	unsigned long flags;
			
 
				-
			
 
				-	/* Disable irq for emulating a breakpoint and avoiding preempt */
			
 
				-	local_irq_save(flags);
			
 
				-
			
 
				-	p = get_kprobe((kprobe_opcode_t *)ip);
			
 
				-	if (unlikely(!p) || kprobe_disabled(p))
			
 
				-		goto end;
			
 
				-
			
 
				-	kcb = get_kprobe_ctlblk();
			
 
				-	if (kprobe_running()) {
			
 
				-		kprobes_inc_nmissed_count(p);
			
 
				-	} else {
			
 
				-		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
			
 
				-		regs->ip = ip + sizeof(kprobe_opcode_t);
			
 
				-
			
 
				-		__this_cpu_write(current_kprobe, p);
			
 
				-		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
			
 
				-		if (!p->pre_handler || !p->pre_handler(p, regs))
			
 
				-			skip_singlestep(p, regs, kcb);
			
 
				-		/*
			
 
				-		 * If pre_handler returns !0, it sets regs->ip and
			
 
				-		 * resets current kprobe.
			
 
				-		 */
			
 
				-	}
			
 
				-end:
			
 
				-	local_irq_restore(flags);
			
 
				-}
			
 
				-
			
 
				-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
			
 
				-{
			
 
				-	p->ainsn.insn = NULL;
			
 
				-	p->ainsn.boostable = -1;
			
 
				-	return 0;
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 int __init arch_init_kprobes(void)
			
 
				 {
			
 
				 	return arch_init_optprobes();
			
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
 
				+/*
			
 
				+ * Dynamic Ftrace based Kprobes Optimization
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2 of the License, or
			
 
				+ * (at your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write to the Free Software
			
 
				+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
			
 
				+ *
			
 
				+ * Copyright (C) Hitachi Ltd., 2012
			
 
				+ */
			
 
				+#include <linux/kprobes.h>
			
 
				+#include <linux/ptrace.h>
			
 
				+#include <linux/hardirq.h>
			
 
				+#include <linux/preempt.h>
			
 
				+#include <linux/ftrace.h>
			
 
				+
			
 
				+#include "common.h"
			
 
				+
			
 
				+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+			     struct kprobe_ctlblk *kcb)
			
 
				+{
			
 
				+	/*
			
 
				+	 * Emulate singlestep (and also recover regs->ip)
			
 
				+	 * as if there were a 5-byte nop
			
 
				+	 */
			
 
				+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
			
 
				+	if (unlikely(p->post_handler)) {
			
 
				+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
			
 
				+		p->post_handler(p, regs, 0);
			
 
				+	}
			
 
				+	__this_cpu_write(current_kprobe, NULL);
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+			      struct kprobe_ctlblk *kcb)
			
 
				+{
			
 
				+	if (kprobe_ftrace(p))
			
 
				+		return __skip_singlestep(p, regs, kcb);
			
 
				+	else
			
 
				+		return 0;
			
 
				+}
			
 
				+
			
 
				+/* Ftrace callback handler for kprobes */
			
 
				+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
			
 
				+				     struct ftrace_ops *ops, struct pt_regs *regs)
			
 
				+{
			
 
				+	struct kprobe *p;
			
 
				+	struct kprobe_ctlblk *kcb;
			
 
				+	unsigned long flags;
			
 
				+
			
 
				+	/* Disable irqs to emulate a breakpoint and to prevent preemption */
			
 
				+	local_irq_save(flags);
			
 
				+
			
 
				+	p = get_kprobe((kprobe_opcode_t *)ip);
			
 
				+	if (unlikely(!p) || kprobe_disabled(p))
			
 
				+		goto end;
			
 
				+
			
 
				+	kcb = get_kprobe_ctlblk();
			
 
				+	if (kprobe_running()) {
			
 
				+		kprobes_inc_nmissed_count(p);
			
 
				+	} else {
			
 
				+		/* Kprobe handlers expect regs->ip = ip + 1, as after a breakpoint hit */
			
 
				+		regs->ip = ip + sizeof(kprobe_opcode_t);
			
 
				+
			
 
				+		__this_cpu_write(current_kprobe, p);
			
 
				+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
			
 
				+		if (!p->pre_handler || !p->pre_handler(p, regs))
			
 
				+			__skip_singlestep(p, regs, kcb);
			
 
				+		/*
			
 
				+		 * If pre_handler returns !0, it sets regs->ip and
			
 
				+		 * resets current kprobe.
			
 
				+		 */
			
 
				+	}
			
 
				+end:
			
 
				+	local_irq_restore(flags);
			
 
				+}
			
 
				+
			
 
				+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
			
 
				+{
			
 
				+	p->ainsn.insn = NULL;
			
 
				+	p->ainsn.boostable = -1;
			
 
				+	return 0;
			
 
				+}
			
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
 
				 #include <asm/insn.h>
			
 
				 #include <asm/debugreg.h>
			
 
				 
			
 
				-#include "kprobes-common.h"
			
 
				+#include "common.h"
			
 
				 
			
 
				 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
			
 
				 {
			
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
				 		if (auprobe->insn[i] == 0x66)
			
 
				 			continue;
			
 
				 
			
 
				-		if (auprobe->insn[i] == 0x90)
			
 
				+		if (auprobe->insn[i] == 0x90) {
			
 
				+			regs->ip += i + 1;
			
 
				 			return true;
			
 
				+		}
			
 
				 
			
 
				 		break;
			
 
				 	}
			
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
 
				 #include <linux/time.h>
			
 
				 #include <linux/cper.h>
			
 
				 #include <linux/acpi.h>
			
 
				+#include <linux/pci.h>
			
 
				 #include <linux/aer.h>
			
 
				 
			
 
				 /*
			
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = {
 
				 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
			
 
				 			    const struct acpi_hest_generic_data *gdata)
			
 
				 {
			
 
				+#ifdef CONFIG_ACPI_APEI_PCIEAER
			
 
				+	struct pci_dev *dev;
			
 
				+#endif
			
 
				+
			
 
				 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
			
 
				 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
			
 
				 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
			
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 
				 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
			
 
				 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
			
 
				 #ifdef CONFIG_ACPI_APEI_PCIEAER
			
 
				-	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
			
 
				-		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
			
 
				-		cper_print_aer(pfx, gdata->error_severity, aer_regs);
			
 
				+	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
			
 
				+			pcie->device_id.bus, pcie->device_id.function);
			
 
				+	if (!dev) {
			
 
				+		pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
			
 
				+			pcie->device_id.segment, pcie->device_id.bus,
			
 
				+			pcie->device_id.slot, pcie->device_id.function);
			
 
				+		return;
			
 
				 	}
			
 
				+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
			
 
				+		cper_print_aer(pfx, dev, gdata->error_severity,
			
 
				+				(struct aer_capability_regs *) pcie->aer_info);
			
 
				+	pci_dev_put(dev);
			
 
				 #endif
			
 
				 }
			
 
				 
			
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,6 +23,9 @@
 
				 
			
 
				 #include "aerdrv.h"
			
 
				 
			
 
				+#define CREATE_TRACE_POINTS
			
 
				+#include <trace/events/ras.h>
			
 
				+
			
 
				 #define AER_AGENT_RECEIVER		0
			
 
				 #define AER_AGENT_REQUESTER		1
			
 
				 #define AER_AGENT_COMPLETER		2
			
@@ -121,12 +124,11 @@ static const char *aer_agent_string[] = {
 
				 	"Transmitter ID"
			
 
				 };
			
 
				 
			
 
				-static void __aer_print_error(const char *prefix,
			
 
				+static void __aer_print_error(struct pci_dev *dev,
			
 
				 			      struct aer_err_info *info)
			
 
				 {
			
 
				 	int i, status;
			
 
				 	const char *errmsg = NULL;
			
 
				-
			
 
				 	status = (info->status & ~info->mask);
			
 
				 
			
 
				 	for (i = 0; i < 32; i++) {
			
@@ -141,26 +143,22 @@ static void __aer_print_error(const char *prefix,
 
				 				aer_uncorrectable_error_string[i] : NULL;
			
 
				 
			
 
				 		if (errmsg)
			
 
				-			printk("%s""   [%2d] %-22s%s\n", prefix, i, errmsg,
			
 
				+			dev_err(&dev->dev, "   [%2d] %-22s%s\n", i, errmsg,
			
 
				 				info->first_error == i ? " (First)" : "");
			
 
				 		else
			
 
				-			printk("%s""   [%2d] Unknown Error Bit%s\n", prefix, i,
			
 
				-				info->first_error == i ? " (First)" : "");
			
 
				+			dev_err(&dev->dev, "   [%2d] Unknown Error Bit%s\n",
			
 
				+				i, info->first_error == i ? " (First)" : "");
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
			
 
				 {
			
 
				 	int id = ((dev->bus->number << 8) | dev->devfn);
			
 
				-	char prefix[44];
			
 
				-
			
 
				-	snprintf(prefix, sizeof(prefix), "%s%s %s: ",
			
 
				-		 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
			
 
				-		 dev_driver_string(&dev->dev), dev_name(&dev->dev));
			
 
				 
			
 
				 	if (info->status == 0) {
			
 
				-		printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
			
 
				-			"id=%04x(Unregistered Agent ID)\n", prefix,
			
 
				+		dev_err(&dev->dev,
			
 
				+			"PCIe Bus Error: severity=%s, type=Unaccessible, "
			
 
				+			"id=%04x(Unregistered Agent ID)\n",
			
 
				 			aer_error_severity_string[info->severity], id);
			
 
				 	} else {
			
 
				 		int layer, agent;
			
@@ -168,22 +166,24 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 
				 		layer = AER_GET_LAYER_ERROR(info->severity, info->status);
			
 
				 		agent = AER_GET_AGENT(info->severity, info->status);
			
 
				 
			
 
				-		printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
			
 
				-			prefix, aer_error_severity_string[info->severity],
			
 
				+		dev_err(&dev->dev,
			
 
				+			"PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
			
 
				+			aer_error_severity_string[info->severity],
			
 
				 			aer_error_layer[layer], id, aer_agent_string[agent]);
			
 
				 
			
 
				-		printk("%s""  device [%04x:%04x] error status/mask=%08x/%08x\n",
			
 
				-			prefix, dev->vendor, dev->device,
			
 
				+		dev_err(&dev->dev,
			
 
				+			"  device [%04x:%04x] error status/mask=%08x/%08x\n",
			
 
				+			dev->vendor, dev->device,
			
 
				 			info->status, info->mask);
			
 
				 
			
 
				-		__aer_print_error(prefix, info);
			
 
				+		__aer_print_error(dev, info);
			
 
				 
			
 
				 		if (info->tlp_header_valid) {
			
 
				 			unsigned char *tlp = (unsigned char *) &info->tlp;
			
 
				-			printk("%s""  TLP Header:"
			
 
				+			dev_err(&dev->dev, "  TLP Header:"
			
 
				 				" %02x%02x%02x%02x %02x%02x%02x%02x"
			
 
				 				" %02x%02x%02x%02x %02x%02x%02x%02x\n",
			
 
				-				prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
			
 
				+				*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
			
 
				 				*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
			
 
				 				*(tlp + 11), *(tlp + 10), *(tlp + 9),
			
 
				 				*(tlp + 8), *(tlp + 15), *(tlp + 14),
			
@@ -192,8 +192,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 
				 	}
			
 
				 
			
 
				 	if (info->id && info->error_dev_num > 1 && info->id == id)
			
 
				-		printk("%s""  Error of this Agent(%04x) is reported first\n",
			
 
				-			prefix, id);
			
 
				+		dev_err(&dev->dev,
			
 
				+			   "  Error of this Agent(%04x) is reported first\n",
			
 
				+			id);
			
 
				+	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			
 
				+			info->severity);
			
 
				 }
			
 
				 
			
 
				 void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
			
@@ -217,7 +220,7 @@ int cper_severity_to_aer(int cper_severity)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
			
 
				 
			
 
				-void cper_print_aer(const char *prefix, int cper_severity,
			
 
				+void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
			
 
				 		    struct aer_capability_regs *aer)
			
 
				 {
			
 
				 	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
			
@@ -239,25 +242,27 @@ void cper_print_aer(const char *prefix, int cper_severity,
 
				 	}
			
 
				 	layer = AER_GET_LAYER_ERROR(aer_severity, status);
			
 
				 	agent = AER_GET_AGENT(aer_severity, status);
			
 
				-	printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
			
 
				-	       prefix, status, mask);
			
 
				+	dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
			
 
				+	       status, mask);
			
 
				 	cper_print_bits(prefix, status, status_strs, status_strs_size);
			
 
				-	printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
			
 
				+	dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n",
			
 
				 	       aer_error_layer[layer], aer_agent_string[agent]);
			
 
				 	if (aer_severity != AER_CORRECTABLE)
			
 
				-		printk("%s""aer_uncor_severity: 0x%08x\n",
			
 
				-		       prefix, aer->uncor_severity);
			
 
				+		dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n",
			
 
				+		       aer->uncor_severity);
			
 
				 	if (tlp_header_valid) {
			
 
				 		const unsigned char *tlp;
			
 
				 		tlp = (const unsigned char *)&aer->header_log;
			
 
				-		printk("%s""aer_tlp_header:"
			
 
				+		dev_err(&dev->dev, "aer_tlp_header:"
			
 
				 			" %02x%02x%02x%02x %02x%02x%02x%02x"
			
 
				 			" %02x%02x%02x%02x %02x%02x%02x%02x\n",
			
 
				-			prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
			
 
				+			*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
			
 
				 			*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
			
 
				 			*(tlp + 11), *(tlp + 10), *(tlp + 9),
			
 
				 			*(tlp + 8), *(tlp + 15), *(tlp + 14),
			
 
				 			*(tlp + 13), *(tlp + 12));
			
 
				 	}
			
 
				+	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
			
 
				+			aer_severity);
			
 
				 }
			
 
				 #endif
			
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-extern void cper_print_aer(const char *prefix, int cper_severity,
			
 
				-			   struct aer_capability_regs *aer);
			
 
				+extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
			
 
				+			   int cper_severity, struct aer_capability_regs *aer);
			
 
				 extern int cper_severity_to_aer(int cper_severity);
			
 
				 extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
			
 
				 			      int severity);
			
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
 
				  * SAVE_REGS - The ftrace_ops wants regs saved at each function called
			
 
				  *            and passed to the callback. If this flag is set, but the
			
 
				  *            architecture does not support passing regs
			
 
				- *            (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
			
 
				+ *            (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
			
 
				  *            ftrace_ops will fail to register, unless the next flag
			
 
				  *            is set.
			
 
				  * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
			
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
 
				 #endif
			
 
				 
			
 
				 #ifndef FTRACE_REGS_ADDR
			
 
				-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
			
 
				 #else
			
 
				 # define FTRACE_REGS_ADDR FTRACE_ADDR
			
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
 
				  */
			
 
				 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
			
 
				 
			
 
				-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 /**
			
 
				  * ftrace_modify_call - convert from one addr to another (no nop)
			
 
				  * @rec: the mcount call site record
			
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -49,7 +49,6 @@ struct trace_entry {
 
				 	unsigned char		flags;
			
 
				 	unsigned char		preempt_count;
			
 
				 	int			pid;
			
 
				-	int			padding;
			
 
				 };
			
 
				 
			
 
				 #define FTRACE_MAX_EVENT						\
			
@@ -84,6 +83,9 @@ struct trace_iterator {
 
				 	long			idx;
			
 
				 
			
 
				 	cpumask_var_t		started;
			
 
				+
			
 
				+	/* true when the currently open file is the snapshot file */
			
 
				+	bool			snapshot;
			
 
				 };
			
 
				 
			
 
				 enum trace_iter_flags {
			
@@ -272,7 +274,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 
				 extern int trace_add_event_call(struct ftrace_event_call *call);
			
 
				 extern void trace_remove_event_call(struct ftrace_event_call *call);
			
 
				 
			
 
				-#define is_signed_type(type)	(((type)(-1)) < 0)
			
 
				+#define is_signed_type(type)	(((type)(-1)) < (type)0)
			
 
				 
			
 
				 int trace_set_clr_event(const char *system, const char *event, int set);
			
 
				 
			
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -180,10 +180,10 @@ extern void irq_exit(void);
 
				 
			
 
				 #define nmi_enter()						\
			
 
				 	do {							\
			
 
				+		lockdep_off();					\
			
 
				 		ftrace_nmi_enter();				\
			
 
				 		BUG_ON(in_nmi());				\
			
 
				 		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
			
 
				-		lockdep_off();					\
			
 
				 		rcu_nmi_enter();				\
			
 
				 		trace_hardirq_enter();				\
			
 
				 	} while (0)
			
@@ -192,10 +192,10 @@ extern void irq_exit(void);
 
				 	do {							\
			
 
				 		trace_hardirq_exit();				\
			
 
				 		rcu_nmi_exit();					\
			
 
				-		lockdep_on();					\
			
 
				 		BUG_ON(!in_nmi());				\
			
 
				 		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
			
 
				 		ftrace_nmi_exit();				\
			
 
				+		lockdep_on();					\
			
 
				 	} while (0)
			
 
				 
			
 
				 #endif /* LINUX_HARDIRQ_H */
			
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -49,16 +49,6 @@
 
				 #define KPROBE_REENTER		0x00000004
			
 
				 #define KPROBE_HIT_SSDONE	0x00000008
			
 
				 
			
 
				-/*
			
 
				- * If function tracer is enabled and the arch supports full
			
 
				- * passing of pt_regs to function tracing, then kprobes can
			
 
				- * optimize on top of function tracing.
			
 
				- */
			
 
				-#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
			
 
				-	&& defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
			
 
				-# define KPROBES_CAN_USE_FTRACE
			
 
				-#endif
			
 
				-
			
 
				 /* Attach to insert probes on any functions which should be ignored*/
			
 
				 #define __kprobes	__attribute__((__section__(".kprobes.text")))
			
 
				 
			
@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 
				 #endif
			
 
				 
			
 
				 #endif /* CONFIG_OPTPROBES */
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE
			
 
				 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
			
 
				 				  struct ftrace_ops *ops, struct pt_regs *regs);
			
 
				 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
			
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -135,16 +135,21 @@ struct hw_perf_event {
 
				 		struct { /* software */
			
 
				 			struct hrtimer	hrtimer;
			
 
				 		};
			
 
				+		struct { /* tracepoint */
			
 
				+			struct task_struct	*tp_target;
			
 
				+			/* for tp_event->class */
			
 
				+			struct list_head	tp_list;
			
 
				+		};
			
 
				 #ifdef CONFIG_HAVE_HW_BREAKPOINT
			
 
				 		struct { /* breakpoint */
			
 
				-			struct arch_hw_breakpoint	info;
			
 
				-			struct list_head		bp_list;
			
 
				 			/*
			
 
				 			 * Crufty hack to avoid the chicken and egg
			
 
				 			 * problem hw_breakpoint has with context
			
 
				 			 * creation and event initalization.
			
 
				 			 */
			
 
				 			struct task_struct		*bp_target;
			
 
				+			struct arch_hw_breakpoint	info;
			
 
				+			struct list_head		bp_list;
			
 
				 		};
			
 
				 #endif
			
 
				 	};
			
@@ -817,6 +822,17 @@ do {									\
 
				 } while (0)
			
 
				 
			
 
				 
			
 
				+struct perf_pmu_events_attr {
			
 
				+	struct device_attribute attr;
			
 
				+	u64 id;
			
 
				+};
			
 
				+
			
 
				+#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
			
 
				+static struct perf_pmu_events_attr _var = {				\
			
 
				+	.attr = __ATTR(_name, 0444, _show, NULL),			\
			
 
				+	.id   =  _id,							\
			
 
				+};
			
 
				+
			
 
				 #define PMU_FORMAT_ATTR(_name, _format)					\
			
 
				 static ssize_t								\
			
 
				 _name##_show(struct device *dev,					\
			
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n);
 
				 int profile_event_register(enum profile_type, struct notifier_block * n);
			
 
				 int profile_event_unregister(enum profile_type, struct notifier_block * n);
			
 
				 
			
 
				-int register_timer_hook(int (*hook)(struct pt_regs *));
			
 
				-void unregister_timer_hook(int (*hook)(struct pt_regs *));
			
 
				-
			
 
				 struct pt_regs;
			
 
				 
			
 
				 #else
			
@@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_
 
				 #define profile_handoff_task(a) (0)
			
 
				 #define profile_munmap(a) do { } while (0)
			
 
				 
			
 
				-static inline int register_timer_hook(int (*hook)(struct pt_regs *))
			
 
				-{
			
 
				-	return -ENOSYS;
			
 
				-}
			
 
				-
			
 
				-static inline void unregister_timer_hook(int (*hook)(struct pt_regs *))
			
 
				-{
			
 
				-	return;
			
 
				-}
			
 
				-
			
 
				 #endif /* CONFIG_PROFILING */
			
 
				 
			
 
				 #endif /* _LINUX_PROFILE_H */
			
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 
				 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
			
 
				 unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
			
 
				 unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
			
 
				+unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu);
			
 
				 
			
 
				 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
			
 
				 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
			
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,13 +35,20 @@ struct inode;
 
				 # include <asm/uprobes.h>
			
 
				 #endif
			
 
				 
			
 
				+#define UPROBE_HANDLER_REMOVE		1
			
 
				+#define UPROBE_HANDLER_MASK		1
			
 
				+
			
 
				+enum uprobe_filter_ctx {
			
 
				+	UPROBE_FILTER_REGISTER,
			
 
				+	UPROBE_FILTER_UNREGISTER,
			
 
				+	UPROBE_FILTER_MMAP,
			
 
				+};
			
 
				+
			
 
				 struct uprobe_consumer {
			
 
				 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
			
 
				-	/*
			
 
				-	 * filter is optional; If a filter exists, handler is run
			
 
				-	 * if and only if filter returns true.
			
 
				-	 */
			
 
				-	bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
			
 
				+	bool (*filter)(struct uprobe_consumer *self,
			
 
				+				enum uprobe_filter_ctx ctx,
			
 
				+				struct mm_struct *mm);
			
 
				 
			
 
				 	struct uprobe_consumer *next;
			
 
				 };
			
@@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 
				 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
			
 
				 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
			
 
				 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
			
 
				+extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
			
 
				 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
			
 
				 extern int uprobe_mmap(struct vm_area_struct *vma);
			
 
				 extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
			
@@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 
				 {
			
 
				 	return -ENOSYS;
			
 
				 }
			
 
				+static inline int
			
 
				+uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
			
 
				+{
			
 
				+	return -ENOSYS;
			
 
				+}
			
 
				 static inline void
			
 
				 uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
			
 
				 {
			
--- a/include/trace/events/ras.h
+++ b/include/trace/events/ras.h
@@ -0,0 +1,77 @@
 
				+#undef TRACE_SYSTEM
			
 
				+#define TRACE_SYSTEM ras
			
 
				+
			
 
				+#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
			
 
				+#define _TRACE_AER_H
			
 
				+
			
 
				+#include <linux/tracepoint.h>
			
 
				+#include <linux/edac.h>
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * PCIe AER Trace event
			
 
				+ *
			
 
				+ * These events are generated when hardware detects a corrected or
			
 
				+ * uncorrected event on a PCIe device. The event report has
			
 
				+ * the following structure:
			
 
				+ *
			
 
				+ * char * dev_name -	The name of the slot where the device resides
			
 
				+ *			([domain:]bus:device.function).
			
 
				+ * u32 status -		Either the correctable or uncorrectable register
			
 
				+ *			indicating what error or errors have been seen
			
 
				+ * u8 severity -	error severity 0:NONFATAL 1:FATAL 2:CORRECTED
			
 
				+ */
			
 
				+
			
 
				+#define aer_correctable_errors		\
			
 
				+	{BIT(0),	"Receiver Error"},		\
			
 
				+	{BIT(6),	"Bad TLP"},			\
			
 
				+	{BIT(7),	"Bad DLLP"},			\
			
 
				+	{BIT(8),	"RELAY_NUM Rollover"},		\
			
 
				+	{BIT(12),	"Replay Timer Timeout"},	\
			
 
				+	{BIT(13),	"Advisory Non-Fatal"}
			
 
				+
			
 
				+#define aer_uncorrectable_errors		\
			
 
				+	{BIT(4),	"Data Link Protocol"},		\
			
 
				+	{BIT(12),	"Poisoned TLP"},		\
			
 
				+	{BIT(13),	"Flow Control Protocol"},	\
			
 
				+	{BIT(14),	"Completion Timeout"},		\
			
 
				+	{BIT(15),	"Completer Abort"},		\
			
 
				+	{BIT(16),	"Unexpected Completion"},	\
			
 
				+	{BIT(17),	"Receiver Overflow"},		\
			
 
				+	{BIT(18),	"Malformed TLP"},		\
			
 
				+	{BIT(19),	"ECRC"},			\
			
 
				+	{BIT(20),	"Unsupported Request"}
			
 
				+
			
 
				+TRACE_EVENT(aer_event,
			
 
				+	TP_PROTO(const char *dev_name,
			
 
				+		 const u32 status,
			
 
				+		 const u8 severity),
			
 
				+
			
 
				+	TP_ARGS(dev_name, status, severity),
			
 
				+
			
 
				+	TP_STRUCT__entry(
			
 
				+		__string(	dev_name,	dev_name	)
			
 
				+		__field(	u32,		status		)
			
 
				+		__field(	u8,		severity	)
			
 
				+	),
			
 
				+
			
 
				+	TP_fast_assign(
			
 
				+		__assign_str(dev_name, dev_name);
			
 
				+		__entry->status		= status;
			
 
				+		__entry->severity	= severity;
			
 
				+	),
			
 
				+
			
 
				+	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
			
 
				+		__get_str(dev_name),
			
 
				+		__entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
			
 
				+			__entry->severity == HW_EVENT_ERR_FATAL ?
			
 
				+			"Fatal" : "Uncorrected",
			
 
				+		__entry->severity == HW_EVENT_ERR_CORRECTED ?
			
 
				+		__print_flags(__entry->status, "|", aer_correctable_errors) :
			
 
				+		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
			
 
				+);
			
 
				+
			
 
				+#endif /* _TRACE_AER_H */
			
 
				+
			
 
				+/* This part must be outside protection */
			
 
				+#include <trace/define_trace.h>
			
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -579,7 +579,8 @@ enum perf_event_type {
 
				 	 *	{ u32			size;
			
 
				 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
			
 
				 	 *
			
 
				-	 *	{ u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
			
 
				+	 *	{ u64                   nr;
			
 
				+	 *        { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
			
 
				 	 *
			
 
				 	 * 	{ u64			abi; # enum perf_sample_regs_abi
			
 
				 	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
			
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6171,11 +6171,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
				 
			
 
				 	if (task) {
			
 
				 		event->attach_state = PERF_ATTACH_TASK;
			
 
				+
			
 
				+		if (attr->type == PERF_TYPE_TRACEPOINT)
			
 
				+			event->hw.tp_target = task;
			
 
				 #ifdef CONFIG_HAVE_HW_BREAKPOINT
			
 
				 		/*
			
 
				 		 * hw_breakpoint is a bit difficult here..
			
 
				 		 */
			
 
				-		if (attr->type == PERF_TYPE_BREAKPOINT)
			
 
				+		else if (attr->type == PERF_TYPE_BREAKPOINT)
			
 
				 			event->hw.bp_target = task;
			
 
				 #endif
			
 
				 	}
			
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -676,7 +676,7 @@ int __init init_hw_breakpoint(void)
 
				  err_alloc:
			
 
				 	for_each_possible_cpu(err_cpu) {
			
 
				 		for (i = 0; i < TYPE_MAX; i++)
			
 
				-			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
			
 
				+			kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
			
 
				 		if (err_cpu == cpu)
			
 
				 			break;
			
 
				 	}
			
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -27,6 +27,7 @@
 
				 #include <linux/pagemap.h>	/* read_mapping_page */
			
 
				 #include <linux/slab.h>
			
 
				 #include <linux/sched.h>
			
 
				+#include <linux/export.h>
			
 
				 #include <linux/rmap.h>		/* anon_vma_prepare */
			
 
				 #include <linux/mmu_notifier.h>	/* set_pte_at_notify */
			
 
				 #include <linux/swap.h>		/* try_to_free_swap */
			
@@ -41,58 +42,31 @@
 
				 #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE
			
 
				 
			
 
				 static struct rb_root uprobes_tree = RB_ROOT;
			
 
				-
			
 
				-static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
			
 
				-
			
 
				-#define UPROBES_HASH_SZ	13
			
 
				-
			
 
				 /*
			
 
				- * We need separate register/unregister and mmap/munmap lock hashes because
			
 
				- * of mmap_sem nesting.
			
 
				- *
			
 
				- * uprobe_register() needs to install probes on (potentially) all processes
			
 
				- * and thus needs to acquire multiple mmap_sems (consequtively, not
			
 
				- * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
			
 
				- * for the particular process doing the mmap.
			
 
				- *
			
 
				- * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
			
 
				- * because of lock order against i_mmap_mutex. This means there's a hole in
			
 
				- * the register vma iteration where a mmap() can happen.
			
 
				- *
			
 
				- * Thus uprobe_register() can race with uprobe_mmap() and we can try and
			
 
				- * install a probe where one is already installed.
			
 
				+ * allows us to skip the uprobe_mmap if there are no uprobe events active
			
 
				+ * at this time.  Probably a fine grained per inode count is better?
			
 
				  */
			
 
				+#define no_uprobe_events()	RB_EMPTY_ROOT(&uprobes_tree)
			
 
				 
			
 
				-/* serialize (un)register */
			
 
				-static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
			
 
				-
			
 
				-#define uprobes_hash(v)		(&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
			
 
				+static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
			
 
				 
			
 
				+#define UPROBES_HASH_SZ	13
			
 
				 /* serialize uprobe->pending_list */
			
 
				 static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
			
 
				 #define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
			
 
				 
			
 
				 static struct percpu_rw_semaphore dup_mmap_sem;
			
 
				 
			
 
				-/*
			
 
				- * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
			
 
				- * events active at this time.  Probably a fine grained per inode count is
			
 
				- * better?
			
 
				- */
			
 
				-static atomic_t uprobe_events = ATOMIC_INIT(0);
			
 
				-
			
 
				 /* Have a copy of original instruction */
			
 
				 #define UPROBE_COPY_INSN	0
			
 
				-/* Dont run handlers when first register/ last unregister in progress*/
			
 
				-#define UPROBE_RUN_HANDLER	1
			
 
				 /* Can skip singlestep */
			
 
				-#define UPROBE_SKIP_SSTEP	2
			
 
				+#define UPROBE_SKIP_SSTEP	1
			
 
				 
			
 
				 struct uprobe {
			
 
				 	struct rb_node		rb_node;	/* node in the rb tree */
			
 
				 	atomic_t		ref;
			
 
				+	struct rw_semaphore	register_rwsem;
			
 
				 	struct rw_semaphore	consumer_rwsem;
			
 
				-	struct mutex		copy_mutex;	/* TODO: kill me and UPROBE_COPY_INSN */
			
 
				 	struct list_head	pending_list;
			
 
				 	struct uprobe_consumer	*consumers;
			
 
				 	struct inode		*inode;		/* Also hold a ref to inode */
			
@@ -430,9 +404,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 
				 	u = __insert_uprobe(uprobe);
			
 
				 	spin_unlock(&uprobes_treelock);
			
 
				 
			
 
				-	/* For now assume that the instruction need not be single-stepped */
			
 
				-	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
			
 
				-
			
 
				 	return u;
			
 
				 }
			
 
				 
			
@@ -452,8 +423,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 
				 
			
 
				 	uprobe->inode = igrab(inode);
			
 
				 	uprobe->offset = offset;
			
 
				+	init_rwsem(&uprobe->register_rwsem);
			
 
				 	init_rwsem(&uprobe->consumer_rwsem);
			
 
				-	mutex_init(&uprobe->copy_mutex);
			
 
				+	/* For now assume that the instruction need not be single-stepped */
			
 
				+	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
			
 
				 
			
 
				 	/* add to uprobes_tree, sorted on inode:offset */
			
 
				 	cur_uprobe = insert_uprobe(uprobe);
			
@@ -463,38 +436,17 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 
				 		kfree(uprobe);
			
 
				 		uprobe = cur_uprobe;
			
 
				 		iput(inode);
			
 
				-	} else {
			
 
				-		atomic_inc(&uprobe_events);
			
 
				 	}
			
 
				 
			
 
				 	return uprobe;
			
 
				 }
			
 
				 
			
 
				-static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
			
 
				-{
			
 
				-	struct uprobe_consumer *uc;
			
 
				-
			
 
				-	if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
			
 
				-		return;
			
 
				-
			
 
				-	down_read(&uprobe->consumer_rwsem);
			
 
				-	for (uc = uprobe->consumers; uc; uc = uc->next) {
			
 
				-		if (!uc->filter || uc->filter(uc, current))
			
 
				-			uc->handler(uc, regs);
			
 
				-	}
			
 
				-	up_read(&uprobe->consumer_rwsem);
			
 
				-}
			
 
				-
			
 
				-/* Returns the previous consumer */
			
 
				-static struct uprobe_consumer *
			
 
				-consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
			
 
				+static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
			
 
				 {
			
 
				 	down_write(&uprobe->consumer_rwsem);
			
 
				 	uc->next = uprobe->consumers;
			
 
				 	uprobe->consumers = uc;
			
 
				 	up_write(&uprobe->consumer_rwsem);
			
 
				-
			
 
				-	return uc->next;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -588,7 +540,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 
				 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
			
 
				 		return ret;
			
 
				 
			
 
				-	mutex_lock(&uprobe->copy_mutex);
			
 
				+	/* TODO: move this into _register, until then we abuse this sem. */
			
 
				+	down_write(&uprobe->consumer_rwsem);
			
 
				 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
			
 
				 		goto out;
			
 
				 
			
@@ -612,7 +565,30 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 
				 	set_bit(UPROBE_COPY_INSN, &uprobe->flags);
			
 
				 
			
 
				  out:
			
 
				-	mutex_unlock(&uprobe->copy_mutex);
			
 
				+	up_write(&uprobe->consumer_rwsem);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static inline bool consumer_filter(struct uprobe_consumer *uc,
			
 
				+				   enum uprobe_filter_ctx ctx, struct mm_struct *mm)
			
 
				+{
			
 
				+	return !uc->filter || uc->filter(uc, ctx, mm);
			
 
				+}
			
 
				+
			
 
				+static bool filter_chain(struct uprobe *uprobe,
			
 
				+			 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
			
 
				+{
			
 
				+	struct uprobe_consumer *uc;
			
 
				+	bool ret = false;
			
 
				+
			
 
				+	down_read(&uprobe->consumer_rwsem);
			
 
				+	for (uc = uprobe->consumers; uc; uc = uc->next) {
			
 
				+		ret = consumer_filter(uc, ctx, mm);
			
 
				+		if (ret)
			
 
				+			break;
			
 
				+	}
			
 
				+	up_read(&uprobe->consumer_rwsem);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
@@ -624,16 +600,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 
				 	bool first_uprobe;
			
 
				 	int ret;
			
 
				 
			
 
				-	/*
			
 
				-	 * If probe is being deleted, unregister thread could be done with
			
 
				-	 * the vma-rmap-walk through. Adding a probe now can be fatal since
			
 
				-	 * nobody will be able to cleanup. Also we could be from fork or
			
 
				-	 * mremap path, where the probe might have already been inserted.
			
 
				-	 * Hence behave as if probe already existed.
			
 
				-	 */
			
 
				-	if (!uprobe->consumers)
			
 
				-		return 0;
			
 
				-
			
 
				 	ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
			
 
				 	if (ret)
			
 
				 		return ret;
			
@@ -658,14 +624,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 
				 static int
			
 
				 remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
			
 
				 {
			
 
				-	/* can happen if uprobe_register() fails */
			
 
				-	if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
			
 
				-		return 0;
			
 
				-
			
 
				 	set_bit(MMF_RECALC_UPROBES, &mm->flags);
			
 
				 	return set_orig_insn(&uprobe->arch, mm, vaddr);
			
 
				 }
			
 
				 
			
 
				+static inline bool uprobe_is_active(struct uprobe *uprobe)
			
 
				+{
			
 
				+	return !RB_EMPTY_NODE(&uprobe->rb_node);
			
 
				+}
			
 
				 /*
			
 
				  * There could be threads that have already hit the breakpoint. They
			
 
				  * will recheck the current insn and restart if find_uprobe() fails.
			
@@ -673,12 +639,15 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
 
				  */
			
 
				 static void delete_uprobe(struct uprobe *uprobe)
			
 
				 {
			
 
				+	if (WARN_ON(!uprobe_is_active(uprobe)))
			
 
				+		return;
			
 
				+
			
 
				 	spin_lock(&uprobes_treelock);
			
 
				 	rb_erase(&uprobe->rb_node, &uprobes_tree);
			
 
				 	spin_unlock(&uprobes_treelock);
			
 
				+	RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
			
 
				 	iput(uprobe->inode);
			
 
				 	put_uprobe(uprobe);
			
 
				-	atomic_dec(&uprobe_events);
			
 
				 }
			
 
				 
			
 
				 struct map_info {
			
@@ -764,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 
				 	return curr;
			
 
				 }
			
 
				 
			
 
				-static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
			
 
				+static int
			
 
				+register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
			
 
				 {
			
 
				+	bool is_register = !!new;
			
 
				 	struct map_info *info;
			
 
				 	int err = 0;
			
 
				 
			
@@ -794,10 +765,16 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 
				 		    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
			
 
				 			goto unlock;
			
 
				 
			
 
				-		if (is_register)
			
 
				-			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
			
 
				-		else
			
 
				-			err |= remove_breakpoint(uprobe, mm, info->vaddr);
			
 
				+		if (is_register) {
			
 
				+			/* consult only the "caller", new consumer. */
			
 
				+			if (consumer_filter(new,
			
 
				+					UPROBE_FILTER_REGISTER, mm))
			
 
				+				err = install_breakpoint(uprobe, mm, vma, info->vaddr);
			
 
				+		} else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
			
 
				+			if (!filter_chain(uprobe,
			
 
				+					UPROBE_FILTER_UNREGISTER, mm))
			
 
				+				err |= remove_breakpoint(uprobe, mm, info->vaddr);
			
 
				+		}
			
 
				 
			
 
				  unlock:
			
 
				 		up_write(&mm->mmap_sem);
			
@@ -810,17 +787,23 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-static int __uprobe_register(struct uprobe *uprobe)
			
 
				+static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
			
 
				 {
			
 
				-	return register_for_each_vma(uprobe, true);
			
 
				+	consumer_add(uprobe, uc);
			
 
				+	return register_for_each_vma(uprobe, uc);
			
 
				 }
			
 
				 
			
 
				-static void __uprobe_unregister(struct uprobe *uprobe)
			
 
				+static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
			
 
				 {
			
 
				-	if (!register_for_each_vma(uprobe, false))
			
 
				-		delete_uprobe(uprobe);
			
 
				+	int err;
			
 
				+
			
 
				+	if (!consumer_del(uprobe, uc))	/* WARN? */
			
 
				+		return;
			
 
				 
			
 
				+	err = register_for_each_vma(uprobe, NULL);
			
 
				 	/* TODO : cant unregister? schedule a worker thread */
			
 
				+	if (!uprobe->consumers && !err)
			
 
				+		delete_uprobe(uprobe);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -845,31 +828,59 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 
				 	struct uprobe *uprobe;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (!inode || !uc || uc->next)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				+	/* Racy, just to catch the obvious mistakes */
			
 
				 	if (offset > i_size_read(inode))
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	ret = 0;
			
 
				-	mutex_lock(uprobes_hash(inode));
			
 
				+ retry:
			
 
				 	uprobe = alloc_uprobe(inode, offset);
			
 
				-
			
 
				-	if (!uprobe) {
			
 
				-		ret = -ENOMEM;
			
 
				-	} else if (!consumer_add(uprobe, uc)) {
			
 
				-		ret = __uprobe_register(uprobe);
			
 
				-		if (ret) {
			
 
				-			uprobe->consumers = NULL;
			
 
				-			__uprobe_unregister(uprobe);
			
 
				-		} else {
			
 
				-			set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
			
 
				-		}
			
 
				+	if (!uprobe)
			
 
				+		return -ENOMEM;
			
 
				+	/*
			
 
				+	 * We can race with uprobe_unregister()->delete_uprobe().
			
 
				+	 * Check uprobe_is_active() and retry if it is false.
			
 
				+	 */
			
 
				+	down_write(&uprobe->register_rwsem);
			
 
				+	ret = -EAGAIN;
			
 
				+	if (likely(uprobe_is_active(uprobe))) {
			
 
				+		ret = __uprobe_register(uprobe, uc);
			
 
				+		if (ret)
			
 
				+			__uprobe_unregister(uprobe, uc);
			
 
				 	}
			
 
				+	up_write(&uprobe->register_rwsem);
			
 
				+	put_uprobe(uprobe);
			
 
				 
			
 
				-	mutex_unlock(uprobes_hash(inode));
			
 
				-	if (uprobe)
			
 
				-		put_uprobe(uprobe);
			
 
				+	if (unlikely(ret == -EAGAIN))
			
 
				+		goto retry;
			
 
				+	return ret;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(uprobe_register);
			
 
				+
			
 
				+/*
			
 
				+ * uprobe_apply - add or remove breakpoints for an already registered consumer.
			
 
				+ * @inode: the file in which the probe is registered.
			
 
				+ * @offset: offset from the start of the file.
			
 
				+ * @uc: consumer which wants to add more or remove some breakpoints
			
 
				+ * @add: add or remove the breakpoints
			
 
				+ */
			
 
				+int uprobe_apply(struct inode *inode, loff_t offset,
			
 
				+			struct uprobe_consumer *uc, bool add)
			
 
				+{
			
 
				+	struct uprobe *uprobe;
			
 
				+	struct uprobe_consumer *con;
			
 
				+	int ret = -ENOENT;
			
 
				+
			
 
				+	uprobe = find_uprobe(inode, offset);
			
 
				+	if (!uprobe)
			
 
				+		return ret;
			
 
				+
			
 
				+	down_write(&uprobe->register_rwsem);
			
 
				+	for (con = uprobe->consumers; con && con != uc ; con = con->next)
			
 
				+		;
			
 
				+	if (con)
			
 
				+		ret = register_for_each_vma(uprobe, add ? uc : NULL);
			
 
				+	up_write(&uprobe->register_rwsem);
			
 
				+	put_uprobe(uprobe);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
@@ -884,25 +895,42 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 
				 {
			
 
				 	struct uprobe *uprobe;
			
 
				 
			
 
				-	if (!inode || !uc)
			
 
				-		return;
			
 
				-
			
 
				 	uprobe = find_uprobe(inode, offset);
			
 
				 	if (!uprobe)
			
 
				 		return;
			
 
				 
			
 
				-	mutex_lock(uprobes_hash(inode));
			
 
				+	down_write(&uprobe->register_rwsem);
			
 
				+	__uprobe_unregister(uprobe, uc);
			
 
				+	up_write(&uprobe->register_rwsem);
			
 
				+	put_uprobe(uprobe);
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(uprobe_unregister);
			
 
				 
			
 
				-	if (consumer_del(uprobe, uc)) {
			
 
				-		if (!uprobe->consumers) {
			
 
				-			__uprobe_unregister(uprobe);
			
 
				-			clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
			
 
				-		}
			
 
				+static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
			
 
				+{
			
 
				+	struct vm_area_struct *vma;
			
 
				+	int err = 0;
			
 
				+
			
 
				+	down_read(&mm->mmap_sem);
			
 
				+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
			
 
				+		unsigned long vaddr;
			
 
				+		loff_t offset;
			
 
				+
			
 
				+		if (!valid_vma(vma, false) ||
			
 
				+		    vma->vm_file->f_mapping->host != uprobe->inode)
			
 
				+			continue;
			
 
				+
			
 
				+		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
			
 
				+		if (uprobe->offset <  offset ||
			
 
				+		    uprobe->offset >= offset + vma->vm_end - vma->vm_start)
			
 
				+			continue;
			
 
				+
			
 
				+		vaddr = offset_to_vaddr(vma, uprobe->offset);
			
 
				+		err |= remove_breakpoint(uprobe, mm, vaddr);
			
 
				 	}
			
 
				+	up_read(&mm->mmap_sem);
			
 
				 
			
 
				-	mutex_unlock(uprobes_hash(inode));
			
 
				-	if (uprobe)
			
 
				-		put_uprobe(uprobe);
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 static struct rb_node *
			
@@ -979,7 +1007,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 
				 	struct uprobe *uprobe, *u;
			
 
				 	struct inode *inode;
			
 
				 
			
 
				-	if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
			
 
				+	if (no_uprobe_events() || !valid_vma(vma, true))
			
 
				 		return 0;
			
 
				 
			
 
				 	inode = vma->vm_file->f_mapping->host;
			
@@ -988,9 +1016,14 @@ int uprobe_mmap(struct vm_area_struct *vma)
 
				 
			
 
				 	mutex_lock(uprobes_mmap_hash(inode));
			
 
				 	build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
			
 
				-
			
 
				+	/*
			
 
				+	 * We can race with uprobe_unregister(), this uprobe can be already
			
 
				+	 * removed. But in this case filter_chain() must return false, all
			
 
				+	 * consumers have gone away.
			
 
				+	 */
			
 
				 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
			
 
				-		if (!fatal_signal_pending(current)) {
			
 
				+		if (!fatal_signal_pending(current) &&
			
 
				+		    filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
			
 
				 			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
			
 
				 			install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
			
 
				 		}
			
@@ -1025,7 +1058,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
 
				  */
			
 
				 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
			
 
				 {
			
 
				-	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
			
 
				+	if (no_uprobe_events() || !valid_vma(vma, false))
			
 
				 		return;
			
 
				 
			
 
				 	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
			
@@ -1042,22 +1075,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 
				 /* Slot allocation for XOL */
			
 
				 static int xol_add_vma(struct xol_area *area)
			
 
				 {
			
 
				-	struct mm_struct *mm;
			
 
				-	int ret;
			
 
				-
			
 
				-	area->page = alloc_page(GFP_HIGHUSER);
			
 
				-	if (!area->page)
			
 
				-		return -ENOMEM;
			
 
				-
			
 
				-	ret = -EALREADY;
			
 
				-	mm = current->mm;
			
 
				+	struct mm_struct *mm = current->mm;
			
 
				+	int ret = -EALREADY;
			
 
				 
			
 
				 	down_write(&mm->mmap_sem);
			
 
				 	if (mm->uprobes_state.xol_area)
			
 
				 		goto fail;
			
 
				 
			
 
				 	ret = -ENOMEM;
			
 
				-
			
 
				 	/* Try to map as high as possible, this is only a hint. */
			
 
				 	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
			
 
				 	if (area->vaddr & ~PAGE_MASK) {
			
@@ -1073,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area)
 
				 	smp_wmb();	/* pairs with get_xol_area() */
			
 
				 	mm->uprobes_state.xol_area = area;
			
 
				 	ret = 0;
			
 
				-
			
 
				-fail:
			
 
				+ fail:
			
 
				 	up_write(&mm->mmap_sem);
			
 
				-	if (ret)
			
 
				-		__free_page(area->page);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static struct xol_area *get_xol_area(struct mm_struct *mm)
			
 
				-{
			
 
				-	struct xol_area *area;
			
 
				-
			
 
				-	area = mm->uprobes_state.xol_area;
			
 
				-	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
			
 
				-
			
 
				-	return area;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				- * xol_alloc_area - Allocate process's xol_area.
			
 
				- * This area will be used for storing instructions for execution out of
			
 
				- * line.
			
 
				+ * get_xol_area - Allocate process's xol_area if necessary.
			
 
				+ * This area will be used for storing instructions for execution out of line.
			
 
				  *
			
 
				  * Returns the allocated area or NULL.
			
 
				  */
			
 
				-static struct xol_area *xol_alloc_area(void)
			
 
				+static struct xol_area *get_xol_area(void)
			
 
				 {
			
 
				+	struct mm_struct *mm = current->mm;
			
 
				 	struct xol_area *area;
			
 
				 
			
 
				+	area = mm->uprobes_state.xol_area;
			
 
				+	if (area)
			
 
				+		goto ret;
			
 
				+
			
 
				 	area = kzalloc(sizeof(*area), GFP_KERNEL);
			
 
				 	if (unlikely(!area))
			
 
				-		return NULL;
			
 
				+		goto out;
			
 
				 
			
 
				 	area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
			
 
				-
			
 
				 	if (!area->bitmap)
			
 
				-		goto fail;
			
 
				+		goto free_area;
			
 
				+
			
 
				+	area->page = alloc_page(GFP_HIGHUSER);
			
 
				+	if (!area->page)
			
 
				+		goto free_bitmap;
			
 
				 
			
 
				 	init_waitqueue_head(&area->wq);
			
 
				 	if (!xol_add_vma(area))
			
 
				 		return area;
			
 
				 
			
 
				-fail:
			
 
				+	__free_page(area->page);
			
 
				+ free_bitmap:
			
 
				 	kfree(area->bitmap);
			
 
				+ free_area:
			
 
				 	kfree(area);
			
 
				-
			
 
				-	return get_xol_area(current->mm);
			
 
				+ out:
			
 
				+	area = mm->uprobes_state.xol_area;
			
 
				+ ret:
			
 
				+	smp_read_barrier_depends();     /* pairs with wmb in xol_add_vma() */
			
 
				+	return area;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1186,33 +1210,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * xol_get_insn_slot - If was not allocated a slot, then
			
 
				- * allocate a slot.
			
 
				+ * xol_get_insn_slot - allocate a slot for xol.
			
 
				  * Returns the allocated slot address or 0.
			
 
				  */
			
 
				-static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
			
 
				+static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
			
 
				 {
			
 
				 	struct xol_area *area;
			
 
				 	unsigned long offset;
			
 
				+	unsigned long xol_vaddr;
			
 
				 	void *vaddr;
			
 
				 
			
 
				-	area = get_xol_area(current->mm);
			
 
				-	if (!area) {
			
 
				-		area = xol_alloc_area();
			
 
				-		if (!area)
			
 
				-			return 0;
			
 
				-	}
			
 
				-	current->utask->xol_vaddr = xol_take_insn_slot(area);
			
 
				+	area = get_xol_area();
			
 
				+	if (!area)
			
 
				+		return 0;
			
 
				 
			
 
				-	/*
			
 
				-	 * Initialize the slot if xol_vaddr points to valid
			
 
				-	 * instruction slot.
			
 
				-	 */
			
 
				-	if (unlikely(!current->utask->xol_vaddr))
			
 
				+	xol_vaddr = xol_take_insn_slot(area);
			
 
				+	if (unlikely(!xol_vaddr))
			
 
				 		return 0;
			
 
				 
			
 
				-	current->utask->vaddr = slot_addr;
			
 
				-	offset = current->utask->xol_vaddr & ~PAGE_MASK;
			
 
				+	/* Initialize the slot */
			
 
				+	offset = xol_vaddr & ~PAGE_MASK;
			
 
				 	vaddr = kmap_atomic(area->page);
			
 
				 	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
			
 
				 	kunmap_atomic(vaddr);
			
@@ -1222,7 +1239,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot
 
				 	 */
			
 
				 	flush_dcache_page(area->page);
			
 
				 
			
 
				-	return current->utask->xol_vaddr;
			
 
				+	return xol_vaddr;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1240,8 +1257,7 @@ static void xol_free_insn_slot(struct task_struct *tsk)
 
				 		return;
			
 
				 
			
 
				 	slot_addr = tsk->utask->xol_vaddr;
			
 
				-
			
 
				-	if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
			
 
				+	if (unlikely(!slot_addr))
			
 
				 		return;
			
 
				 
			
 
				 	area = tsk->mm->uprobes_state.xol_area;
			
@@ -1303,33 +1319,48 @@ void uprobe_copy_process(struct task_struct *t)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Allocate a uprobe_task object for the task.
			
 
				- * Called when the thread hits a breakpoint for the first time.
			
 
				+ * Allocate a uprobe_task object for the task if necessary.
			
 
				+ * Called when the thread hits a breakpoint.
			
 
				  *
			
 
				  * Returns:
			
 
				  * - pointer to new uprobe_task on success
			
 
				  * - NULL otherwise
			
 
				  */
			
 
				-static struct uprobe_task *add_utask(void)
			
 
				+static struct uprobe_task *get_utask(void)
			
 
				 {
			
 
				-	struct uprobe_task *utask;
			
 
				-
			
 
				-	utask = kzalloc(sizeof *utask, GFP_KERNEL);
			
 
				-	if (unlikely(!utask))
			
 
				-		return NULL;
			
 
				-
			
 
				-	current->utask = utask;
			
 
				-	return utask;
			
 
				+	if (!current->utask)
			
 
				+		current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
			
 
				+	return current->utask;
			
 
				 }
			
 
				 
			
 
				 /* Prepare to single-step probed instruction out of line. */
			
 
				 static int
			
 
				-pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
			
 
				+pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
			
 
				 {
			
 
				-	if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
			
 
				-		return 0;
			
 
				+	struct uprobe_task *utask;
			
 
				+	unsigned long xol_vaddr;
			
 
				+	int err;
			
 
				+
			
 
				+	utask = get_utask();
			
 
				+	if (!utask)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	xol_vaddr = xol_get_insn_slot(uprobe);
			
 
				+	if (!xol_vaddr)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	utask->xol_vaddr = xol_vaddr;
			
 
				+	utask->vaddr = bp_vaddr;
			
 
				+
			
 
				+	err = arch_uprobe_pre_xol(&uprobe->arch, regs);
			
 
				+	if (unlikely(err)) {
			
 
				+		xol_free_insn_slot(current);
			
 
				+		return err;
			
 
				+	}
			
 
				 
			
 
				-	return -EFAULT;
			
 
				+	utask->active_uprobe = uprobe;
			
 
				+	utask->state = UTASK_SSTEP;
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1391,6 +1422,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 
				 		 * This is not strictly accurate, we can race with
			
 
				 		 * uprobe_unregister() and see the already removed
			
 
				 		 * uprobe if delete_uprobe() was not yet called.
			
 
				+		 * Or this uprobe can be filtered out.
			
 
				 		 */
			
 
				 		if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
			
 
				 			return;
			
@@ -1452,13 +1484,33 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 
				 	return uprobe;
			
 
				 }
			
 
				 
			
 
				+static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
			
 
				+{
			
 
				+	struct uprobe_consumer *uc;
			
 
				+	int remove = UPROBE_HANDLER_REMOVE;
			
 
				+
			
 
				+	down_read(&uprobe->register_rwsem);
			
 
				+	for (uc = uprobe->consumers; uc; uc = uc->next) {
			
 
				+		int rc = uc->handler(uc, regs);
			
 
				+
			
 
				+		WARN(rc & ~UPROBE_HANDLER_MASK,
			
 
				+			"bad rc=0x%x from %pf()\n", rc, uc->handler);
			
 
				+		remove &= rc;
			
 
				+	}
			
 
				+
			
 
				+	if (remove && uprobe->consumers) {
			
 
				+		WARN_ON(!uprobe_is_active(uprobe));
			
 
				+		unapply_uprobe(uprobe, current->mm);
			
 
				+	}
			
 
				+	up_read(&uprobe->register_rwsem);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Run handler and ask thread to singlestep.
			
 
				  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
			
 
				  */
			
 
				 static void handle_swbp(struct pt_regs *regs)
			
 
				 {
			
 
				-	struct uprobe_task *utask;
			
 
				 	struct uprobe *uprobe;
			
 
				 	unsigned long bp_vaddr;
			
 
				 	int uninitialized_var(is_swbp);
			
@@ -1483,6 +1535,10 @@ static void handle_swbp(struct pt_regs *regs)
 
				 		}
			
 
				 		return;
			
 
				 	}
			
 
				+
			
 
				+	/* change it in advance for ->handler() and restart */
			
 
				+	instruction_pointer_set(regs, bp_vaddr);
			
 
				+
			
 
				 	/*
			
 
				 	 * TODO: move copy_insn/etc into _register and remove this hack.
			
 
				 	 * After we hit the bp, _unregister + _register can install the
			
@@ -1490,32 +1546,16 @@ static void handle_swbp(struct pt_regs *regs)
 
				 	 */
			
 
				 	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
			
 
				 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
			
 
				-		goto restart;
			
 
				-
			
 
				-	utask = current->utask;
			
 
				-	if (!utask) {
			
 
				-		utask = add_utask();
			
 
				-		/* Cannot allocate; re-execute the instruction. */
			
 
				-		if (!utask)
			
 
				-			goto restart;
			
 
				-	}
			
 
				+		goto out;
			
 
				 
			
 
				 	handler_chain(uprobe, regs);
			
 
				 	if (can_skip_sstep(uprobe, regs))
			
 
				 		goto out;
			
 
				 
			
 
				-	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
			
 
				-		utask->active_uprobe = uprobe;
			
 
				-		utask->state = UTASK_SSTEP;
			
 
				+	if (!pre_ssout(uprobe, regs, bp_vaddr))
			
 
				 		return;
			
 
				-	}
			
 
				 
			
 
				-restart:
			
 
				-	/*
			
 
				-	 * cannot singlestep; cannot skip instruction;
			
 
				-	 * re-execute the instruction.
			
 
				-	 */
			
 
				-	instruction_pointer_set(regs, bp_vaddr);
			
 
				+	/* can_skip_sstep() succeeded, or restart if can't singlestep */
			
 
				 out:
			
 
				 	put_uprobe(uprobe);
			
 
				 }
			
@@ -1609,10 +1649,8 @@ static int __init init_uprobes(void)
 
				 {
			
 
				 	int i;
			
 
				 
			
 
				-	for (i = 0; i < UPROBES_HASH_SZ; i++) {
			
 
				-		mutex_init(&uprobes_mutex[i]);
			
 
				+	for (i = 0; i < UPROBES_HASH_SZ; i++)
			
 
				 		mutex_init(&uprobes_mmap_mutex[i]);
			
 
				-	}
			
 
				 
			
 
				 	if (percpu_init_rwsem(&dup_mmap_sem))
			
 
				 		return -ENOMEM;
			
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 
				 }
			
 
				 #endif /* CONFIG_OPTPROBES */
			
 
				 
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE
			
 
				 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
			
 
				 	.func = kprobe_ftrace_handler,
			
 
				 	.flags = FTRACE_OPS_FL_SAVE_REGS,
			
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
 
				 			   (unsigned long)p->addr, 1, 0);
			
 
				 	WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
			
 
				 }
			
 
				-#else	/* !KPROBES_CAN_USE_FTRACE */
			
 
				+#else	/* !CONFIG_KPROBES_ON_FTRACE */
			
 
				 #define prepare_kprobe(p)	arch_prepare_kprobe(p)
			
 
				 #define arm_kprobe_ftrace(p)	do {} while (0)
			
 
				 #define disarm_kprobe_ftrace(p)	do {} while (0)
			
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 
				 	 */
			
 
				 	ftrace_addr = ftrace_location((unsigned long)p->addr);
			
 
				 	if (ftrace_addr) {
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE
			
 
				 		/* Given address is not on the instruction boundary */
			
 
				 		if ((unsigned long)p->addr != ftrace_addr)
			
 
				 			return -EILSEQ;
			
 
				 		p->flags |= KPROBE_FLAG_FTRACE;
			
 
				-#else	/* !KPROBES_CAN_USE_FTRACE */
			
 
				+#else	/* !CONFIG_KPROBES_ON_FTRACE */
			
 
				 		return -EINVAL;
			
 
				 #endif
			
 
				 	}
			
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,9 +37,6 @@ struct profile_hit {
 
				 #define NR_PROFILE_HIT		(PAGE_SIZE/sizeof(struct profile_hit))
			
 
				 #define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
			
 
				 
			
 
				-/* Oprofile timer tick hook */
			
 
				-static int (*timer_hook)(struct pt_regs *) __read_mostly;
			
 
				-
			
 
				 static atomic_t *prof_buffer;
			
 
				 static unsigned long prof_len, prof_shift;
			
 
				 
			
@@ -208,25 +205,6 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(profile_event_unregister);
			
 
				 
			
 
				-int register_timer_hook(int (*hook)(struct pt_regs *))
			
 
				-{
			
 
				-	if (timer_hook)
			
 
				-		return -EBUSY;
			
 
				-	timer_hook = hook;
			
 
				-	return 0;
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(register_timer_hook);
			
 
				-
			
 
				-void unregister_timer_hook(int (*hook)(struct pt_regs *))
			
 
				-{
			
 
				-	WARN_ON(hook != timer_hook);
			
 
				-	timer_hook = NULL;
			
 
				-	/* make sure all CPUs see the NULL hook */
			
 
				-	synchronize_sched();  /* Allow ongoing interrupts to complete. */
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(unregister_timer_hook);
			
 
				-
			
 
				-
			
 
				 #ifdef CONFIG_SMP
			
 
				 /*
			
 
				  * Each cpu has a pair of open-addressed hashtables for pending
			
@@ -436,8 +414,6 @@ void profile_tick(int type)
 
				 {
			
 
				 	struct pt_regs *regs = get_irq_regs();
			
 
				 
			
 
				-	if (type == CPU_PROFILING && timer_hook)
			
 
				-		timer_hook(regs);
			
 
				 	if (!user_mode(regs) && prof_cpu_mask != NULL &&
			
 
				 	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
			
 
				 		profile_hit(type, (void *)profile_pc(regs));
			
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
 
				 					     kiov->iov_len, kiov->iov_base);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * This is declared in linux/regset.h and defined in machine-dependent
			
 
				+ * code.  We put the export here, near the primary machine-neutral use,
			
 
				+ * to ensure no machine forgets it.
			
 
				+ */
			
 
				+EXPORT_SYMBOL_GPL(task_user_regset_view);
			
 
				 #endif
			
 
				 
			
 
				 int ptrace_request(struct task_struct *child, long request,
			
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
 
				 	help
			
 
				 	  See Documentation/trace/ftrace-design.txt
			
 
				 
			
 
				+config HAVE_DYNAMIC_FTRACE_WITH_REGS
			
 
				+	bool
			
 
				+
			
 
				 config HAVE_FTRACE_MCOUNT_RECORD
			
 
				 	bool
			
 
				 	help
			
@@ -250,6 +253,16 @@ config FTRACE_SYSCALLS
 
				 	help
			
 
				 	  Basic tracer to catch the syscall entry and exit events.
			
 
				 
			
 
				+config TRACER_SNAPSHOT
			
 
				+	bool "Create a snapshot trace buffer"
			
 
				+	select TRACER_MAX_TRACE
			
 
				+	help
			
 
				+	  Allow tracing users to take a snapshot of the current buffer using the
			
 
				+	  ftrace interface, e.g.:
			
 
				+
			
 
				+	      echo 1 > /sys/kernel/debug/tracing/snapshot
			
 
				+	      cat snapshot
			
 
				+
			
 
				 config TRACE_BRANCH_PROFILING
			
 
				 	bool
			
 
				 	select GENERIC_TRACER
			
@@ -434,6 +447,11 @@ config DYNAMIC_FTRACE
 
				 	  were made. If so, it runs stop_machine (stops all CPUS)
			
 
				 	  and modifies the code to jump over the call to ftrace.
			
 
				 
			
 
				+config DYNAMIC_FTRACE_WITH_REGS
			
 
				+	def_bool y
			
 
				+	depends on DYNAMIC_FTRACE
			
 
				+	depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
			
 
				+
			
 
				 config FUNCTION_PROFILER
			
 
				 	bool "Kernel function profiler"
			
 
				 	depends on FUNCTION_TRACER
			
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
 
				 		return;
			
 
				 
			
 
				 	local_irq_save(flags);
			
 
				-	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
			
 
				+	buf = this_cpu_ptr(bt->msg_data);
			
 
				 	va_start(args, fmt);
			
 
				 	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
			
 
				 	va_end(args);
			
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 
				 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
			
 
				 #endif
			
 
				 
			
 
				+/*
			
 
				+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
			
 
				+ * can use rcu_dereference_raw() is that elements removed from this list
			
 
				+ * are simply leaked, so there is no need to interact with a grace-period
			
 
				+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
			
 
				+ * concurrent insertions into the ftrace_global_list.
			
 
				+ *
			
 
				+ * Silly Alpha and silly pointer-speculation compiler optimizations!
			
 
				+ */
			
 
				+#define do_for_each_ftrace_op(op, list)			\
			
 
				+	op = rcu_dereference_raw(list);			\
			
 
				+	do
			
 
				+
			
 
				+/*
			
 
				+ * Optimized for just a single item in the list (as that is the normal case).
			
 
				+ */
			
 
				+#define while_for_each_ftrace_op(op)				\
			
 
				+	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
			
 
				+	       unlikely((op) != &ftrace_list_end))
			
 
				+
			
 
				 /**
			
 
				  * ftrace_nr_registered_ops - return number of ops registered
			
 
				  *
			
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
 
				 	return cnt;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
			
 
				- * can use rcu_dereference_raw() is that elements removed from this list
			
 
				- * are simply leaked, so there is no need to interact with a grace-period
			
 
				- * mechanism.  The rcu_dereference_raw() calls are needed to handle
			
 
				- * concurrent insertions into the ftrace_global_list.
			
 
				- *
			
 
				- * Silly Alpha and silly pointer-speculation compiler optimizations!
			
 
				- */
			
 
				 static void
			
 
				 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
			
 
				 			struct ftrace_ops *op, struct pt_regs *regs)
			
 
				 {
			
 
				-	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
			
 
				+	int bit;
			
 
				+
			
 
				+	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
			
 
				+	if (bit < 0)
			
 
				 		return;
			
 
				 
			
 
				-	trace_recursion_set(TRACE_GLOBAL_BIT);
			
 
				-	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
			
 
				-	while (op != &ftrace_list_end) {
			
 
				+	do_for_each_ftrace_op(op, ftrace_global_list) {
			
 
				 		op->func(ip, parent_ip, op, regs);
			
 
				-		op = rcu_dereference_raw(op->next); /*see above*/
			
 
				-	};
			
 
				-	trace_recursion_clear(TRACE_GLOBAL_BIT);
			
 
				+	} while_for_each_ftrace_op(op);
			
 
				+
			
 
				+	trace_clear_recursion(bit);
			
 
				 }
			
 
				 
			
 
				 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
			
@@ -221,10 +233,24 @@ static void update_global_ops(void)
 
				 	 * registered callers.
			
 
				 	 */
			
 
				 	if (ftrace_global_list == &ftrace_list_end ||
			
 
				-	    ftrace_global_list->next == &ftrace_list_end)
			
 
				+	    ftrace_global_list->next == &ftrace_list_end) {
			
 
				 		func = ftrace_global_list->func;
			
 
				-	else
			
 
				+		/*
			
 
				+		 * As we are calling the function directly.
			
 
				+		 * If it does not have recursion protection,
			
 
				+		 * the function_trace_op needs to be updated
			
 
				+		 * accordingly.
			
 
				+		 */
			
 
				+		if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
			
 
				+			global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
			
 
				+		else
			
 
				+			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
			
 
				+	} else {
			
 
				 		func = ftrace_global_list_func;
			
 
				+		/* The list has its own recursion protection. */
			
 
				+		global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
			
 
				+	}
			
 
				+
			
 
				 
			
 
				 	/* If we filter on pids, update to use the pid function */
			
 
				 	if (!list_empty(&ftrace_pids)) {
			
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 
				 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 	/*
			
 
				 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
			
 
				 	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
			
@@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 
				 	 */
			
 
				 	preempt_disable_notrace();
			
 
				 	trace_recursion_set(TRACE_CONTROL_BIT);
			
 
				-	op = rcu_dereference_raw(ftrace_control_list);
			
 
				-	while (op != &ftrace_list_end) {
			
 
				+	do_for_each_ftrace_op(op, ftrace_control_list) {
			
 
				 		if (!ftrace_function_local_disabled(op) &&
			
 
				 		    ftrace_ops_test(op, ip))
			
 
				 			op->func(ip, parent_ip, op, regs);
			
 
				-
			
 
				-		op = rcu_dereference_raw(op->next);
			
 
				-	};
			
 
				+	} while_for_each_ftrace_op(op);
			
 
				 	trace_recursion_clear(TRACE_CONTROL_BIT);
			
 
				 	preempt_enable_notrace();
			
 
				 }
			
@@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 
				 		       struct ftrace_ops *ignored, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct ftrace_ops *op;
			
 
				+	int bit;
			
 
				 
			
 
				 	if (function_trace_stop)
			
 
				 		return;
			
 
				 
			
 
				-	if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
			
 
				+	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
			
 
				+	if (bit < 0)
			
 
				 		return;
			
 
				 
			
 
				-	trace_recursion_set(TRACE_INTERNAL_BIT);
			
 
				 	/*
			
 
				 	 * Some of the ops may be dynamically allocated,
			
 
				 	 * they must be freed after a synchronize_sched().
			
 
				 	 */
			
 
				 	preempt_disable_notrace();
			
 
				-	op = rcu_dereference_raw(ftrace_ops_list);
			
 
				-	while (op != &ftrace_list_end) {
			
 
				+	do_for_each_ftrace_op(op, ftrace_ops_list) {
			
 
				 		if (ftrace_ops_test(op, ip))
			
 
				 			op->func(ip, parent_ip, op, regs);
			
 
				-		op = rcu_dereference_raw(op->next);
			
 
				-	};
			
 
				+	} while_for_each_ftrace_op(op);
			
 
				 	preempt_enable_notrace();
			
 
				-	trace_recursion_clear(TRACE_INTERNAL_BIT);
			
 
				+	trace_clear_recursion(bit);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 
				  * Archs are to support both the regs and ftrace_ops at the same time.
			
 
				  * If they support ftrace_ops, it is assumed they support regs.
			
 
				  * If call backs want to use regs, they must either check for regs
			
 
				- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
			
 
				- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
			
 
				+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
			
 
				+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
			
 
				  * An architecture can pass partial regs with ftrace_ops and still
			
 
				  * set the ARCH_SUPPORTS_FTRACE_OPS.
			
 
				  */
			
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,8 +3,10 @@
 
				  *
			
 
				  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
			
 
				  */
			
 
				+#include <linux/ftrace_event.h>
			
 
				 #include <linux/ring_buffer.h>
			
 
				 #include <linux/trace_clock.h>
			
 
				+#include <linux/trace_seq.h>
			
 
				 #include <linux/spinlock.h>
			
 
				 #include <linux/debugfs.h>
			
 
				 #include <linux/uaccess.h>
			
@@ -21,7 +23,6 @@
 
				 #include <linux/fs.h>
			
 
				 
			
 
				 #include <asm/local.h>
			
 
				-#include "trace.h"
			
 
				 
			
 
				 static void update_pages_handler(struct work_struct *work);
			
 
				 
			
@@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
				 
			
 
				 #ifdef CONFIG_TRACING
			
 
				 
			
 
				-#define TRACE_RECURSIVE_DEPTH 16
			
 
				+/*
			
 
				+ * The lock and unlock are done within a preempt disable section.
			
 
				+ * The current_context per_cpu variable can only be modified
			
 
				+ * by the current task between lock and unlock. But it can
			
 
				+ * be modified more than once via an interrupt. To pass this
			
 
				+ * information from the lock to the unlock without having to
			
 
				+ * access the 'in_interrupt()' functions again (which do show
			
 
				+ * a bit of overhead in something as critical as function tracing),
			
 
				+ * we use a bitmask trick.
			
 
				+ *
			
 
				+ *  bit 0 =  NMI context
			
 
				+ *  bit 1 =  IRQ context
			
 
				+ *  bit 2 =  SoftIRQ context
			
 
				+ *  bit 3 =  normal context.
			
 
				+ *
			
 
				+ * This works because this is the order of contexts that can
			
 
				+ * preempt other contexts. A SoftIRQ never preempts an IRQ
			
 
				+ * context.
			
 
				+ *
			
 
				+ * When the context is determined, the corresponding bit is
			
 
				+ * checked and set (if it was set, then a recursion of that context
			
 
				+ * happened).
			
 
				+ *
			
 
				+ * On unlock, we need to clear this bit. To do so, just subtract
			
 
				+ * 1 from the current_context and AND it to itself.
			
 
				+ *
			
 
				+ * (binary)
			
 
				+ *  101 - 1 = 100
			
 
				+ *  101 & 100 = 100 (clearing bit zero)
			
 
				+ *
			
 
				+ *  1010 - 1 = 1001
			
 
				+ *  1010 & 1001 = 1000 (clearing bit 1)
			
 
				+ *
			
 
				+ * The least significant bit can be cleared this way, and it
			
 
				+ * just so happens that it is the same bit corresponding to
			
 
				+ * the current context.
			
 
				+ */
			
 
				+static DEFINE_PER_CPU(unsigned int, current_context);
			
 
				 
			
 
				-/* Keep this code out of the fast path cache */
			
 
				-static noinline void trace_recursive_fail(void)
			
 
				+static __always_inline int trace_recursive_lock(void)
			
 
				 {
			
 
				-	/* Disable all tracing before we do anything else */
			
 
				-	tracing_off_permanent();
			
 
				-
			
 
				-	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
			
 
				-		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
			
 
				-		    trace_recursion_buffer(),
			
 
				-		    hardirq_count() >> HARDIRQ_SHIFT,
			
 
				-		    softirq_count() >> SOFTIRQ_SHIFT,
			
 
				-		    in_nmi());
			
 
				-
			
 
				-	WARN_ON_ONCE(1);
			
 
				-}
			
 
				+	unsigned int val = this_cpu_read(current_context);
			
 
				+	int bit;
			
 
				 
			
 
				-static inline int trace_recursive_lock(void)
			
 
				-{
			
 
				-	trace_recursion_inc();
			
 
				+	if (in_interrupt()) {
			
 
				+		if (in_nmi())
			
 
				+			bit = 0;
			
 
				+		else if (in_irq())
			
 
				+			bit = 1;
			
 
				+		else
			
 
				+			bit = 2;
			
 
				+	} else
			
 
				+		bit = 3;
			
 
				 
			
 
				-	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
			
 
				-		return 0;
			
 
				+	if (unlikely(val & (1 << bit)))
			
 
				+		return 1;
			
 
				 
			
 
				-	trace_recursive_fail();
			
 
				+	val |= (1 << bit);
			
 
				+	this_cpu_write(current_context, val);
			
 
				 
			
 
				-	return -1;
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				-static inline void trace_recursive_unlock(void)
			
 
				+static __always_inline void trace_recursive_unlock(void)
			
 
				 {
			
 
				-	WARN_ON_ONCE(!trace_recursion_buffer());
			
 
				+	unsigned int val = this_cpu_read(current_context);
			
 
				 
			
 
				-	trace_recursion_dec();
			
 
				+	val--;
			
 
				+	val &= this_cpu_read(current_context);
			
 
				+	this_cpu_write(current_context, val);
			
 
				 }
			
 
				 
			
 
				 #else
			
@@ -3066,6 +3102,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
			
 
				 
			
 
				+/**
			
 
				+ * ring_buffer_read_events_cpu - get the number of events successfully read
			
 
				+ * @buffer: The ring buffer
			
 
				+ * @cpu: The per CPU buffer to get the number of events read
			
 
				+ */
			
 
				+unsigned long
			
 
				+ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
			
 
				+{
			
 
				+	struct ring_buffer_per_cpu *cpu_buffer;
			
 
				+
			
 
				+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
			
 
				+		return 0;
			
 
				+
			
 
				+	cpu_buffer = buffer->buffers[cpu];
			
 
				+	return cpu_buffer->read;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
			
 
				+
			
 
				 /**
			
 
				  * ring_buffer_entries - get the number of entries in a buffer
			
 
				  * @buffer: The ring buffer
			
@@ -3425,7 +3479,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 
				 	/* check for end of page padding */
			
 
				 	if ((iter->head >= rb_page_size(iter->head_page)) &&
			
 
				 	    (iter->head_page != cpu_buffer->commit_page))
			
 
				-		rb_advance_iter(iter);
			
 
				+		rb_inc_iter(iter);
			
 
				 }
			
 
				 
			
 
				 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
			
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -249,7 +249,7 @@ static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 
				 static struct tracer		*trace_types __read_mostly;
			
 
				 
			
 
				 /* current_trace points to the tracer that is currently active */
			
 
				-static struct tracer		*current_trace __read_mostly;
			
 
				+static struct tracer		*current_trace __read_mostly = &nop_trace;
			
 
				 
			
 
				 /*
			
 
				  * trace_types_lock is used to protect the trace_types list.
			
@@ -709,10 +709,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
				 		return;
			
 
				 
			
 
				 	WARN_ON_ONCE(!irqs_disabled());
			
 
				-	if (!current_trace->use_max_tr) {
			
 
				-		WARN_ON_ONCE(1);
			
 
				+
			
 
				+	if (!current_trace->allocated_snapshot) {
			
 
				+		/* Only the nop tracer should hit this when disabling */
			
 
				+		WARN_ON_ONCE(current_trace != &nop_trace);
			
 
				 		return;
			
 
				 	}
			
 
				+
			
 
				 	arch_spin_lock(&ftrace_max_lock);
			
 
				 
			
 
				 	tr->buffer = max_tr.buffer;
			
@@ -739,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
				 		return;
			
 
				 
			
 
				 	WARN_ON_ONCE(!irqs_disabled());
			
 
				-	if (!current_trace->use_max_tr) {
			
 
				-		WARN_ON_ONCE(1);
			
 
				+	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
			
 
				 		return;
			
 
				-	}
			
 
				 
			
 
				 	arch_spin_lock(&ftrace_max_lock);
			
 
				 
			
@@ -862,10 +863,13 @@ int register_tracer(struct tracer *type)
 
				 
			
 
				 		current_trace = type;
			
 
				 
			
 
				-		/* If we expanded the buffers, make sure the max is expanded too */
			
 
				-		if (ring_buffer_expanded && type->use_max_tr)
			
 
				-			ring_buffer_resize(max_tr.buffer, trace_buf_size,
			
 
				-						RING_BUFFER_ALL_CPUS);
			
 
				+		if (type->use_max_tr) {
			
 
				+			/* If we expanded the buffers, make sure the max is expanded too */
			
 
				+			if (ring_buffer_expanded)
			
 
				+				ring_buffer_resize(max_tr.buffer, trace_buf_size,
			
 
				+						   RING_BUFFER_ALL_CPUS);
			
 
				+			type->allocated_snapshot = true;
			
 
				+		}
			
 
				 
			
 
				 		/* the test is responsible for initializing and enabling */
			
 
				 		pr_info("Testing tracer %s: ", type->name);
			
@@ -881,10 +885,14 @@ int register_tracer(struct tracer *type)
 
				 		/* Only reset on passing, to avoid touching corrupted buffers */
			
 
				 		tracing_reset_online_cpus(tr);
			
 
				 
			
 
				-		/* Shrink the max buffer again */
			
 
				-		if (ring_buffer_expanded && type->use_max_tr)
			
 
				-			ring_buffer_resize(max_tr.buffer, 1,
			
 
				-						RING_BUFFER_ALL_CPUS);
			
 
				+		if (type->use_max_tr) {
			
 
				+			type->allocated_snapshot = false;
			
 
				+
			
 
				+			/* Shrink the max buffer again */
			
 
				+			if (ring_buffer_expanded)
			
 
				+				ring_buffer_resize(max_tr.buffer, 1,
			
 
				+						   RING_BUFFER_ALL_CPUS);
			
 
				+		}
			
 
				 
			
 
				 		printk(KERN_CONT "PASSED\n");
			
 
				 	}
			
@@ -922,6 +930,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
 
				 {
			
 
				 	struct ring_buffer *buffer = tr->buffer;
			
 
				 
			
 
				+	if (!buffer)
			
 
				+		return;
			
 
				+
			
 
				 	ring_buffer_record_disable(buffer);
			
 
				 
			
 
				 	/* Make sure all commits have finished */
			
@@ -936,6 +947,9 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 
				 	struct ring_buffer *buffer = tr->buffer;
			
 
				 	int cpu;
			
 
				 
			
 
				+	if (!buffer)
			
 
				+		return;
			
 
				+
			
 
				 	ring_buffer_record_disable(buffer);
			
 
				 
			
 
				 	/* Make sure all commits have finished */
			
@@ -1167,7 +1181,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
				 
			
 
				 	entry->preempt_count		= pc & 0xff;
			
 
				 	entry->pid			= (tsk) ? tsk->pid : 0;
			
 
				-	entry->padding			= 0;
			
 
				 	entry->flags =
			
 
				 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
			
 
				 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
			
@@ -1335,7 +1348,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 
				 	 */
			
 
				 	preempt_disable_notrace();
			
 
				 
			
 
				-	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
			
 
				+	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
			
 
				 	/*
			
 
				 	 * We don't need any atomic variables, just a barrier.
			
 
				 	 * If an interrupt comes in, we don't care, because it would
			
@@ -1389,7 +1402,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 
				  out:
			
 
				 	/* Again, don't let gcc optimize things here */
			
 
				 	barrier();
			
 
				-	__get_cpu_var(ftrace_stack_reserve)--;
			
 
				+	__this_cpu_dec(ftrace_stack_reserve);
			
 
				 	preempt_enable_notrace();
			
 
				 
			
 
				 }
			
@@ -1517,7 +1530,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 
				 static char *get_trace_buf(void)
			
 
				 {
			
 
				 	struct trace_buffer_struct *percpu_buffer;
			
 
				-	struct trace_buffer_struct *buffer;
			
 
				 
			
 
				 	/*
			
 
				 	 * If we have allocated per cpu buffers, then we do not
			
@@ -1535,9 +1547,7 @@ static char *get_trace_buf(void)
 
				 	if (!percpu_buffer)
			
 
				 		return NULL;
			
 
				 
			
 
				-	buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
			
 
				-
			
 
				-	return buffer->buffer;
			
 
				+	return this_cpu_ptr(&percpu_buffer->buffer[0]);
			
 
				 }
			
 
				 
			
 
				 static int alloc_percpu_trace_buffer(void)
			
@@ -1942,21 +1952,27 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 
				 static void *s_start(struct seq_file *m, loff_t *pos)
			
 
				 {
			
 
				 	struct trace_iterator *iter = m->private;
			
 
				-	static struct tracer *old_tracer;
			
 
				 	int cpu_file = iter->cpu_file;
			
 
				 	void *p = NULL;
			
 
				 	loff_t l = 0;
			
 
				 	int cpu;
			
 
				 
			
 
				-	/* copy the tracer to avoid using a global lock all around */
			
 
				+	/*
			
 
				+	 * copy the tracer to avoid using a global lock all around.
			
 
				+	 * iter->trace is a copy of current_trace, the pointer to the
			
 
				+	 * name may be used instead of a strcmp(), as iter->trace->name
			
 
				+	 * will point to the same string as current_trace->name.
			
 
				+	 */
			
 
				 	mutex_lock(&trace_types_lock);
			
 
				-	if (unlikely(old_tracer != current_trace && current_trace)) {
			
 
				-		old_tracer = current_trace;
			
 
				+	if (unlikely(current_trace && iter->trace->name != current_trace->name))
			
 
				 		*iter->trace = *current_trace;
			
 
				-	}
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
 
				-	atomic_inc(&trace_record_cmdline_disabled);
			
 
				+	if (iter->snapshot && iter->trace->use_max_tr)
			
 
				+		return ERR_PTR(-EBUSY);
			
 
				+
			
 
				+	if (!iter->snapshot)
			
 
				+		atomic_inc(&trace_record_cmdline_disabled);
			
 
				 
			
 
				 	if (*pos != iter->pos) {
			
 
				 		iter->ent = NULL;
			
@@ -1995,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p)
 
				 {
			
 
				 	struct trace_iterator *iter = m->private;
			
 
				 
			
 
				-	atomic_dec(&trace_record_cmdline_disabled);
			
 
				+	if (iter->snapshot && iter->trace->use_max_tr)
			
 
				+		return;
			
 
				+
			
 
				+	if (!iter->snapshot)
			
 
				+		atomic_dec(&trace_record_cmdline_disabled);
			
 
				 	trace_access_unlock(iter->cpu_file);
			
 
				 	trace_event_read_unlock();
			
 
				 }
			
@@ -2080,8 +2100,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 
				 	unsigned long total;
			
 
				 	const char *name = "preemption";
			
 
				 
			
 
				-	if (type)
			
 
				-		name = type->name;
			
 
				+	name = type->name;
			
 
				 
			
 
				 	get_total_entries(tr, &total, &entries);
			
 
				 
			
@@ -2430,7 +2449,7 @@ static const struct seq_operations tracer_seq_ops = {
 
				 };
			
 
				 
			
 
				 static struct trace_iterator *
			
 
				-__tracing_open(struct inode *inode, struct file *file)
			
 
				+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
			
 
				 {
			
 
				 	long cpu_file = (long) inode->i_private;
			
 
				 	struct trace_iterator *iter;
			
@@ -2457,16 +2476,16 @@ __tracing_open(struct inode *inode, struct file *file)
 
				 	if (!iter->trace)
			
 
				 		goto fail;
			
 
				 
			
 
				-	if (current_trace)
			
 
				-		*iter->trace = *current_trace;
			
 
				+	*iter->trace = *current_trace;
			
 
				 
			
 
				 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
			
 
				 		goto fail;
			
 
				 
			
 
				-	if (current_trace && current_trace->print_max)
			
 
				+	if (current_trace->print_max || snapshot)
			
 
				 		iter->tr = &max_tr;
			
 
				 	else
			
 
				 		iter->tr = &global_trace;
			
 
				+	iter->snapshot = snapshot;
			
 
				 	iter->pos = -1;
			
 
				 	mutex_init(&iter->mutex);
			
 
				 	iter->cpu_file = cpu_file;
			
@@ -2483,8 +2502,9 @@ __tracing_open(struct inode *inode, struct file *file)
 
				 	if (trace_clocks[trace_clock_id].in_ns)
			
 
				 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
			
 
				 
			
 
				-	/* stop the trace while dumping */
			
 
				-	tracing_stop();
			
 
				+	/* stop the trace while dumping if we are not opening "snapshot" */
			
 
				+	if (!iter->snapshot)
			
 
				+		tracing_stop();
			
 
				 
			
 
				 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
			
 
				 		for_each_tracing_cpu(cpu) {
			
@@ -2547,8 +2567,9 @@ static int tracing_release(struct inode *inode, struct file *file)
 
				 	if (iter->trace && iter->trace->close)
			
 
				 		iter->trace->close(iter);
			
 
				 
			
 
				-	/* reenable tracing if it was previously enabled */
			
 
				-	tracing_start();
			
 
				+	if (!iter->snapshot)
			
 
				+		/* reenable tracing if it was previously enabled */
			
 
				+		tracing_start();
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
 
				 	mutex_destroy(&iter->mutex);
			
@@ -2576,7 +2597,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 
				 	}
			
 
				 
			
 
				 	if (file->f_mode & FMODE_READ) {
			
 
				-		iter = __tracing_open(inode, file);
			
 
				+		iter = __tracing_open(inode, file, false);
			
 
				 		if (IS_ERR(iter))
			
 
				 			ret = PTR_ERR(iter);
			
 
				 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
			
@@ -3014,10 +3035,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
 
				 	int r;
			
 
				 
			
 
				 	mutex_lock(&trace_types_lock);
			
 
				-	if (current_trace)
			
 
				-		r = sprintf(buf, "%s\n", current_trace->name);
			
 
				-	else
			
 
				-		r = sprintf(buf, "\n");
			
 
				+	r = sprintf(buf, "%s\n", current_trace->name);
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
 
				 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
			
@@ -3183,6 +3201,7 @@ static int tracing_set_tracer(const char *buf)
 
				 	static struct trace_option_dentry *topts;
			
 
				 	struct trace_array *tr = &global_trace;
			
 
				 	struct tracer *t;
			
 
				+	bool had_max_tr;
			
 
				 	int ret = 0;
			
 
				 
			
 
				 	mutex_lock(&trace_types_lock);
			
@@ -3207,9 +3226,21 @@ static int tracing_set_tracer(const char *buf)
 
				 		goto out;
			
 
				 
			
 
				 	trace_branch_disable();
			
 
				-	if (current_trace && current_trace->reset)
			
 
				+	if (current_trace->reset)
			
 
				 		current_trace->reset(tr);
			
 
				-	if (current_trace && current_trace->use_max_tr) {
			
 
				+
			
 
				+	had_max_tr = current_trace->allocated_snapshot;
			
 
				+	current_trace = &nop_trace;
			
 
				+
			
 
				+	if (had_max_tr && !t->use_max_tr) {
			
 
				+		/*
			
 
				+		 * We need to make sure that the update_max_tr sees that
			
 
				+		 * current_trace changed to nop_trace to keep it from
			
 
				+		 * swapping the buffers after we resize it.
			
 
				+		 * The update_max_tr is called from interrupts disabled
			
 
				+		 * so a synchronize_sched() is sufficient.
			
 
				+		 */
			
 
				+		synchronize_sched();
			
 
				 		/*
			
 
				 		 * We don't free the ring buffer. instead, resize it because
			
 
				 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
			
@@ -3217,18 +3248,19 @@ static int tracing_set_tracer(const char *buf)
 
				 		 */
			
 
				 		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
			
 
				 		set_buffer_entries(&max_tr, 1);
			
 
				+		tracing_reset_online_cpus(&max_tr);
			
 
				+		current_trace->allocated_snapshot = false;
			
 
				 	}
			
 
				 	destroy_trace_option_files(topts);
			
 
				 
			
 
				-	current_trace = &nop_trace;
			
 
				-
			
 
				 	topts = create_trace_option_files(t);
			
 
				-	if (t->use_max_tr) {
			
 
				+	if (t->use_max_tr && !had_max_tr) {
			
 
				 		/* we need to make per cpu buffer sizes equivalent */
			
 
				 		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
			
 
				 						   RING_BUFFER_ALL_CPUS);
			
 
				 		if (ret < 0)
			
 
				 			goto out;
			
 
				+		t->allocated_snapshot = true;
			
 
				 	}
			
 
				 
			
 
				 	if (t->init) {
			
@@ -3336,8 +3368,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
				 		ret = -ENOMEM;
			
 
				 		goto fail;
			
 
				 	}
			
 
				-	if (current_trace)
			
 
				-		*iter->trace = *current_trace;
			
 
				+	*iter->trace = *current_trace;
			
 
				 
			
 
				 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
			
 
				 		ret = -ENOMEM;
			
@@ -3477,7 +3508,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 
				 		  size_t cnt, loff_t *ppos)
			
 
				 {
			
 
				 	struct trace_iterator *iter = filp->private_data;
			
 
				-	static struct tracer *old_tracer;
			
 
				 	ssize_t sret;
			
 
				 
			
 
				 	/* return any leftover data */
			
@@ -3489,10 +3519,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 
				 
			
 
				 	/* copy the tracer to avoid using a global lock all around */
			
 
				 	mutex_lock(&trace_types_lock);
			
 
				-	if (unlikely(old_tracer != current_trace && current_trace)) {
			
 
				-		old_tracer = current_trace;
			
 
				+	if (unlikely(iter->trace->name != current_trace->name))
			
 
				 		*iter->trace = *current_trace;
			
 
				-	}
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
 
				 	/*
			
@@ -3648,7 +3676,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 
				 		.ops		= &tracing_pipe_buf_ops,
			
 
				 		.spd_release	= tracing_spd_release_pipe,
			
 
				 	};
			
 
				-	static struct tracer *old_tracer;
			
 
				 	ssize_t ret;
			
 
				 	size_t rem;
			
 
				 	unsigned int i;
			
@@ -3658,10 +3685,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 
				 
			
 
				 	/* copy the tracer to avoid using a global lock all around */
			
 
				 	mutex_lock(&trace_types_lock);
			
 
				-	if (unlikely(old_tracer != current_trace && current_trace)) {
			
 
				-		old_tracer = current_trace;
			
 
				+	if (unlikely(iter->trace->name != current_trace->name))
			
 
				 		*iter->trace = *current_trace;
			
 
				-	}
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
 
				 	mutex_lock(&iter->mutex);
			
@@ -4037,8 +4062,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 
				 	 * Reset the buffer so that it doesn't have incomparable timestamps.
			
 
				 	 */
			
 
				 	tracing_reset_online_cpus(&global_trace);
			
 
				-	if (max_tr.buffer)
			
 
				-		tracing_reset_online_cpus(&max_tr);
			
 
				+	tracing_reset_online_cpus(&max_tr);
			
 
				 
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
@@ -4054,6 +4078,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 
				 	return single_open(file, tracing_clock_show, NULL);
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_TRACER_SNAPSHOT
			
 
				+static int tracing_snapshot_open(struct inode *inode, struct file *file)
			
 
				+{
			
 
				+	struct trace_iterator *iter;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if (file->f_mode & FMODE_READ) {
			
 
				+		iter = __tracing_open(inode, file, true);
			
 
				+		if (IS_ERR(iter))
			
 
				+			ret = PTR_ERR(iter);
			
 
				+	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static ssize_t
			
 
				+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
			
 
				+		       loff_t *ppos)
			
 
				+{
			
 
				+	unsigned long val;
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = tracing_update_buffers();
			
 
				+	if (ret < 0)
			
 
				+		return ret;
			
 
				+
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	mutex_lock(&trace_types_lock);
			
 
				+
			
 
				+	if (current_trace->use_max_tr) {
			
 
				+		ret = -EBUSY;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	switch (val) {
			
 
				+	case 0:
			
 
				+		if (current_trace->allocated_snapshot) {
			
 
				+			/* free spare buffer */
			
 
				+			ring_buffer_resize(max_tr.buffer, 1,
			
 
				+					   RING_BUFFER_ALL_CPUS);
			
 
				+			set_buffer_entries(&max_tr, 1);
			
 
				+			tracing_reset_online_cpus(&max_tr);
			
 
				+			current_trace->allocated_snapshot = false;
			
 
				+		}
			
 
				+		break;
			
 
				+	case 1:
			
 
				+		if (!current_trace->allocated_snapshot) {
			
 
				+			/* allocate spare buffer */
			
 
				+			ret = resize_buffer_duplicate_size(&max_tr,
			
 
				+					&global_trace, RING_BUFFER_ALL_CPUS);
			
 
				+			if (ret < 0)
			
 
				+				break;
			
 
				+			current_trace->allocated_snapshot = true;
			
 
				+		}
			
 
				+
			
 
				+		local_irq_disable();
			
 
				+		/* Now, we're going to swap */
			
 
				+		update_max_tr(&global_trace, current, smp_processor_id());
			
 
				+		local_irq_enable();
			
 
				+		break;
			
 
				+	default:
			
 
				+		if (current_trace->allocated_snapshot)
			
 
				+			tracing_reset_online_cpus(&max_tr);
			
 
				+		else
			
 
				+			ret = -EINVAL;
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	if (ret >= 0) {
			
 
				+		*ppos += cnt;
			
 
				+		ret = cnt;
			
 
				+	}
			
 
				+out:
			
 
				+	mutex_unlock(&trace_types_lock);
			
 
				+	return ret;
			
 
				+}
			
 
				+#endif /* CONFIG_TRACER_SNAPSHOT */
			
 
				+
			
 
				+
			
 
				 static const struct file_operations tracing_max_lat_fops = {
			
 
				 	.open		= tracing_open_generic,
			
 
				 	.read		= tracing_max_lat_read,
			
@@ -4110,6 +4215,16 @@ static const struct file_operations trace_clock_fops = {
 
				 	.write		= tracing_clock_write,
			
 
				 };
			
 
				 
			
 
				+#ifdef CONFIG_TRACER_SNAPSHOT
			
 
				+static const struct file_operations snapshot_fops = {
			
 
				+	.open		= tracing_snapshot_open,
			
 
				+	.read		= seq_read,
			
 
				+	.write		= tracing_snapshot_write,
			
 
				+	.llseek		= tracing_seek,
			
 
				+	.release	= tracing_release,
			
 
				+};
			
 
				+#endif /* CONFIG_TRACER_SNAPSHOT */
			
 
				+
			
 
				 struct ftrace_buffer_info {
			
 
				 	struct trace_array	*tr;
			
 
				 	void			*spare;
			
@@ -4414,6 +4529,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 
				 	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
			
 
				 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
			
 
				 
			
 
				+	cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
			
 
				+	trace_seq_printf(s, "read events: %ld\n", cnt);
			
 
				+
			
 
				 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
			
 
				 
			
 
				 	kfree(s);
			
@@ -4490,7 +4608,7 @@ struct dentry *tracing_init_dentry(void)
 
				 
			
 
				 static struct dentry *d_percpu;
			
 
				 
			
 
				-struct dentry *tracing_dentry_percpu(void)
			
 
				+static struct dentry *tracing_dentry_percpu(void)
			
 
				 {
			
 
				 	static int once;
			
 
				 	struct dentry *d_tracer;
			
@@ -4906,6 +5024,11 @@ static __init int tracer_init_debugfs(void)
 
				 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
			
 
				 #endif
			
 
				 
			
 
				+#ifdef CONFIG_TRACER_SNAPSHOT
			
 
				+	trace_create_file("snapshot", 0644, d_tracer,
			
 
				+			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
			
 
				+#endif
			
 
				+
			
 
				 	create_trace_options_dir();
			
 
				 
			
 
				 	for_each_tracing_cpu(cpu)
			
@@ -5014,6 +5137,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 
				 	if (disable_tracing)
			
 
				 		ftrace_kill();
			
 
				 
			
 
				+	/* Simulate the iterator */
			
 
				 	trace_init_global_iter(&iter);
			
 
				 
			
 
				 	for_each_tracing_cpu(cpu) {
			
@@ -5025,10 +5149,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 
				 	/* don't look at user memory in panic mode */
			
 
				 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
			
 
				 
			
 
				-	/* Simulate the iterator */
			
 
				-	iter.tr = &global_trace;
			
 
				-	iter.trace = current_trace;
			
 
				-
			
 
				 	switch (oops_dump_mode) {
			
 
				 	case DUMP_ALL:
			
 
				 		iter.cpu_file = TRACE_PIPE_ALL_CPU;
			
@@ -5173,7 +5293,7 @@ __init static int tracer_alloc_buffers(void)
 
				 	init_irq_work(&trace_work_wakeup, trace_wake_up);
			
 
				 
			
 
				 	register_tracer(&nop_trace);
			
 
				-	current_trace = &nop_trace;
			
 
				+
			
 
				 	/* All seems OK, enable tracing */
			
 
				 	tracing_disabled = 0;
			
 
				 
			
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -287,20 +287,62 @@ struct tracer {
 
				 	struct tracer_flags	*flags;
			
 
				 	bool			print_max;
			
 
				 	bool			use_max_tr;
			
 
				+	bool			allocated_snapshot;
			
 
				 };
			
 
				 
			
 
				 
			
 
				 /* Only current can touch trace_recursion */
			
 
				-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
			
 
				-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
			
 
				 
			
 
				-/* Ring buffer has the 10 LSB bits to count */
			
 
				-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
			
 
				-
			
 
				-/* for function tracing recursion */
			
 
				-#define TRACE_INTERNAL_BIT		(1<<11)
			
 
				-#define TRACE_GLOBAL_BIT		(1<<12)
			
 
				-#define TRACE_CONTROL_BIT		(1<<13)
			
 
				+/*
			
 
				+ * For function tracing recursion:
			
 
				+ *  The order of these bits is important.
			
 
				+ *
			
 
				+ *  When function tracing occurs, the following steps are made:
			
 
				+ *   If arch does not support a ftrace feature:
			
 
				+ *    call internal function (uses INTERNAL bits) which calls...
			
 
				+ *   If callback is registered to the "global" list, the list
			
 
				+ *    function is called and recursion checks the GLOBAL bits.
			
 
				+ *    then this function calls...
			
 
				+ *   The function callback, which can use the FTRACE bits to
			
 
				+ *    check for recursion.
			
 
				+ *
			
 
				+ * Now if the arch does not support a feature, and it calls
			
 
				+ * the global list function which calls the ftrace callback
			
 
				+ * all three of these steps will do a recursion protection.
			
 
				+ * There's no reason to do one if the previous caller already
			
 
				+ * did. The recursion that we are protecting against will
			
 
				+ * go through the same steps again.
			
 
				+ *
			
 
				+ * To prevent the multiple recursion checks, if a recursion
			
 
				+ * bit is set that is higher than the MAX bit of the current
			
 
				+ * check, then we know that the check was made by the previous
			
 
				+ * caller, and we can skip the current check.
			
 
				+ */
			
 
				+enum {
			
 
				+	TRACE_BUFFER_BIT,
			
 
				+	TRACE_BUFFER_NMI_BIT,
			
 
				+	TRACE_BUFFER_IRQ_BIT,
			
 
				+	TRACE_BUFFER_SIRQ_BIT,
			
 
				+
			
 
				+	/* Start of function recursion bits */
			
 
				+	TRACE_FTRACE_BIT,
			
 
				+	TRACE_FTRACE_NMI_BIT,
			
 
				+	TRACE_FTRACE_IRQ_BIT,
			
 
				+	TRACE_FTRACE_SIRQ_BIT,
			
 
				+
			
 
				+	/* GLOBAL_BITs must be greater than FTRACE_BITs */
			
 
				+	TRACE_GLOBAL_BIT,
			
 
				+	TRACE_GLOBAL_NMI_BIT,
			
 
				+	TRACE_GLOBAL_IRQ_BIT,
			
 
				+	TRACE_GLOBAL_SIRQ_BIT,
			
 
				+
			
 
				+	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
			
 
				+	TRACE_INTERNAL_BIT,
			
 
				+	TRACE_INTERNAL_NMI_BIT,
			
 
				+	TRACE_INTERNAL_IRQ_BIT,
			
 
				+	TRACE_INTERNAL_SIRQ_BIT,
			
 
				+
			
 
				+	TRACE_CONTROL_BIT,
			
 
				 
			
 
				 /*
			
 
				  * Abuse of the trace_recursion.
			
@@ -309,11 +351,77 @@ struct tracer {
 
				  * was called in irq context but we have irq tracing off. Since this
			
 
				  * can only be modified by current, we can reuse trace_recursion.
			
 
				  */
			
 
				-#define TRACE_IRQ_BIT			(1<<13)
			
 
				+	TRACE_IRQ_BIT,
			
 
				+};
			
 
				+
			
 
				+#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
			
 
				+#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
			
 
				+#define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
			
 
				+
			
 
				+#define TRACE_CONTEXT_BITS	4
			
 
				+
			
 
				+#define TRACE_FTRACE_START	TRACE_FTRACE_BIT
			
 
				+#define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
			
 
				+
			
 
				+#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
			
 
				+#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
			
 
				+
			
 
				+#define TRACE_LIST_START	TRACE_INTERNAL_BIT
			
 
				+#define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
			
 
				+
			
 
				+#define TRACE_CONTEXT_MASK	TRACE_LIST_MAX
			
 
				+
			
 
				+static __always_inline int trace_get_context_bit(void)
			
 
				+{
			
 
				+	int bit;
			
 
				 
			
 
				-#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
			
 
				-#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
			
 
				-#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
			
 
				+	if (in_interrupt()) {
			
 
				+		if (in_nmi())
			
 
				+			bit = 0;
			
 
				+
			
 
				+		else if (in_irq())
			
 
				+			bit = 1;
			
 
				+		else
			
 
				+			bit = 2;
			
 
				+	} else
			
 
				+		bit = 3;
			
 
				+
			
 
				+	return bit;
			
 
				+}
			
 
				+
			
 
				+static __always_inline int trace_test_and_set_recursion(int start, int max)
			
 
				+{
			
 
				+	unsigned int val = current->trace_recursion;
			
 
				+	int bit;
			
 
				+
			
 
				+	/* A previous recursion check was made */
			
 
				+	if ((val & TRACE_CONTEXT_MASK) > max)
			
 
				+		return 0;
			
 
				+
			
 
				+	bit = trace_get_context_bit() + start;
			
 
				+	if (unlikely(val & (1 << bit)))
			
 
				+		return -1;
			
 
				+
			
 
				+	val |= 1 << bit;
			
 
				+	current->trace_recursion = val;
			
 
				+	barrier();
			
 
				+
			
 
				+	return bit;
			
 
				+}
			
 
				+
			
 
				+static __always_inline void trace_clear_recursion(int bit)
			
 
				+{
			
 
				+	unsigned int val = current->trace_recursion;
			
 
				+
			
 
				+	if (!bit)
			
 
				+		return;
			
 
				+
			
 
				+	bit = 1 << bit;
			
 
				+	val &= ~bit;
			
 
				+
			
 
				+	barrier();
			
 
				+	current->trace_recursion = val;
			
 
				+}
			
 
				 
			
 
				 #define TRACE_PIPE_ALL_CPU	-1
			
 
				 
			
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -21,8 +21,6 @@
 
				 #include <linux/ktime.h>
			
 
				 #include <linux/trace_clock.h>
			
 
				 
			
 
				-#include "trace.h"
			
 
				-
			
 
				 /*
			
 
				  * trace_clock_local(): the simplest and least coherent tracing clock.
			
 
				  *
			
@@ -87,7 +85,7 @@ u64 notrace trace_clock_global(void)
 
				 	local_irq_save(flags);
			
 
				 
			
 
				 	this_cpu = raw_smp_processor_id();
			
 
				-	now = cpu_clock(this_cpu);
			
 
				+	now = sched_clock_cpu(this_cpu);
			
 
				 	/*
			
 
				 	 * If in an NMI context then dont risk lockups and return the
			
 
				 	 * cpu_clock() time:
			
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
 
				 	__common_field(unsigned char, flags);
			
 
				 	__common_field(unsigned char, preempt_count);
			
 
				 	__common_field(int, pid);
			
 
				-	__common_field(int, padding);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr)
 
				 	tracing_reset_online_cpus(tr);
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
			
 
				-				 struct ftrace_ops *op, struct pt_regs *pt_regs)
			
 
				-{
			
 
				-	struct trace_array *tr = func_trace;
			
 
				-	struct trace_array_cpu *data;
			
 
				-	unsigned long flags;
			
 
				-	long disabled;
			
 
				-	int cpu;
			
 
				-	int pc;
			
 
				-
			
 
				-	if (unlikely(!ftrace_function_enabled))
			
 
				-		return;
			
 
				-
			
 
				-	pc = preempt_count();
			
 
				-	preempt_disable_notrace();
			
 
				-	local_save_flags(flags);
			
 
				-	cpu = raw_smp_processor_id();
			
 
				-	data = tr->data[cpu];
			
 
				-	disabled = atomic_inc_return(&data->disabled);
			
 
				-
			
 
				-	if (likely(disabled == 1))
			
 
				-		trace_function(tr, ip, parent_ip, flags, pc);
			
 
				-
			
 
				-	atomic_dec(&data->disabled);
			
 
				-	preempt_enable_notrace();
			
 
				-}
			
 
				-
			
 
				 /* Our option */
			
 
				 enum {
			
 
				 	TRACE_FUNC_OPT_STACK	= 0x1,
			
@@ -85,34 +57,34 @@ static struct tracer_flags func_flags;
 
				 static void
			
 
				 function_trace_call(unsigned long ip, unsigned long parent_ip,
			
 
				 		    struct ftrace_ops *op, struct pt_regs *pt_regs)
			
 
				-
			
 
				 {
			
 
				 	struct trace_array *tr = func_trace;
			
 
				 	struct trace_array_cpu *data;
			
 
				 	unsigned long flags;
			
 
				-	long disabled;
			
 
				+	int bit;
			
 
				 	int cpu;
			
 
				 	int pc;
			
 
				 
			
 
				 	if (unlikely(!ftrace_function_enabled))
			
 
				 		return;
			
 
				 
			
 
				-	/*
			
 
				-	 * Need to use raw, since this must be called before the
			
 
				-	 * recursive protection is performed.
			
 
				-	 */
			
 
				-	local_irq_save(flags);
			
 
				-	cpu = raw_smp_processor_id();
			
 
				-	data = tr->data[cpu];
			
 
				-	disabled = atomic_inc_return(&data->disabled);
			
 
				+	pc = preempt_count();
			
 
				+	preempt_disable_notrace();
			
 
				 
			
 
				-	if (likely(disabled == 1)) {
			
 
				-		pc = preempt_count();
			
 
				+	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
			
 
				+	if (bit < 0)
			
 
				+		goto out;
			
 
				+
			
 
				+	cpu = smp_processor_id();
			
 
				+	data = tr->data[cpu];
			
 
				+	if (!atomic_read(&data->disabled)) {
			
 
				+		local_save_flags(flags);
			
 
				 		trace_function(tr, ip, parent_ip, flags, pc);
			
 
				 	}
			
 
				+	trace_clear_recursion(bit);
			
 
				 
			
 
				-	atomic_dec(&data->disabled);
			
 
				-	local_irq_restore(flags);
			
 
				+ out:
			
 
				+	preempt_enable_notrace();
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void)
 
				 {
			
 
				 	ftrace_function_enabled = 0;
			
 
				 
			
 
				-	if (trace_flags & TRACE_ITER_PREEMPTONLY)
			
 
				-		trace_ops.func = function_trace_call_preempt_only;
			
 
				-	else
			
 
				-		trace_ops.func = function_trace_call;
			
 
				-
			
 
				 	if (func_flags.val & TRACE_FUNC_OPT_STACK)
			
 
				 		register_ftrace_function(&trace_stack_ops);
			
 
				 	else
			
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -47,6 +47,8 @@ struct fgraph_data {
 
				 #define TRACE_GRAPH_PRINT_ABS_TIME	0x20
			
 
				 #define TRACE_GRAPH_PRINT_IRQS		0x40
			
 
				 
			
 
				+static unsigned int max_depth;
			
 
				+
			
 
				 static struct tracer_opt trace_opts[] = {
			
 
				 	/* Display overruns? (for self-debug purpose) */
			
 
				 	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
			
@@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 
				 
			
 
				 	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
			
 
				 	trace.rettime = trace_clock_local();
			
 
				-	ftrace_graph_return(&trace);
			
 
				 	barrier();
			
 
				 	current->curr_ret_stack--;
			
 
				 
			
 
				+	/*
			
 
				+	 * The trace should run after decrementing the ret counter
			
 
				+	 * in case an interrupt were to come in. We don't want to
			
 
				+	 * lose the interrupt if max_depth is set.
			
 
				+	 */
			
 
				+	ftrace_graph_return(&trace);
			
 
				+
			
 
				 	if (unlikely(!ret)) {
			
 
				 		ftrace_graph_stop();
			
 
				 		WARN_ON(1);
			
@@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 
				 		return 0;
			
 
				 
			
 
				 	/* trace it when it is-nested-in or is a function enabled. */
			
 
				-	if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
			
 
				-	      ftrace_graph_ignore_irqs())
			
 
				+	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
			
 
				+	     ftrace_graph_ignore_irqs()) ||
			
 
				+	    (max_depth && trace->depth >= max_depth))
			
 
				 		return 0;
			
 
				 
			
 
				 	local_irq_save(flags);
			
@@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				+
			
 
				+static ssize_t
			
 
				+graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
			
 
				+		  loff_t *ppos)
			
 
				+{
			
 
				+	unsigned long val;
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	max_depth = val;
			
 
				+
			
 
				+	*ppos += cnt;
			
 
				+
			
 
				+	return cnt;
			
 
				+}
			
 
				+
			
 
				+static ssize_t
			
 
				+graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
			
 
				+		 loff_t *ppos)
			
 
				+{
			
 
				+	char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
			
 
				+	int n;
			
 
				+
			
 
				+	n = sprintf(buf, "%d\n", max_depth);
			
 
				+
			
 
				+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
			
 
				+}
			
 
				+
			
 
				+static const struct file_operations graph_depth_fops = {
			
 
				+	.open		= tracing_open_generic,
			
 
				+	.write		= graph_depth_write,
			
 
				+	.read		= graph_depth_read,
			
 
				+	.llseek		= generic_file_llseek,
			
 
				+};
			
 
				+
			
 
				+static __init int init_graph_debugfs(void)
			
 
				+{
			
 
				+	struct dentry *d_tracer;
			
 
				+
			
 
				+	d_tracer = tracing_init_dentry();
			
 
				+	if (!d_tracer)
			
 
				+		return 0;
			
 
				+
			
 
				+	trace_create_file("max_graph_depth", 0644, d_tracer,
			
 
				+			  NULL, &graph_depth_fops);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+fs_initcall(init_graph_debugfs);
			
 
				+
			
 
				 static __init int init_graph_trace(void)
			
 
				 {
			
 
				 	max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
			
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -66,7 +66,6 @@
 
				 #define TP_FLAG_TRACE		1
			
 
				 #define TP_FLAG_PROFILE		2
			
 
				 #define TP_FLAG_REGISTERED	4
			
 
				-#define TP_FLAG_UPROBE		8
			
 
				 
			
 
				 
			
 
				 /* data_rloc: data relative location, compatible with u32 */
			
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
 
				 	 * The ftrace infrastructure should provide the recursion
			
 
				 	 * protection. If not, this will crash the kernel!
			
 
				 	 */
			
 
				-	trace_selftest_recursion_cnt++;
			
 
				+	if (trace_selftest_recursion_cnt++ > 10)
			
 
				+		return;
			
 
				 	DYN_FTRACE_TEST_NAME();
			
 
				 }
			
 
				 
			
@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void)
 
				 	char *func_name;
			
 
				 	int len;
			
 
				 	int ret;
			
 
				-	int cnt;
			
 
				 
			
 
				 	/* The previous test PASSED */
			
 
				 	pr_cont("PASSED\n");
			
@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void)
 
				 
			
 
				 	unregister_ftrace_function(&test_recsafe_probe);
			
 
				 
			
 
				-	/*
			
 
				-	 * If arch supports all ftrace features, and no other task
			
 
				-	 * was on the list, we should be fine.
			
 
				-	 */
			
 
				-	if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
			
 
				-		cnt = 2; /* Should have recursed */
			
 
				-	else
			
 
				-		cnt = 1;
			
 
				-
			
 
				 	ret = -1;
			
 
				-	if (trace_selftest_recursion_cnt != cnt) {
			
 
				-		pr_cont("*callback not called expected %d times (%d)* ",
			
 
				-			cnt, trace_selftest_recursion_cnt);
			
 
				+	if (trace_selftest_recursion_cnt != 2) {
			
 
				+		pr_cont("*callback not called expected 2 times (%d)* ",
			
 
				+			trace_selftest_recursion_cnt);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
@@ -568,7 +559,7 @@ trace_selftest_function_regs(void)
 
				 	int ret;
			
 
				 	int supported = 0;
			
 
				 
			
 
				-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 	supported = 1;
			
 
				 #endif
			
 
				 
			
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 
				 	return syscalls_metadata[nr];
			
 
				 }
			
 
				 
			
 
				-enum print_line_t
			
 
				+static enum print_line_t
			
 
				 print_syscall_enter(struct trace_iterator *iter, int flags,
			
 
				 		    struct trace_event *event)
			
 
				 {
			
@@ -130,7 +130,7 @@ end:
 
				 	return TRACE_TYPE_HANDLED;
			
 
				 }
			
 
				 
			
 
				-enum print_line_t
			
 
				+static enum print_line_t
			
 
				 print_syscall_exit(struct trace_iterator *iter, int flags,
			
 
				 		   struct trace_event *event)
			
 
				 {
			
@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
			
 
				+static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
			
 
				 {
			
 
				 	struct syscall_trace_enter *entry;
			
 
				 	struct syscall_metadata *sys_data;
			
@@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 
				 		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
			
 
				 }
			
 
				 
			
 
				-void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
			
 
				+static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
			
 
				 {
			
 
				 	struct syscall_trace_exit *entry;
			
 
				 	struct syscall_metadata *sys_data;
			
@@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 
				 		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
			
 
				 }
			
 
				 
			
 
				-int reg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				+static int reg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int num;
			
@@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void unreg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				+static void unreg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int num;
			
 
				 
			
@@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
 
				 	mutex_unlock(&syscall_trace_lock);
			
 
				 }
			
 
				 
			
 
				-int reg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				+static int reg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int num;
			
@@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void unreg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				+static void unreg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int num;
			
 
				 
			
@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
 
				 	return (unsigned long)sys_call_table[nr];
			
 
				 }
			
 
				 
			
 
				-int __init init_ftrace_syscalls(void)
			
 
				+static int __init init_ftrace_syscalls(void)
			
 
				 {
			
 
				 	struct syscall_metadata *meta;
			
 
				 	unsigned long addr;
			
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,20 +28,21 @@
 
				 
			
 
				 #define UPROBE_EVENT_SYSTEM	"uprobes"
			
 
				 
			
 
				+struct trace_uprobe_filter {
			
 
				+	rwlock_t		rwlock;
			
 
				+	int			nr_systemwide;
			
 
				+	struct list_head	perf_events;
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * uprobe event core functions
			
 
				  */
			
 
				-struct trace_uprobe;
			
 
				-struct uprobe_trace_consumer {
			
 
				-	struct uprobe_consumer		cons;
			
 
				-	struct trace_uprobe		*tu;
			
 
				-};
			
 
				-
			
 
				 struct trace_uprobe {
			
 
				 	struct list_head		list;
			
 
				 	struct ftrace_event_class	class;
			
 
				 	struct ftrace_event_call	call;
			
 
				-	struct uprobe_trace_consumer	*consumer;
			
 
				+	struct trace_uprobe_filter	filter;
			
 
				+	struct uprobe_consumer		consumer;
			
 
				 	struct inode			*inode;
			
 
				 	char				*filename;
			
 
				 	unsigned long			offset;
			
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list);
 
				 
			
 
				 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
			
 
				 
			
 
				+static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
			
 
				+{
			
 
				+	rwlock_init(&filter->rwlock);
			
 
				+	filter->nr_systemwide = 0;
			
 
				+	INIT_LIST_HEAD(&filter->perf_events);
			
 
				+}
			
 
				+
			
 
				+static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
			
 
				+{
			
 
				+	return !filter->nr_systemwide && list_empty(&filter->perf_events);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Allocate new trace_uprobe and initialize it (including uprobes).
			
 
				  */
			
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
 
				 		goto error;
			
 
				 
			
 
				 	INIT_LIST_HEAD(&tu->list);
			
 
				+	tu->consumer.handler = uprobe_dispatcher;
			
 
				+	init_trace_uprobe_filter(&tu->filter);
			
 
				 	return tu;
			
 
				 
			
 
				 error:
			
@@ -253,12 +268,18 @@ static int create_trace_uprobe(int argc, char **argv)
 
				 	if (ret)
			
 
				 		goto fail_address_parse;
			
 
				 
			
 
				+	inode = igrab(path.dentry->d_inode);
			
 
				+	path_put(&path);
			
 
				+
			
 
				+	if (!inode || !S_ISREG(inode->i_mode)) {
			
 
				+		ret = -EINVAL;
			
 
				+		goto fail_address_parse;
			
 
				+	}
			
 
				+
			
 
				 	ret = kstrtoul(arg, 0, &offset);
			
 
				 	if (ret)
			
 
				 		goto fail_address_parse;
			
 
				 
			
 
				-	inode = igrab(path.dentry->d_inode);
			
 
				-
			
 
				 	argc -= 2;
			
 
				 	argv += 2;
			
 
				 
			
@@ -356,7 +377,7 @@ fail_address_parse:
 
				 	if (inode)
			
 
				 		iput(inode);
			
 
				 
			
 
				-	pr_info("Failed to parse address.\n");
			
 
				+	pr_info("Failed to parse address or file.\n");
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
@@ -465,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = {
 
				 };
			
 
				 
			
 
				 /* uprobe handler */
			
 
				-static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
			
 
				+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct uprobe_trace_entry_head *entry;
			
 
				 	struct ring_buffer_event *event;
			
@@ -475,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
				 	unsigned long irq_flags;
			
 
				 	struct ftrace_event_call *call = &tu->call;
			
 
				 
			
 
				-	tu->nhit++;
			
 
				-
			
 
				 	local_save_flags(irq_flags);
			
 
				 	pc = preempt_count();
			
 
				 
			
@@ -485,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
				 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
			
 
				 						  size, irq_flags, pc);
			
 
				 	if (!event)
			
 
				-		return;
			
 
				+		return 0;
			
 
				 
			
 
				 	entry = ring_buffer_event_data(event);
			
 
				-	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
			
 
				+	entry->ip = instruction_pointer(task_pt_regs(current));
			
 
				 	data = (u8 *)&entry[1];
			
 
				 	for (i = 0; i < tu->nr_args; i++)
			
 
				 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
			
 
				 
			
 
				 	if (!filter_current_check_discard(buffer, call, entry, event))
			
 
				 		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
			
 
				+
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /* Event entry printers */
			
@@ -533,42 +554,43 @@ partial:
 
				 	return TRACE_TYPE_PARTIAL_LINE;
			
 
				 }
			
 
				 
			
 
				-static int probe_event_enable(struct trace_uprobe *tu, int flag)
			
 
				+static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
			
 
				 {
			
 
				-	struct uprobe_trace_consumer *utc;
			
 
				-	int ret = 0;
			
 
				+	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
			
 
				+}
			
 
				 
			
 
				-	if (!tu->inode || tu->consumer)
			
 
				-		return -EINTR;
			
 
				+typedef bool (*filter_func_t)(struct uprobe_consumer *self,
			
 
				+				enum uprobe_filter_ctx ctx,
			
 
				+				struct mm_struct *mm);
			
 
				 
			
 
				-	utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
			
 
				-	if (!utc)
			
 
				+static int
			
 
				+probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if (is_trace_uprobe_enabled(tu))
			
 
				 		return -EINTR;
			
 
				 
			
 
				-	utc->cons.handler = uprobe_dispatcher;
			
 
				-	utc->cons.filter = NULL;
			
 
				-	ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
			
 
				-	if (ret) {
			
 
				-		kfree(utc);
			
 
				-		return ret;
			
 
				-	}
			
 
				+	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
			
 
				 
			
 
				 	tu->flags |= flag;
			
 
				-	utc->tu = tu;
			
 
				-	tu->consumer = utc;
			
 
				+	tu->consumer.filter = filter;
			
 
				+	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
			
 
				+	if (ret)
			
 
				+		tu->flags &= ~flag;
			
 
				 
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 static void probe_event_disable(struct trace_uprobe *tu, int flag)
			
 
				 {
			
 
				-	if (!tu->inode || !tu->consumer)
			
 
				+	if (!is_trace_uprobe_enabled(tu))
			
 
				 		return;
			
 
				 
			
 
				-	uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
			
 
				+	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
			
 
				+
			
 
				+	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
			
 
				 	tu->flags &= ~flag;
			
 
				-	kfree(tu->consumer);
			
 
				-	tu->consumer = NULL;
			
 
				 }
			
 
				 
			
 
				 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
			
@@ -642,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu)
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_PERF_EVENTS
			
 
				+static bool
			
 
				+__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
			
 
				+{
			
 
				+	struct perf_event *event;
			
 
				+
			
 
				+	if (filter->nr_systemwide)
			
 
				+		return true;
			
 
				+
			
 
				+	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
			
 
				+		if (event->hw.tp_target->mm == mm)
			
 
				+			return true;
			
 
				+	}
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static inline bool
			
 
				+uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
			
 
				+{
			
 
				+	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
			
 
				+}
			
 
				+
			
 
				+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
			
 
				+{
			
 
				+	bool done;
			
 
				+
			
 
				+	write_lock(&tu->filter.rwlock);
			
 
				+	if (event->hw.tp_target) {
			
 
				+		/*
			
 
				+		 * event->parent != NULL means copy_process(), we can avoid
			
 
				+		 * uprobe_apply(). current->mm must be probed and we can rely
			
 
				+		 * on dup_mmap() which preserves the already installed bp's.
			
 
				+		 *
			
 
				+		 * attr.enable_on_exec means that exec/mmap will install the
			
 
				+		 * breakpoints we need.
			
 
				+		 */
			
 
				+		done = tu->filter.nr_systemwide ||
			
 
				+			event->parent || event->attr.enable_on_exec ||
			
 
				+			uprobe_filter_event(tu, event);
			
 
				+		list_add(&event->hw.tp_list, &tu->filter.perf_events);
			
 
				+	} else {
			
 
				+		done = tu->filter.nr_systemwide;
			
 
				+		tu->filter.nr_systemwide++;
			
 
				+	}
			
 
				+	write_unlock(&tu->filter.rwlock);
			
 
				+
			
 
				+	if (!done)
			
 
				+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
			
 
				+{
			
 
				+	bool done;
			
 
				+
			
 
				+	write_lock(&tu->filter.rwlock);
			
 
				+	if (event->hw.tp_target) {
			
 
				+		list_del(&event->hw.tp_list);
			
 
				+		done = tu->filter.nr_systemwide ||
			
 
				+			(event->hw.tp_target->flags & PF_EXITING) ||
			
 
				+			uprobe_filter_event(tu, event);
			
 
				+	} else {
			
 
				+		tu->filter.nr_systemwide--;
			
 
				+		done = tu->filter.nr_systemwide;
			
 
				+	}
			
 
				+	write_unlock(&tu->filter.rwlock);
			
 
				+
			
 
				+	if (!done)
			
 
				+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static bool uprobe_perf_filter(struct uprobe_consumer *uc,
			
 
				+				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
			
 
				+{
			
 
				+	struct trace_uprobe *tu;
			
 
				+	int ret;
			
 
				+
			
 
				+	tu = container_of(uc, struct trace_uprobe, consumer);
			
 
				+	read_lock(&tu->filter.rwlock);
			
 
				+	ret = __uprobe_perf_filter(&tu->filter, mm);
			
 
				+	read_unlock(&tu->filter.rwlock);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 /* uprobe profile handler */
			
 
				-static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
			
 
				+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct ftrace_event_call *call = &tu->call;
			
 
				 	struct uprobe_trace_entry_head *entry;
			
@@ -652,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
				 	int size, __size, i;
			
 
				 	int rctx;
			
 
				 
			
 
				+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
			
 
				+		return UPROBE_HANDLER_REMOVE;
			
 
				+
			
 
				 	__size = sizeof(*entry) + tu->size;
			
 
				 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
			
 
				 	size -= sizeof(u32);
			
 
				 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
			
 
				-		return;
			
 
				+		return 0;
			
 
				 
			
 
				 	preempt_disable();
			
 
				 
			
@@ -664,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
				 	if (!entry)
			
 
				 		goto out;
			
 
				 
			
 
				-	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
			
 
				+	entry->ip = instruction_pointer(task_pt_regs(current));
			
 
				 	data = (u8 *)&entry[1];
			
 
				 	for (i = 0; i < tu->nr_args; i++)
			
 
				 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
			
@@ -674,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
				 
			
 
				  out:
			
 
				 	preempt_enable();
			
 
				+	return 0;
			
 
				 }
			
 
				 #endif	/* CONFIG_PERF_EVENTS */
			
 
				 
			
@@ -684,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
				 
			
 
				 	switch (type) {
			
 
				 	case TRACE_REG_REGISTER:
			
 
				-		return probe_event_enable(tu, TP_FLAG_TRACE);
			
 
				+		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
			
 
				 
			
 
				 	case TRACE_REG_UNREGISTER:
			
 
				 		probe_event_disable(tu, TP_FLAG_TRACE);
			
@@ -692,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
				 
			
 
				 #ifdef CONFIG_PERF_EVENTS
			
 
				 	case TRACE_REG_PERF_REGISTER:
			
 
				-		return probe_event_enable(tu, TP_FLAG_PROFILE);
			
 
				+		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
			
 
				 
			
 
				 	case TRACE_REG_PERF_UNREGISTER:
			
 
				 		probe_event_disable(tu, TP_FLAG_PROFILE);
			
 
				 		return 0;
			
 
				+
			
 
				+	case TRACE_REG_PERF_OPEN:
			
 
				+		return uprobe_perf_open(tu, data);
			
 
				+
			
 
				+	case TRACE_REG_PERF_CLOSE:
			
 
				+		return uprobe_perf_close(tu, data);
			
 
				+
			
 
				 #endif
			
 
				 	default:
			
 
				 		return 0;
			
@@ -706,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
				 
			
 
				 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
			
 
				 {
			
 
				-	struct uprobe_trace_consumer *utc;
			
 
				 	struct trace_uprobe *tu;
			
 
				+	int ret = 0;
			
 
				 
			
 
				-	utc = container_of(con, struct uprobe_trace_consumer, cons);
			
 
				-	tu = utc->tu;
			
 
				-	if (!tu || tu->consumer != utc)
			
 
				-		return 0;
			
 
				+	tu = container_of(con, struct trace_uprobe, consumer);
			
 
				+	tu->nhit++;
			
 
				 
			
 
				 	if (tu->flags & TP_FLAG_TRACE)
			
 
				-		uprobe_trace_func(tu, regs);
			
 
				+		ret |= uprobe_trace_func(tu, regs);
			
 
				 
			
 
				 #ifdef CONFIG_PERF_EVENTS
			
 
				 	if (tu->flags & TP_FLAG_PROFILE)
			
 
				-		uprobe_perf_func(tu, regs);
			
 
				+		ret |= uprobe_perf_func(tu, regs);
			
 
				 #endif
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 static struct trace_event_functions uprobe_funcs = {
			
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -5,12 +5,6 @@ menuconfig SAMPLES
 
				 
			
 
				 if SAMPLES
			
 
				 
			
 
				-config SAMPLE_TRACEPOINTS
			
 
				-	tristate "Build tracepoints examples -- loadable modules only"
			
 
				-	depends on TRACEPOINTS && m
			
 
				-	help
			
 
				-	  This build tracepoints example modules.
			
 
				-
			
 
				 config SAMPLE_TRACE_EVENTS
			
 
				 	tristate "Build trace_events examples -- loadable modules only"
			
 
				 	depends on EVENT_TRACING && m
			
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,4 +1,4 @@
 
				 # Makefile for Linux samples code
			
 
				 
			
 
				-obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ tracepoints/ trace_events/ \
			
 
				+obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ \
			
 
				 			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
			
--- a/samples/tracepoints/Makefile
+++ b/samples/tracepoints/Makefile
@@ -1,6 +0,0 @@
 
				-# builds the tracepoint example kernel modules;
			
 
				-# then to use one (as root):  insmod <module_name.ko>
			
 
				-
			
 
				-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
			
 
				-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
			
 
				-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
			
--- a/samples/tracepoints/tp-samples-trace.h
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -1,11 +0,0 @@
 
				-#ifndef _TP_SAMPLES_TRACE_H
			
 
				-#define _TP_SAMPLES_TRACE_H
			
 
				-
			
 
				-#include <linux/proc_fs.h>	/* for struct inode and struct file */
			
 
				-#include <linux/tracepoint.h>
			
 
				-
			
 
				-DECLARE_TRACE(subsys_event,
			
 
				-	TP_PROTO(struct inode *inode, struct file *file),
			
 
				-	TP_ARGS(inode, file));
			
 
				-DECLARE_TRACE_NOARGS(subsys_eventb);
			
 
				-#endif
			
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -1,57 +0,0 @@
 
				-/*
			
 
				- * tracepoint-probe-sample.c
			
 
				- *
			
 
				- * sample tracepoint probes.
			
 
				- */
			
 
				-
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/file.h>
			
 
				-#include <linux/dcache.h>
			
 
				-#include "tp-samples-trace.h"
			
 
				-
			
 
				-/*
			
 
				- * Here the caller only guarantees locking for struct file and struct inode.
			
 
				- * Locking must therefore be done in the probe to use the dentry.
			
 
				- */
			
 
				-static void probe_subsys_event(void *ignore,
			
 
				-			       struct inode *inode, struct file *file)
			
 
				-{
			
 
				-	path_get(&file->f_path);
			
 
				-	dget(file->f_path.dentry);
			
 
				-	printk(KERN_INFO "Event is encountered with filename %s\n",
			
 
				-		file->f_path.dentry->d_name.name);
			
 
				-	dput(file->f_path.dentry);
			
 
				-	path_put(&file->f_path);
			
 
				-}
			
 
				-
			
 
				-static void probe_subsys_eventb(void *ignore)
			
 
				-{
			
 
				-	printk(KERN_INFO "Event B is encountered\n");
			
 
				-}
			
 
				-
			
 
				-static int __init tp_sample_trace_init(void)
			
 
				-{
			
 
				-	int ret;
			
 
				-
			
 
				-	ret = register_trace_subsys_event(probe_subsys_event, NULL);
			
 
				-	WARN_ON(ret);
			
 
				-	ret = register_trace_subsys_eventb(probe_subsys_eventb, NULL);
			
 
				-	WARN_ON(ret);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-module_init(tp_sample_trace_init);
			
 
				-
			
 
				-static void __exit tp_sample_trace_exit(void)
			
 
				-{
			
 
				-	unregister_trace_subsys_eventb(probe_subsys_eventb, NULL);
			
 
				-	unregister_trace_subsys_event(probe_subsys_event, NULL);
			
 
				-	tracepoint_synchronize_unregister();
			
 
				-}
			
 
				-
			
 
				-module_exit(tp_sample_trace_exit);
			
 
				-
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_AUTHOR("Mathieu Desnoyers");
			
 
				-MODULE_DESCRIPTION("Tracepoint Probes Samples");
			
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -1,44 +0,0 @@
 
				-/*
			
 
				- * tracepoint-probe-sample2.c
			
 
				- *
			
 
				- * 2nd sample tracepoint probes.
			
 
				- */
			
 
				-
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/fs.h>
			
 
				-#include "tp-samples-trace.h"
			
 
				-
			
 
				-/*
			
 
				- * Here the caller only guarantees locking for struct file and struct inode.
			
 
				- * Locking must therefore be done in the probe to use the dentry.
			
 
				- */
			
 
				-static void probe_subsys_event(void *ignore,
			
 
				-			       struct inode *inode, struct file *file)
			
 
				-{
			
 
				-	printk(KERN_INFO "Event is encountered with inode number %lu\n",
			
 
				-		inode->i_ino);
			
 
				-}
			
 
				-
			
 
				-static int __init tp_sample_trace_init(void)
			
 
				-{
			
 
				-	int ret;
			
 
				-
			
 
				-	ret = register_trace_subsys_event(probe_subsys_event, NULL);
			
 
				-	WARN_ON(ret);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-module_init(tp_sample_trace_init);
			
 
				-
			
 
				-static void __exit tp_sample_trace_exit(void)
			
 
				-{
			
 
				-	unregister_trace_subsys_event(probe_subsys_event, NULL);
			
 
				-	tracepoint_synchronize_unregister();
			
 
				-}
			
 
				-
			
 
				-module_exit(tp_sample_trace_exit);
			
 
				-
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_AUTHOR("Mathieu Desnoyers");
			
 
				-MODULE_DESCRIPTION("Tracepoint Probes Samples");
			
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -1,57 +0,0 @@
 
				-/* tracepoint-sample.c
			
 
				- *
			
 
				- * Executes a tracepoint when /proc/tracepoint-sample is opened.
			
 
				- *
			
 
				- * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
			
 
				- *
			
 
				- * This file is released under the GPLv2.
			
 
				- * See the file COPYING for more details.
			
 
				- */
			
 
				-
			
 
				-#include <linux/module.h>
			
 
				-#include <linux/sched.h>
			
 
				-#include <linux/proc_fs.h>
			
 
				-#include "tp-samples-trace.h"
			
 
				-
			
 
				-DEFINE_TRACE(subsys_event);
			
 
				-DEFINE_TRACE(subsys_eventb);
			
 
				-
			
 
				-struct proc_dir_entry *pentry_sample;
			
 
				-
			
 
				-static int my_open(struct inode *inode, struct file *file)
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	trace_subsys_event(inode, file);
			
 
				-	for (i = 0; i < 10; i++)
			
 
				-		trace_subsys_eventb();
			
 
				-	return -EPERM;
			
 
				-}
			
 
				-
			
 
				-static const struct file_operations mark_ops = {
			
 
				-	.open = my_open,
			
 
				-	.llseek = noop_llseek,
			
 
				-};
			
 
				-
			
 
				-static int __init sample_init(void)
			
 
				-{
			
 
				-	printk(KERN_ALERT "sample init\n");
			
 
				-	pentry_sample = proc_create("tracepoint-sample", 0444, NULL,
			
 
				-		&mark_ops);
			
 
				-	if (!pentry_sample)
			
 
				-		return -EPERM;
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static void __exit sample_exit(void)
			
 
				-{
			
 
				-	printk(KERN_ALERT "sample exit\n");
			
 
				-	remove_proc_entry("tracepoint-sample", NULL);
			
 
				-}
			
 
				-
			
 
				-module_init(sample_init)
			
 
				-module_exit(sample_exit)
			
 
				-
			
 
				-MODULE_LICENSE("GPL");
			
 
				-MODULE_AUTHOR("Mathieu Desnoyers");
			
 
				-MODULE_DESCRIPTION("Tracepoint sample");
			
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -15,7 +15,7 @@ help:
 
				 	@echo '  x86_energy_perf_policy - Intel energy policy tool'
			
 
				 	@echo ''
			
 
				 	@echo 'You can do:'
			
 
				-	@echo ' $$ make -C tools/<tool>_install'
			
 
				+	@echo ' $$ make -C tools/ <tool>_install'
			
 
				 	@echo ''
			
 
				 	@echo '  from the kernel command line to build and install one of'
			
 
				 	@echo '  the tools above'
			
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -13,8 +13,7 @@
 
				  * GNU Lesser General Public License for more details.
			
 
				  *
			
 
				  * You should have received a copy of the GNU Lesser General Public
			
 
				- * License along with this program; if not, write to the Free Software
			
 
				- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
			
 
				  *
			
 
				  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			
 
				  *
			
@@ -1224,6 +1223,34 @@ static int field_is_long(struct format_field *field)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static unsigned int type_size(const char *name)
			
 
				+{
			
 
				+	/* This covers all FIELD_IS_STRING types. */
			
 
				+	static struct {
			
 
				+		const char *type;
			
 
				+		unsigned int size;
			
 
				+	} table[] = {
			
 
				+		{ "u8",   1 },
			
 
				+		{ "u16",  2 },
			
 
				+		{ "u32",  4 },
			
 
				+		{ "u64",  8 },
			
 
				+		{ "s8",   1 },
			
 
				+		{ "s16",  2 },
			
 
				+		{ "s32",  4 },
			
 
				+		{ "s64",  8 },
			
 
				+		{ "char", 1 },
			
 
				+		{ },
			
 
				+	};
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; table[i].type; i++) {
			
 
				+		if (!strcmp(table[i].type, name))
			
 
				+			return table[i].size;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int event_read_fields(struct event_format *event, struct format_field **fields)
			
 
				 {
			
 
				 	struct format_field *field = NULL;
			
@@ -1233,6 +1260,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 
				 	int count = 0;
			
 
				 
			
 
				 	do {
			
 
				+		unsigned int size_dynamic = 0;
			
 
				+
			
 
				 		type = read_token(&token);
			
 
				 		if (type == EVENT_NEWLINE) {
			
 
				 			free_token(token);
			
@@ -1391,6 +1420,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 
				 				field->type = new_type;
			
 
				 				strcat(field->type, " ");
			
 
				 				strcat(field->type, field->name);
			
 
				+				size_dynamic = type_size(field->name);
			
 
				 				free_token(field->name);
			
 
				 				strcat(field->type, brackets);
			
 
				 				field->name = token;
			
@@ -1463,7 +1493,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 
				 			if (read_expect_type(EVENT_ITEM, &token))
			
 
				 				goto fail;
			
 
				 
			
 
				-			/* add signed type */
			
 
				+			if (strtoul(token, NULL, 0))
			
 
				+				field->flags |= FIELD_IS_SIGNED;
			
 
				 
			
 
				 			free_token(token);
			
 
				 			if (read_expected(EVENT_OP, ";") < 0)
			
@@ -1478,10 +1509,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 
				 		if (field->flags & FIELD_IS_ARRAY) {
			
 
				 			if (field->arraylen)
			
 
				 				field->elementsize = field->size / field->arraylen;
			
 
				+			else if (field->flags & FIELD_IS_DYNAMIC)
			
 
				+				field->elementsize = size_dynamic;
			
 
				 			else if (field->flags & FIELD_IS_STRING)
			
 
				 				field->elementsize = 1;
			
 
				-			else
			
 
				-				field->elementsize = event->pevent->long_size;
			
 
				+			else if (field->flags & FIELD_IS_LONG)
			
 
				+				field->elementsize = event->pevent ?
			
 
				+						     event->pevent->long_size :
			
 
				+						     sizeof(long);
			
 
				 		} else
			
 
				 			field->elementsize = field->size;
			
 
				 
			
@@ -1785,6 +1820,8 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 
				 		   strcmp(token, "/") == 0 ||
			
 
				 		   strcmp(token, "<") == 0 ||
			
 
				 		   strcmp(token, ">") == 0 ||
			
 
				+		   strcmp(token, "<=") == 0 ||
			
 
				+		   strcmp(token, ">=") == 0 ||
			
 
				 		   strcmp(token, "==") == 0 ||
			
 
				 		   strcmp(token, "!=") == 0) {
			
 
				 
			
@@ -2481,7 +2518,7 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
 
				 
			
 
				 	free_token(token);
			
 
				 	arg = alloc_arg();
			
 
				-	if (!field) {
			
 
				+	if (!arg) {
			
 
				 		do_warning("%s: not enough memory!", __func__);
			
 
				 		*tok = NULL;
			
 
				 		return EVENT_ERROR;
			
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -13,8 +13,7 @@
 
				  * GNU Lesser General Public License for more details.
			
 
				  *
			
 
				  * You should have received a copy of the GNU Lesser General Public
			
 
				- * License along with this program; if not, write to the Free Software
			
 
				- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
			
 
				  *
			
 
				  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			
 
				  */
			
--- a/tools/lib/traceevent/event-utils.h
+++ b/tools/lib/traceevent/event-utils.h
@@ -13,8 +13,7 @@
 
				  * GNU Lesser General Public License for more details.
			
 
				  *
			
 
				  * You should have received a copy of the GNU Lesser General Public
			
 
				- * License along with this program; if not, write to the Free Software
			
 
				- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
			
 
				  *
			
 
				  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			
 
				  */
			
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -13,8 +13,7 @@
 
				  * GNU Lesser General Public License for more details.
			
 
				  *
			
 
				  * You should have received a copy of the GNU Lesser General Public
			
 
				- * License along with this program; if not, write to the Free Software
			
 
				- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
			
 
				  *
			
 
				  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			
 
				  */
			
--- a/tools/lib/traceevent/parse-utils.c
+++ b/tools/lib/traceevent/parse-utils.c
@@ -1,3 +1,22 @@
 
				+/*
			
 
				+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
			
 
				+ *
			
 
				+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU Lesser General Public
			
 
				+ * License as published by the Free Software Foundation;
			
 
				+ * version 2.1 of the License (not later!)
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU Lesser General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU Lesser General Public
			
 
				+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
			
 
				+ *
			
 
				+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			
 
				+ */
			
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				 #include <string.h>
			
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -13,8 +13,7 @@
 
				  * GNU Lesser General Public License for more details.
			
 
				  *
			
 
				  * You should have received a copy of the GNU Lesser General Public
			
 
				- * License along with this program; if not, write to the Free Software
			
 
				- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
			
 
				  *
			
 
				  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
			
 
				  */
			
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -222,10 +222,14 @@ install-pdf: pdf
 
				 #install-html: html
			
 
				 #	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
			
 
				 
			
 
				+ifneq ($(MAKECMDGOALS),clean)
			
 
				+ifneq ($(MAKECMDGOALS),tags)
			
 
				 $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
			
 
				 	$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
			
 
				 
			
 
				 -include $(OUTPUT)PERF-VERSION-FILE
			
 
				+endif
			
 
				+endif
			
 
				 
			
 
				 #
			
 
				 # Determine "include::" file references in asciidoc files.
			
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -61,11 +61,13 @@ OPTIONS
 
				 
			
 
				 --stdio:: Use the stdio interface.
			
 
				 
			
 
				---tui:: Use the TUI interface Use of --tui requires a tty, if one is not
			
 
				+--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
			
 
				 	present, as when piping to other commands, the stdio interface is
			
 
				 	used. This interfaces starts by centering on the line with more
			
 
				 	samples, TAB/UNTAB cycles through the lines with more samples.
			
 
				 
			
 
				+--gtk:: Use the GTK interface.
			
 
				+
			
 
				 -C::
			
 
				 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
			
 
				 	be provided as a comma-separated list with no space: 0,1. Ranges of
			
@@ -88,6 +90,9 @@ OPTIONS
 
				 --objdump=<path>::
			
 
				         Path to objdump binary.
			
 
				 
			
 
				+--skip-missing::
			
 
				+	Skip symbols that cannot be annotated.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1], linkperf:perf-report[1]
			
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -24,6 +24,13 @@ OPTIONS
 
				 -r::
			
 
				 --remove=::
			
 
				         Remove specified file from the cache.
			
 
				+-M::
			
 
				+--missing=:: 
			
 
				+	List missing build ids in the cache for the specified file.
			
 
				+-u::
			
 
				+--update::
			
 
				+	Update specified file of the cache. It can be used to update kallsyms
			
 
				+	kernel dso to vmlinux in order to support annotation.
			
 
				 -v::
			
 
				 --verbose::
			
 
				 	Be more verbose.
			
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -22,10 +22,6 @@ specified perf.data files.
 
				 
			
 
				 OPTIONS
			
 
				 -------
			
 
				--M::
			
 
				---displacement::
			
 
				-        Show position displacement relative to baseline.
			
 
				-
			
 
				 -D::
			
 
				 --dump-raw-trace::
			
 
				         Dump raw trace in ASCII.
			
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -28,6 +28,10 @@ OPTIONS
 
				 --verbose=::
			
 
				 	Show all fields.
			
 
				 
			
 
				+-g::
			
 
				+--group::
			
 
				+	Show event group information.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-record[1], linkperf:perf-list[1],
			
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -57,11 +57,44 @@ OPTIONS
 
				 
			
 
				 -s::
			
 
				 --sort=::
			
 
				-	Sort by key(s): pid, comm, dso, symbol, parent, srcline.
			
 
				+	Sort histogram entries by given key(s) - multiple keys can be specified
			
 
				+	in CSV format.  Following sort keys are available:
			
 
				+	pid, comm, dso, symbol, parent, cpu, srcline.
			
 
				+
			
 
				+	Each key has the following meaning:
			
 
				+
			
 
				+	- comm: command (name) of the task which can be read via /proc/<pid>/comm
			
 
				+	- pid: command and tid of the task
			
 
				+	- dso: name of library or module executed at the time of sample
			
 
				+	- symbol: name of function executed at the time of sample
			
 
				+	- parent: name of function matched to the parent regex filter. Unmatched
			
 
				+	entries are displayed as "[other]".
			
 
				+	- cpu: cpu number the task ran on at the time of sample
			
 
				+	- srcline: filename and line number executed at the time of sample.  The
			
 
				+	DWARF debugging info must be provided.
			
 
				+
			
 
				+	By default, comm, dso and symbol keys are used.
			
 
				+	(i.e. --sort comm,dso,symbol)
			
 
				+
			
 
				+	If --branch-stack option is used, following sort keys are also
			
 
				+	available:
			
 
				+	dso_from, dso_to, symbol_from, symbol_to, mispredict.
			
 
				+
			
 
				+	- dso_from: name of library or module branched from
			
 
				+	- dso_to: name of library or module branched to
			
 
				+	- symbol_from: name of function branched from
			
 
				+	- symbol_to: name of function branched to
			
 
				+	- mispredict: "N" for predicted branch, "Y" for mispredicted branch
			
 
				+
			
 
				+	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
			
 
				+	and symbol_to, see '--branch-stack'.
			
 
				 
			
 
				 -p::
			
 
				 --parent=<regex>::
			
 
				-        regex filter to identify parent, see: '--sort parent'
			
 
				+        A regex filter to identify parent. The parent is a caller of this
			
 
				+	function and searched through the callchain, thus it requires callchain
			
 
				+	information recorded. The pattern is in the extended regex format and
			
 
				+	defaults to "\^sys_|^do_page_fault", see '--sort parent'.
			
 
				 
			
 
				 -x::
			
 
				 --exclude-other::
			
@@ -74,7 +107,6 @@ OPTIONS
 
				 
			
 
				 -t::
			
 
				 --field-separator=::
			
 
				-
			
 
				 	Use a special separator character and don't pad with spaces, replacing
			
 
				 	all occurrences of this separator in symbol names (and other output)
			
 
				 	with a '.' character, that thus it's the only non valid separator.
			
@@ -171,6 +203,9 @@ OPTIONS
 
				 --objdump=<path>::
			
 
				         Path to objdump binary.
			
 
				 
			
 
				+--group::
			
 
				+	Show event group information together.
			
 
				+
			
 
				 SEE ALSO
			
 
				 --------
			
 
				 linkperf:perf-stat[1], linkperf:perf-annotate[1]
			
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -336,7 +336,6 @@ scripts listed by the 'perf script -l' command e.g.:
 
				 ----
			
 
				 root@tropicana:~# perf script -l
			
 
				 List of available trace scripts:
			
 
				-  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
			
 
				   wakeup-latency                       system-wide min/max/avg wakeup latency
			
 
				   rw-by-file <comm>                    r/w activity for a program, by file
			
 
				   rw-by-pid                            system-wide r/w activity
			
@@ -402,7 +401,6 @@ should show a new entry for your script:
 
				 ----
			
 
				 root@tropicana:~# perf script -l
			
 
				 List of available trace scripts:
			
 
				-  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
			
 
				   wakeup-latency                       system-wide min/max/avg wakeup latency
			
 
				   rw-by-file <comm>                    r/w activity for a program, by file
			
 
				   rw-by-pid                            system-wide r/w activity
			
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -114,6 +114,17 @@ with it.  --append may be used here.  Examples:
 
				 
			
 
				 perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
			
 
				 
			
 
				+-I msecs::
			
 
				+--interval-print msecs::
			
 
				+	Print count deltas every N milliseconds (minimum: 100ms)
			
 
				+	example: perf stat -I 1000 -e cycles -a sleep 5
			
 
				+
			
 
				+--aggr-socket::
			
 
				+Aggregate counts per processor socket for system-wide mode measurements.  This
			
 
				+is a useful mode to detect imbalance between sockets.  To enable this mode,
			
 
				+use --aggr-socket in addition to -a (system-wide).  The output includes the
			
 
				+socket number and the number of online processors on that socket. This is
			
 
				+useful to gauge the amount of aggregation.
			
 
				 
			
 
				 EXAMPLES
			
 
				 --------
			
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -23,6 +23,10 @@ from 'perf test list'.
 
				 
			
 
				 OPTIONS
			
 
				 -------
			
 
				+-s::
			
 
				+--skip::
			
 
				+	Tests to skip (comma separated numeric list).
			
 
				+
			
 
				 -v::
			
 
				 --verbose::
			
 
				 	Be more verbose.
			
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -60,7 +60,7 @@ Default is to monitor all CPUS.
 
				 
			
 
				 -i::
			
 
				 --inherit::
			
 
				-	Child tasks inherit counters, only makes sens with -p option.
			
 
				+	Child tasks do not inherit counters.
			
 
				 
			
 
				 -k <path>::
			
 
				 --vmlinux=<path>::
			
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -47,10 +47,11 @@ include config/utilities.mak
 
				 # backtrace post unwind.
			
 
				 #
			
 
				 # Define NO_BACKTRACE if you do not want stack backtrace debug feature
			
 
				+#
			
 
				+# Define NO_LIBNUMA if you do not want numa perf benchmark
			
 
				 
			
 
				 $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
			
 
				 	@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
			
 
				--include $(OUTPUT)PERF-VERSION-FILE
			
 
				 
			
 
				 uname_M := $(shell uname -m 2>/dev/null || echo not)
			
 
				 
			
@@ -148,13 +149,25 @@ RM = rm -f
 
				 MKDIR = mkdir
			
 
				 FIND = find
			
 
				 INSTALL = install
			
 
				+FLEX = flex
			
 
				+BISON= bison
			
 
				 
			
 
				 # sparse is architecture-neutral, which means that we need to tell it
			
 
				 # explicitly what architecture to check for. Fix this up for yours..
			
 
				 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
			
 
				 
			
 
				+ifneq ($(MAKECMDGOALS),clean)
			
 
				+ifneq ($(MAKECMDGOALS),tags)
			
 
				 -include config/feature-tests.mak
			
 
				 
			
 
				+ifeq ($(call get-executable,$(FLEX)),)
			
 
				+	dummy := $(error Error: $(FLEX) is missing on this system, please install it)
			
 
				+endif
			
 
				+
			
 
				+ifeq ($(call get-executable,$(BISON)),)
			
 
				+	dummy := $(error Error: $(BISON) is missing on this system, please install it)
			
 
				+endif
			
 
				+
			
 
				 ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
			
 
				 	CFLAGS := $(CFLAGS) -fstack-protector-all
			
 
				 endif
			
@@ -206,6 +219,8 @@ ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
 
				 	EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
			
 
				 	BASIC_CFLAGS += -I.
			
 
				 endif
			
 
				+endif # MAKECMDGOALS != tags
			
 
				+endif # MAKECMDGOALS != clean
			
 
				 
			
 
				 # Guard against environment variables
			
 
				 BUILTIN_OBJS =
			
@@ -230,11 +245,19 @@ endif
 
				 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
			
 
				 TE_LIB := -L$(TE_PATH) -ltraceevent
			
 
				 
			
 
				+export LIBTRACEEVENT
			
 
				+
			
 
				+# python extension build directories
			
 
				+PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
			
 
				+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
			
 
				+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
			
 
				+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
			
 
				+
			
 
				+python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
			
 
				+
			
 
				 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
			
 
				 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
			
 
				 
			
 
				-export LIBTRACEEVENT
			
 
				-
			
 
				 $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
			
 
				 	$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
			
 
				 	  --quiet build_ext; \
			
@@ -269,20 +292,17 @@ endif
 
				 
			
 
				 export PERL_PATH
			
 
				 
			
 
				-FLEX = flex
			
 
				-BISON= bison
			
 
				-
			
 
				 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
			
 
				 	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
			
 
				 
			
 
				 $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
			
 
				-	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c
			
 
				+	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
			
 
				 
			
 
				 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
			
 
				 	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
			
 
				 
			
 
				 $(OUTPUT)util/pmu-bison.c: util/pmu.y
			
 
				-	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c
			
 
				+	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
			
 
				 
			
 
				 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
			
 
				 $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
			
@@ -378,8 +398,11 @@ LIB_H += util/rblist.h
 
				 LIB_H += util/intlist.h
			
 
				 LIB_H += util/perf_regs.h
			
 
				 LIB_H += util/unwind.h
			
 
				-LIB_H += ui/helpline.h
			
 
				 LIB_H += util/vdso.h
			
 
				+LIB_H += ui/helpline.h
			
 
				+LIB_H += ui/progress.h
			
 
				+LIB_H += ui/util.h
			
 
				+LIB_H += ui/ui.h
			
 
				 
			
 
				 LIB_OBJS += $(OUTPUT)util/abspath.o
			
 
				 LIB_OBJS += $(OUTPUT)util/alias.o
			
@@ -453,6 +476,7 @@ LIB_OBJS += $(OUTPUT)util/stat.o
 
				 LIB_OBJS += $(OUTPUT)ui/setup.o
			
 
				 LIB_OBJS += $(OUTPUT)ui/helpline.o
			
 
				 LIB_OBJS += $(OUTPUT)ui/progress.o
			
 
				+LIB_OBJS += $(OUTPUT)ui/util.o
			
 
				 LIB_OBJS += $(OUTPUT)ui/hist.o
			
 
				 LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
			
 
				 
			
@@ -471,7 +495,8 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o
 
				 LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
			
 
				 LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
			
 
				 LIB_OBJS += $(OUTPUT)tests/pmu.o
			
 
				-LIB_OBJS += $(OUTPUT)tests/util.o
			
 
				+LIB_OBJS += $(OUTPUT)tests/hists_link.o
			
 
				+LIB_OBJS += $(OUTPUT)tests/python-use.o
			
 
				 
			
 
				 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
			
 
				 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
			
@@ -510,14 +535,13 @@ PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
 
				 #
			
 
				 # Platform specific tweaks
			
 
				 #
			
 
				+ifneq ($(MAKECMDGOALS),clean)
			
 
				+ifneq ($(MAKECMDGOALS),tags)
			
 
				 
			
 
				 # We choose to avoid "if .. else if .. else .. endif endif"
			
 
				 # because maintaining the nesting to match is a pain.  If
			
 
				 # we had "elif" things would have been much nicer...
			
 
				 
			
 
				--include config.mak.autogen
			
 
				--include config.mak
			
 
				-
			
 
				 ifdef NO_LIBELF
			
 
				 	NO_DWARF := 1
			
 
				 	NO_DEMANGLE := 1
			
@@ -557,6 +581,11 @@ else
 
				 endif # SOURCE_LIBELF
			
 
				 endif # NO_LIBELF
			
 
				 
			
 
				+# There's only x86 (both 32 and 64) support for CFI unwind so far
			
 
				+ifneq ($(ARCH),x86)
			
 
				+	NO_LIBUNWIND := 1
			
 
				+endif
			
 
				+
			
 
				 ifndef NO_LIBUNWIND
			
 
				 # for linking with debug library, run like:
			
 
				 # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
			
@@ -646,7 +675,6 @@ ifndef NO_NEWT
 
				 		LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/browsers/map.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
			
 
				-		LIB_OBJS += $(OUTPUT)ui/util.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/tui/setup.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/tui/util.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
			
@@ -655,9 +683,6 @@ ifndef NO_NEWT
 
				 		LIB_H += ui/browsers/map.h
			
 
				 		LIB_H += ui/keysyms.h
			
 
				 		LIB_H += ui/libslang.h
			
 
				-		LIB_H += ui/progress.h
			
 
				-		LIB_H += ui/util.h
			
 
				-		LIB_H += ui/ui.h
			
 
				 	endif
			
 
				 endif
			
 
				 
			
@@ -673,14 +698,12 @@ ifndef NO_GTK2
 
				 		BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
			
 
				 		EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
			
 
				 		LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
			
 
				+		LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/gtk/util.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
			
 
				 		LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
			
 
				-		# Make sure that it'd be included only once.
			
 
				-		ifeq ($(findstring -DNEWT_SUPPORT,$(BASIC_CFLAGS)),)
			
 
				-			LIB_OBJS += $(OUTPUT)ui/util.o
			
 
				-		endif
			
 
				+		LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
			
 
				 	endif
			
 
				 endif
			
 
				 
			
@@ -707,7 +730,7 @@ disable-python = $(eval $(disable-python_code))
 
				 define disable-python_code
			
 
				   BASIC_CFLAGS += -DNO_LIBPYTHON
			
 
				   $(if $(1),$(warning No $(1) was found))
			
 
				-  $(warning Python support won't be built)
			
 
				+  $(warning Python support will not be built)
			
 
				 endef
			
 
				 
			
 
				 override PYTHON := \
			
@@ -715,19 +738,10 @@ override PYTHON := \
 
				 
			
 
				 ifndef PYTHON
			
 
				   $(call disable-python,python interpreter)
			
 
				-  python-clean :=
			
 
				 else
			
 
				 
			
 
				   PYTHON_WORD := $(call shell-wordify,$(PYTHON))
			
 
				 
			
 
				-  # python extension build directories
			
 
				-  PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
			
 
				-  PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
			
 
				-  PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
			
 
				-  export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
			
 
				-
			
 
				-  python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
			
 
				-
			
 
				   ifdef NO_LIBPYTHON
			
 
				     $(call disable-python)
			
 
				   else
			
@@ -839,10 +853,24 @@ ifndef NO_BACKTRACE
 
				        endif
			
 
				 endif
			
 
				 
			
 
				+ifndef NO_LIBNUMA
			
 
				+	FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma
			
 
				+	ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
			
 
				+		msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
			
 
				+	else
			
 
				+		BASIC_CFLAGS += -DLIBNUMA_SUPPORT
			
 
				+		BUILTIN_OBJS += $(OUTPUT)bench/numa.o
			
 
				+		EXTLIBS += -lnuma
			
 
				+	endif
			
 
				+endif
			
 
				+
			
 
				 ifdef ASCIIDOC8
			
 
				 	export ASCIIDOC8
			
 
				 endif
			
 
				 
			
 
				+endif # MAKECMDGOALS != tags
			
 
				+endif # MAKECMDGOALS != clean
			
 
				+
			
 
				 # Shell quote (do not use $(call) to accommodate ancient setups);
			
 
				 
			
 
				 ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
			
@@ -884,7 +912,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf
 
				 	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
			
 
				 
			
 
				 $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
			
 
				-	$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
			
 
				+	$(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
			
 
				 		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
			
 
				 		$(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
			
 
				 
			
@@ -948,7 +976,13 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
 
				 
			
 
				 $(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
			
 
				 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
			
 
				-		'-DBINDIR="$(bindir_SQ)"' \
			
 
				+		'-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
			
 
				+		$<
			
 
				+
			
 
				+$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
			
 
				+	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
			
 
				+		-DPYTHONPATH='"$(OUTPUT)python"' \
			
 
				+		-DPYTHON='"$(PYTHON_WORD)"' \
			
 
				 		$<
			
 
				 
			
 
				 $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
			
@@ -1099,7 +1133,7 @@ perfexec_instdir = $(prefix)/$(perfexecdir)
 
				 endif
			
 
				 perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
			
 
				 
			
 
				-install: all try-install-man
			
 
				+install-bin: all
			
 
				 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
			
 
				 	$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
			
 
				 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
			
@@ -1120,6 +1154,8 @@ install: all try-install-man
 
				 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
			
 
				 	$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
			
 
				 
			
 
				+install: install-bin try-install-man
			
 
				+
			
 
				 install-python_ext:
			
 
				 	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
			
 
				 
			
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -155,6 +155,7 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
 
				 		if (lookup_path(buf))
			
 
				 			goto out;
			
 
				 		free(buf);
			
 
				+		buf = NULL;
			
 
				 	}
			
 
				 
			
 
				 	if (!strcmp(arch, "arm"))
			
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -1,6 +1,7 @@
 
				 #ifndef BENCH_H
			
 
				 #define BENCH_H
			
 
				 
			
 
				+extern int bench_numa(int argc, const char **argv, const char *prefix);
			
 
				 extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
			
 
				 extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
			
 
				 extern int bench_mem_memcpy(int argc, const char **argv,
			
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -0,0 +1,1731 @@
 
				+/*
			
 
				+ * numa.c
			
 
				+ *
			
 
				+ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
			
 
				+ */
			
 
				+
			
 
				+#include "../perf.h"
			
 
				+#include "../builtin.h"
			
 
				+#include "../util/util.h"
			
 
				+#include "../util/parse-options.h"
			
 
				+
			
 
				+#include "bench.h"
			
 
				+
			
 
				+#include <errno.h>
			
 
				+#include <sched.h>
			
 
				+#include <stdio.h>
			
 
				+#include <assert.h>
			
 
				+#include <malloc.h>
			
 
				+#include <signal.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <string.h>
			
 
				+#include <unistd.h>
			
 
				+#include <pthread.h>
			
 
				+#include <sys/mman.h>
			
 
				+#include <sys/time.h>
			
 
				+#include <sys/wait.h>
			
 
				+#include <sys/prctl.h>
			
 
				+#include <sys/types.h>
			
 
				+
			
 
				+#include <numa.h>
			
 
				+#include <numaif.h>
			
 
				+
			
 
				+/*
			
 
				+ * Regular printout to the terminal, suppressed if -q is specified:
			
 
				+ */
			
 
				+#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
			
 
				+
			
 
				+/*
				+ * Debug printf, only active when show_details >= 1.
				+ *
				+ * <stdio.h> already declares a dprintf(int fd, ...) function and
				+ * some libc configurations may provide it as a macro, so drop any
				+ * earlier macro definition before installing ours:
				+ */
				+#undef dprintf
				+#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
			
 
				+
			
 
				+struct thread_data {
			
 
				+	int			curr_cpu;
			
 
				+	cpu_set_t		bind_cpumask;
			
 
				+	int			bind_node;
			
 
				+	u8			*process_data;
			
 
				+	int			process_nr;
			
 
				+	int			thread_nr;
			
 
				+	int			task_nr;
			
 
				+	unsigned int		loops_done;
			
 
				+	u64			val;
			
 
				+	u64			runtime_ns;
			
 
				+	pthread_mutex_t		*process_lock;
			
 
				+};
			
 
				+
			
 
				+/* Parameters set by options: */
			
 
				+
			
 
				+struct params {
			
 
				+	/* Startup synchronization: */
			
 
				+	bool			serialize_startup;
			
 
				+
			
 
				+	/* Task hierarchy: */
			
 
				+	int			nr_proc;
			
 
				+	int			nr_threads;
			
 
				+
			
 
				+	/* Working set sizes: */
			
 
				+	const char		*mb_global_str;
			
 
				+	const char		*mb_proc_str;
			
 
				+	const char		*mb_proc_locked_str;
			
 
				+	const char		*mb_thread_str;
			
 
				+
			
 
				+	double			mb_global;
			
 
				+	double			mb_proc;
			
 
				+	double			mb_proc_locked;
			
 
				+	double			mb_thread;
			
 
				+
			
 
				+	/* Access patterns to the working set: */
			
 
				+	bool			data_reads;
			
 
				+	bool			data_writes;
			
 
				+	bool			data_backwards;
			
 
				+	bool			data_zero_memset;
			
 
				+	bool			data_rand_walk;
			
 
				+	u32			nr_loops;
			
 
				+	u32			nr_secs;
			
 
				+	u32			sleep_usecs;
			
 
				+
			
 
				+	/* Working set initialization: */
			
 
				+	bool			init_zero;
			
 
				+	bool			init_random;
			
 
				+	bool			init_cpu0;
			
 
				+
			
 
				+	/* Misc options: */
			
 
				+	int			show_details;
			
 
				+	int			run_all;
			
 
				+	int			thp;
			
 
				+
			
 
				+	long			bytes_global;
			
 
				+	long			bytes_process;
			
 
				+	long			bytes_process_locked;
			
 
				+	long			bytes_thread;
			
 
				+
			
 
				+	int			nr_tasks;
			
 
				+	bool			show_quiet;
			
 
				+
			
 
				+	bool			show_convergence;
			
 
				+	bool			measure_convergence;
			
 
				+
			
 
				+	int			perturb_secs;
			
 
				+	int			nr_cpus;
			
 
				+	int			nr_nodes;
			
 
				+
			
 
				+	/* Affinity options -C and -N: */
			
 
				+	char			*cpu_list_str;
			
 
				+	char			*node_list_str;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/* Global, read-writable area, accessible to all processes and threads: */
			
 
				+
			
 
				+struct global_info {
			
 
				+	u8			*data;
			
 
				+
			
 
				+	pthread_mutex_t		startup_mutex;
			
 
				+	int			nr_tasks_started;
			
 
				+
			
 
				+	pthread_mutex_t		startup_done_mutex;
			
 
				+
			
 
				+	pthread_mutex_t		start_work_mutex;
			
 
				+	int			nr_tasks_working;
			
 
				+
			
 
				+	pthread_mutex_t		stop_work_mutex;
			
 
				+	u64			bytes_done;
			
 
				+
			
 
				+	struct thread_data	*threads;
			
 
				+
			
 
				+	/* Convergence latency measurement: */
			
 
				+	bool			all_converged;
			
 
				+	bool			stop_work;
			
 
				+
			
 
				+	int			print_once;
			
 
				+
			
 
				+	struct params		p;
			
 
				+};
			
 
				+
			
 
				+static struct global_info	*g = NULL;
			
 
				+
			
 
				+static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
			
 
				+static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
			
 
				+
			
 
				+struct params p0;
			
 
				+
			
 
				+/*
				+ * Command line options of 'perf bench numa'. Plain options are parsed
				+ * straight into the p0 template; -C and -M go through callbacks that
				+ * stash the list strings for later processing.
				+ */
				+static const struct option options[] = {
				+	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
				+	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),
				+
				+	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global  memory (MBs)"),
				+	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
				+	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
				+	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),
				+
				+	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run"),
				+	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run"),
				+	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),
				+
				+	/* -R selects read accesses, -W write accesses: */
				+	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via reads (can be mixed with -W)"),
				+	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
				+	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
				+	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
				+	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),
				+
				+
				+	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
				+	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
				+	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
				+	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),
				+
				+	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
				+	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
				+	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
				+	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
				+	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
				+	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
				+	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
				+
				+	/* Special option string parsing callbacks: */
				+	OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
				+			"bind the first N tasks to these specific cpus (the rest is unbound)",
				+			parse_cpus_opt),
				+	OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
				+			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
				+			parse_nodes_opt),
				+	OPT_END()
				+};
			
 
				+
			
 
				+static const char * const bench_numa_usage[] = {
			
 
				+	"perf bench numa <options>",
			
 
				+	NULL
			
 
				+};
			
 
				+
			
 
				+static const char * const numa_usage[] = {
			
 
				+	"perf bench numa mem [<options>]",
			
 
				+	NULL
			
 
				+};
			
 
				+
			
 
				+/*
				+ * Restrict the calling task to 'target_cpu', or to CPUs
				+ * 0..nr_cpus-1 when target_cpu is -1.
				+ *
				+ * Returns the affinity mask that was in effect before the change,
				+ * so the caller can restore it later.
				+ */
				+static cpu_set_t bind_to_cpu(int target_cpu)
				+{
				+	cpu_set_t prev_mask, new_mask;
				+	int err;
				+
				+	err = sched_getaffinity(0, sizeof(prev_mask), &prev_mask);
				+	BUG_ON(err);
				+
				+	CPU_ZERO(&new_mask);
				+
				+	if (target_cpu != -1) {
				+		/* A single, valid CPU was requested: */
				+		BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
				+		CPU_SET(target_cpu, &new_mask);
				+	} else {
				+		int c;
				+
				+		/* -1 means "no restriction": allow every CPU we know of */
				+		for (c = 0; c < g->p.nr_cpus; c++)
				+			CPU_SET(c, &new_mask);
				+	}
				+
				+	err = sched_setaffinity(0, sizeof(new_mask), &new_mask);
				+	BUG_ON(err);
				+
				+	return prev_mask;
				+}
			
 
				+
			
 
				+/*
				+ * Restrict the calling task to the CPUs of 'target_node', or to all
				+ * CPUs when target_node is -1. Returns the previous affinity mask.
				+ *
				+ * The CPU range of a node is computed, not queried: node N is taken
				+ * to own CPUs [N*cpus_per_node, (N+1)*cpus_per_node), which requires
				+ * nr_cpus to be an exact multiple of nr_nodes.
				+ */
				+static cpu_set_t bind_to_node(int target_node)
				+{
				+	cpu_set_t orig_mask, mask;
				+	int cpus_per_node;
				+	int cpu;
				+	int ret;
				+
				+	/* Validate the divisor before dividing by it: */
				+	BUG_ON(g->p.nr_nodes <= 0);
				+	cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
				+
				+	BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
				+	BUG_ON(!cpus_per_node);
				+
				+	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
				+	BUG_ON(ret);
				+
				+	CPU_ZERO(&mask);
				+
				+	if (target_node == -1) {
				+		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
				+			CPU_SET(cpu, &mask);
				+	} else {
				+		int cpu_start, cpu_stop;
				+
				+		/* -1 is the only negative value with a meaning: */
				+		BUG_ON(target_node < 0);
				+
				+		cpu_start = (target_node + 0) * cpus_per_node;
				+		cpu_stop  = (target_node + 1) * cpus_per_node;
				+
				+		BUG_ON(cpu_stop > g->p.nr_cpus);
				+
				+		for (cpu = cpu_start; cpu < cpu_stop; cpu++)
				+			CPU_SET(cpu, &mask);
				+	}
				+
				+	ret = sched_setaffinity(0, sizeof(mask), &mask);
				+	BUG_ON(ret);
				+
				+	return orig_mask;
				+}
			
 
				+
			
 
				+/* Apply 'mask' as the CPU affinity of the calling task; failure is fatal. */
				+static void bind_to_cpumask(cpu_set_t mask)
				+{
				+	int err = sched_setaffinity(0, sizeof(mask), &mask);
				+
				+	BUG_ON(err);
				+}
			
 
				+
			
 
				+/* Switch the calling task back to the default memory policy; failure is fatal. */
				+static void mempol_restore(void)
				+{
				+	int err = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
				+
				+	BUG_ON(err);
				+}
			
 
				+
			
 
				+/*
				+ * Bind the memory allocations of the calling task to 'node' via
				+ * MPOL_BIND. A node of -1 means "leave the policy alone".
				+ */
				+static void bind_to_memnode(int node)
				+{
				+	unsigned long nodemask;
				+	int ret;
				+
				+	if (node == -1)
				+		return;
				+
				+	/*
				+	 * The mask carries one bit per node, so the limit is its width
				+	 * in bits, not its size in bytes:
				+	 */
				+	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
				+	BUG_ON(node < 0 || node >= (int)sizeof(nodemask)*8);
				+
				+	/* Unsigned shift: setting the top bit of a signed long is undefined */
				+	nodemask = 1UL << node;
				+
				+	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
				+	dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
				+
				+	BUG_ON(ret);
				+}
			
 
				+
			
 
				+#define HPSIZE (2*1024*1024)
			
 
				+
			
 
				+/* Format a task name into a small local buffer and install it with PR_SET_NAME: */
				+#define set_taskname(fmt...)				\
				+do {							\
				+	char name[20];					\
				+							\
				+	snprintf(name, sizeof(name), fmt);		\
				+	prctl(PR_SET_NAME, name);			\
				+} while (0)
			
 
				+
			
 
				+/*
				+ * Allocate an anonymous mapping of at least 'bytes0' bytes.
				+ *
				+ * @bytes0:      requested size; 0 returns NULL without mapping anything
				+ * @map_flags:   MAP_SHARED or MAP_PRIVATE (MAP_ANON is added here)
				+ * @init_zero:   bzero the whole mapping
				+ * @init_cpu0:   do the allocation and initialization while bound to
				+ *               node 0 (CPUs and memory), then restore affinity/policy
				+ * @thp:         > 0: MADV_HUGEPAGE, < 0: MADV_NOHUGEPAGE (private maps only)
				+ * @init_random: when not zeroing, fill with per-word distinct values
				+ *
				+ * HPSIZE extra bytes are mapped so that the returned pointer can be
				+ * rounded up to a 2MB boundary; the returned address is therefore
				+ * generally not the address mmap() handed out.
				+ */
				+static u8 *alloc_data(ssize_t bytes0, int map_flags,
				+		      int init_zero, int init_cpu0, int thp, int init_random)
				+{
				+	cpu_set_t orig_mask;
				+	ssize_t bytes;
				+	u8 *buf;
				+	int ret;
				+
				+	if (!bytes0)
				+		return NULL;
				+
				+	/* Only meaningful with init_cpu0, but never leave it uninitialized: */
				+	CPU_ZERO(&orig_mask);
				+
				+	/* Allocate and initialize all memory on CPU#0: */
				+	if (init_cpu0) {
				+		orig_mask = bind_to_node(0);
				+		bind_to_memnode(0);
				+	}
				+
				+	bytes = bytes0 + HPSIZE;
				+
				+	buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
				+	BUG_ON(buf == MAP_FAILED);
				+
				+	/* THP advice is only given for private mappings, and only if asked for: */
				+	if (map_flags == MAP_PRIVATE && thp) {
				+		ret = madvise(buf, bytes, thp > 0 ? MADV_HUGEPAGE : MADV_NOHUGEPAGE);
				+
				+		/* Warn once per run, not once per allocation: */
				+		if (ret && !g->print_once) {
				+			g->print_once = 1;
				+			if (thp > 0)
				+				printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
				+			else
				+				printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
				+		}
				+	}
				+
				+	if (init_zero) {
				+		bzero(buf, bytes);
				+	} else if (init_random) {
				+		/* Initialize random contents, different in each word: */
				+		u64 *wbuf = (void *)buf;
				+		long off = rand();
				+		long i;
				+
				+		for (i = 0; i < bytes/8; i++)
				+			wbuf[i] = i + off;
				+	}
				+
				+	/* Align to 2MB boundary: */
				+	buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
				+
				+	/* Restore affinity: */
				+	if (init_cpu0) {
				+		bind_to_cpumask(orig_mask);
				+		mempol_restore();
				+	}
				+
				+	return buf;
				+}
			
 
				+
			
 
				+/* Unmap 'bytes' bytes at 'data'; a NULL pointer is ignored, an munmap() failure is fatal. */
				+static void free_data(void *data, ssize_t bytes)
				+{
				+	int err;
				+
				+	if (data == NULL)
				+		return;
				+
				+	err = munmap(data, bytes);
				+	BUG_ON(err);
				+}
			
 
				+
			
 
				+/*
				+ * Allocate a zero-filled MAP_SHARED buffer, usable across processes:
				+ */
				+static void * zalloc_shared_data(ssize_t bytes)
				+{
				+	const int init_zero = 1;
				+
				+	return alloc_data(bytes, MAP_SHARED, init_zero, g->p.init_cpu0,  g->p.thp, g->p.init_random);
				+}
			
 
				+
			
 
				+/*
				+ * Allocate a MAP_SHARED buffer, usable across processes, without
				+ * forcing it to zero:
				+ */
				+static void * setup_shared_data(ssize_t bytes)
				+{
				+	const int init_zero = 0;
				+
				+	return alloc_data(bytes, MAP_SHARED, init_zero, g->p.init_cpu0,  g->p.thp, g->p.init_random);
				+}
			
 
				+
			
 
				+/*
				+ * Allocate a MAP_PRIVATE buffer. Depending on the caller it is
				+ * either shared by the threads of one process or used by a
				+ * single thread only:
				+ */
				+static void * setup_private_data(ssize_t bytes)
				+{
				+	const int init_zero = 0;
				+
				+	return alloc_data(bytes, MAP_PRIVATE, init_zero, g->p.init_cpu0,  g->p.thp, g->p.init_random);
				+}
			
 
				+
			
 
				+/*
				+ * Initialize 'mutex' as a process-shared (global) mutex. The mutex
				+ * itself must live in memory that all participating processes share.
				+ */
				+static void init_global_mutex(pthread_mutex_t *mutex)
				+{
				+	pthread_mutexattr_t attr;
				+
				+	pthread_mutexattr_init(&attr);
				+	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
				+	pthread_mutex_init(mutex, &attr);
				+
				+	/* The attribute object is not needed once the mutex exists: */
				+	pthread_mutexattr_destroy(&attr);
				+}
			
 
				+
			
 
				+/*
				+ * Remember a private copy of the -C argument in p0.cpu_list_str.
				+ *
				+ * Returns 0 on success, -1 if the copy could not be allocated.
				+ */
				+static int parse_cpu_list(const char *arg)
				+{
				+	char *list = strdup(arg);
				+
				+	/* Fail the option instead of storing (and later printing) NULL: */
				+	if (!list)
				+		return -1;
				+
				+	p0.cpu_list_str = list;
				+
				+	dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Turn the -C string into per-task CPU bind masks.
				+ *
				+ * The list is comma separated; each token has the form
				+ *
				+ *	cpu[-cpu_end][#step][_len][xmul]
				+ *
				+ * and assigns a mask of 'len' consecutive CPUs starting at each
				+ * selected CPU to 'mul' consecutive tasks. Tasks are filled in
				+ * order, starting with task 0; tasks beyond the list stay unbound.
				+ * Malformed or out-of-range values are fatal (BUG_ON).
				+ */
				+static void parse_setup_cpu_list(void)
				+{
				+	struct thread_data *td;
				+	char *str0, *str;
				+	int t;
				+
				+	if (!g->p.cpu_list_str)
				+		return;
				+
				+	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
				+
				+	/* strsep() modifies its input, so work on a private copy: */
				+	str0 = str = strdup(g->p.cpu_list_str);
				+	t = 0;
				+
				+	BUG_ON(!str);
				+
				+	tprintf("# binding tasks to CPUs:\n");
				+	tprintf("#  ");
				+
				+	while (true) {
				+		int bind_cpu, bind_cpu_0, bind_cpu_1;
				+		char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
				+		int bind_len;
				+		int step;
				+		int mul;
				+
				+		tok = strsep(&str, ",");
				+		if (!tok)
				+			break;
				+
				+		tok_end = strstr(tok, "-");
				+
				+		/* Never hand a NULL pointer to %s: */
				+		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end ? tok_end : "(null)");
				+		if (!tok_end) {
				+			/* Single CPU specified: */
				+			bind_cpu_0 = bind_cpu_1 = atol(tok);
				+		} else {
				+			/* CPU range specified (for example: "5-11"): */
				+			bind_cpu_0 = atol(tok);
				+			bind_cpu_1 = atol(tok_end + 1);
				+		}
				+
				+		step = 1;
				+		tok_step = strstr(tok, "#");
				+		if (tok_step) {
				+			step = atol(tok_step + 1);
				+			BUG_ON(step <= 0 || step >= g->p.nr_cpus);
				+		}
				+
				+		/*
				+		 * Mask length.
				+		 * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
				+		 * where the _4 means the next 4 CPUs are allowed.
				+		 */
				+		bind_len = 1;
				+		tok_len = strstr(tok, "_");
				+		if (tok_len) {
				+			bind_len = atol(tok_len + 1);
				+			BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
				+		}
				+
				+		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
				+		mul = 1;
				+		tok_mul = strstr(tok, "x");
				+		if (tok_mul) {
				+			mul = atol(tok_mul + 1);
				+			BUG_ON(mul <= 0);
				+		}
				+
				+		dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
				+
				+		BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus);
				+		BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus);
				+		BUG_ON(bind_cpu_0 > bind_cpu_1);
				+
				+		for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
				+			int i;
				+
				+			for (i = 0; i < mul; i++) {
				+				int cpu;
				+
				+				/* More bind targets than tasks: ignore the rest */
				+				if (t >= g->p.nr_tasks) {
				+					printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
				+					goto out;
				+				}
				+				td = g->threads + t;
				+
				+				if (t)
				+					tprintf(",");
				+				if (bind_len > 1) {
				+					tprintf("%2d/%d", bind_cpu, bind_len);
				+				} else {
				+					tprintf("%2d", bind_cpu);
				+				}
				+
				+				CPU_ZERO(&td->bind_cpumask);
				+				for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
				+					BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
				+					CPU_SET(cpu, &td->bind_cpumask);
				+				}
				+				t++;
				+			}
				+		}
				+	}
				+out:
				+
				+	tprintf("\n");
				+
				+	if (t < g->p.nr_tasks)
				+		printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
				+
				+	free(str0);
				+}
			
 
				+
			
 
				+/* Option callback for -C/--cpus: rejects a missing argument, otherwise stores the list. */
				+static int parse_cpus_opt(const struct option *opt __maybe_unused,
				+			  const char *arg, int unset __maybe_unused)
				+{
				+	return arg ? parse_cpu_list(arg) : -1;
				+}
			
 
				+
			
 
				+/*
				+ * Remember a private copy of the -M argument in p0.node_list_str.
				+ *
				+ * Returns 0 on success, -1 if the copy could not be allocated.
				+ */
				+static int parse_node_list(const char *arg)
				+{
				+	char *list = strdup(arg);
				+
				+	/* Fail the option instead of storing (and later printing) NULL: */
				+	if (!list)
				+		return -1;
				+
				+	p0.node_list_str = list;
				+
				+	dprintf("got NODE list: {%s}\n", p0.node_list_str);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Turn the -M string into per-task memory node bindings.
				+ *
				+ * The list is comma separated; each token has the form
				+ *
				+ *	node[-node_end][#step][xmul]
				+ *
				+ * and assigns each selected node to 'mul' consecutive tasks. Tasks
				+ * are filled in order, starting with task 0; tasks beyond the list
				+ * keep their previous bind_node. Malformed or out-of-range values
				+ * are fatal (BUG_ON).
				+ */
				+static void parse_setup_node_list(void)
				+{
				+	struct thread_data *td;
				+	char *str0, *str;
				+	int t;
				+
				+	if (!g->p.node_list_str)
				+		return;
				+
				+	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
				+
				+	/* strsep() modifies its input, so work on a private copy: */
				+	str0 = str = strdup(g->p.node_list_str);
				+	t = 0;
				+
				+	BUG_ON(!str);
				+
				+	tprintf("# binding tasks to NODEs:\n");
				+	tprintf("# ");
				+
				+	while (true) {
				+		int bind_node, bind_node_0, bind_node_1;
				+		char *tok, *tok_end, *tok_step, *tok_mul;
				+		int step;
				+		int mul;
				+
				+		tok = strsep(&str, ",");
				+		if (!tok)
				+			break;
				+
				+		tok_end = strstr(tok, "-");
				+
				+		/* Never hand a NULL pointer to %s: */
				+		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end ? tok_end : "(null)");
				+		if (!tok_end) {
				+			/* Single NODE specified: */
				+			bind_node_0 = bind_node_1 = atol(tok);
				+		} else {
				+			/* NODE range specified (for example: "5-11"): */
				+			bind_node_0 = atol(tok);
				+			bind_node_1 = atol(tok_end + 1);
				+		}
				+
				+		step = 1;
				+		tok_step = strstr(tok, "#");
				+		if (tok_step) {
				+			step = atol(tok_step + 1);
				+			BUG_ON(step <= 0 || step >= g->p.nr_nodes);
				+		}
				+
				+		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
				+		mul = 1;
				+		tok_mul = strstr(tok, "x");
				+		if (tok_mul) {
				+			mul = atol(tok_mul + 1);
				+			BUG_ON(mul <= 0);
				+		}
				+
				+		dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
				+
				+		BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes);
				+		BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes);
				+		BUG_ON(bind_node_0 > bind_node_1);
				+
				+		for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
				+			int i;
				+
				+			for (i = 0; i < mul; i++) {
				+				/* More bind targets than tasks: ignore the rest */
				+				if (t >= g->p.nr_tasks) {
				+					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
				+					goto out;
				+				}
				+				td = g->threads + t;
				+
				+				if (!t)
				+					tprintf(" %2d", bind_node);
				+				else
				+					tprintf(",%2d", bind_node);
				+
				+				td->bind_node = bind_node;
				+				t++;
				+			}
				+		}
				+	}
				+out:
				+
				+	tprintf("\n");
				+
				+	if (t < g->p.nr_tasks)
				+		printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
				+
				+	free(str0);
				+}
			
 
				+
			
 
				+/* Option callback for -M/--memnodes: rejects a missing argument, otherwise stores the list. */
				+static int parse_nodes_opt(const struct option *opt __maybe_unused,
				+			  const char *arg, int unset __maybe_unused)
				+{
				+	if (!arg)
				+		return -1;
				+
				+	return parse_node_list(arg);
				+}
			
 
				+
			
 
				+/* Argument parenthesized so that expressions such as BIT(a + b) expand correctly: */
				+#define BIT(x) (1ul << (x))
				+
				+/*
				+ * Advance a 32-bit linear feedback shift register by one step:
				+ * shift right by one and, if the bit shifted out was set, XOR in
				+ * the tap mask. A state of 0 maps to 0.
				+ */
				+static inline uint32_t lfsr_32(uint32_t lfsr)
				+{
				+	const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
				+	return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
				+}
			
 
				+
			
 
				+/*
				+ * Make sure there's real data dependency to RAM (when read
				+ * accesses are enabled), so the compiler, the CPU and the
				+ * kernel (KSM, zero page, etc.) cannot optimize away RAM
				+ * accesses.
				+ *
				+ * With data_reads set, the word at 'data' is added to 'val'; with
				+ * data_writes set, val + 1 is stored back to 'data'. Returns the
				+ * (possibly updated) val, which the caller feeds into the next access.
				+ */
				+static inline u64 access_data(u64 *data, u64 val)
				+{
				+	if (g->p.data_reads)
				+		val += *data;
				+	if (g->p.data_writes)
				+		*data = val + 1;
				+	return val;
				+}
			
 
				+
			
 
				+/*
				+ * The worker process does two types of work, a forwards going
				+ * loop and a backwards going loop.
				+ *
				+ * We do this so that on multiprocessor systems we do not create
				+ * a 'train' of processing, with highly synchronized processes,
				+ * skewing the whole benchmark.
				+ *
				+ * @__data: buffer to work on, NULL if and only if 'bytes' yields 0 words
				+ * @bytes:  size of the buffer
				+ * @nr:     index of this worker among the 'nr_max' users of the buffer
				+ * @loop:   current loop iteration; together with 'nr' it selects the
				+ *          starting offset so that workers begin at different places
				+ * @val:    running value threaded through all accesses
				+ *
				+ * Returns the updated running value.
				+ */
				+static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
				+{
				+	long words = bytes/sizeof(u64);
				+	u64 *data = (void *)__data;
				+	long chunk_0, chunk_1;
				+	u64 *d0, *d, *d1;
				+	long off;
				+	long i;
				+
				+	BUG_ON(!data && words);
				+	BUG_ON(data && !words);
				+
				+	if (!data)
				+		return val;
				+
				+	/* Very simple memset() work variant: */
				+	if (g->p.data_zero_memset && !g->p.data_rand_walk) {
				+		bzero(data, bytes);
				+		return val;
				+	}
				+
				+	/* Spread out by PID/TID nr and by loop nr: */
				+	chunk_0 = words/nr_max;
				+	chunk_1 = words/g->p.nr_loops;
				+	off = nr*chunk_0 + loop*chunk_1;
				+
				+	while (off >= words)
				+		off -= words;
				+
				+	if (g->p.data_rand_walk) {
				+		u32 lfsr = nr + loop + val;
				+		/* 'long' like start/end: an int would truncate on huge buffers */
				+		long j;
				+
				+		for (i = 0; i < words/1024; i++) {
				+			long start, end;
				+
				+			lfsr = lfsr_32(lfsr);
				+
				+			/* Touch up to 1024 words at a pseudo-random position: */
				+			start = lfsr % words;
				+			end = min(start + 1024, words-1);
				+
				+			if (g->p.data_zero_memset) {
				+				bzero(data + start, (end-start) * sizeof(u64));
				+			} else {
				+				for (j = start; j < end; j++)
				+					val = access_data(data + j, val);
				+			}
				+		}
				+	} else if (!g->p.data_backwards || (nr + loop) & 1) {
				+
				+		d0 = data + off;
				+		d  = data + off + 1;
				+		d1 = data + words;
				+
				+		/* Process data forwards, wrapping around until we are back at d0: */
				+		for (;;) {
				+			if (unlikely(d >= d1))
				+				d = data;
				+			if (unlikely(d == d0))
				+				break;
				+
				+			val = access_data(d, val);
				+
				+			d++;
				+		}
				+	} else {
				+		d0 = data + off;
				+		d  = data + off - 1;
				+
				+		/* Process data backwards, wrapping around until we are back at d0: */
				+		for (;;) {
				+			if (unlikely(d < data))
				+				d = data + words-1;
				+			if (unlikely(d == d0))
				+				break;
				+
				+			val = access_data(d, val);
				+
				+			d--;
				+		}
				+	}
				+
				+	return val;
				+}
			
 
				+
			
 
				+/*
				+ * Record in the shared thread table which CPU task 'task_nr' is
				+ * currently running on, then issue a prctl(0, bytes_worked) call.
				+ */
				+static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
				+{
				+	struct thread_data *td = &g->threads[task_nr];
				+	unsigned int this_cpu = sched_getcpu();
				+
				+	td->curr_cpu = this_cpu;
				+	prctl(0, bytes_worked);
				+}
			
 
				+
			
 
				+#define MAX_NR_NODES	64
			
 
				+
			
 
				+/*
				+ * Count the number of nodes a process's threads
				+ * are spread out on.
				+ *
				+ * A count of 1 means that the process is compressed
				+ * to a single node. A count of g->p.nr_nodes means it's
				+ * spread out on the whole system.
				+ *
				+ * Returns 0 if the node of any thread cannot be determined,
				+ * i.e. numa_node_of_cpu() fails for its recorded curr_cpu.
				+ */
				+static int count_process_nodes(int process_nr)
				+{
				+	char node_present[MAX_NR_NODES] = { 0, };
				+	int nodes;
				+	int n, t;
				+
				+	for (t = 0; t < g->p.nr_threads; t++) {
				+		struct thread_data *td;
				+		int task_nr;
				+		int node;
				+
				+		task_nr = process_nr*g->p.nr_threads + t;
				+		td = g->threads + task_nr;
				+
				+		node = numa_node_of_cpu(td->curr_cpu);
				+
				+		/* No valid node for this CPU value: don't index with -1 */
				+		if (node < 0)
				+			return 0;
				+		BUG_ON(node >= MAX_NR_NODES);
				+
				+		node_present[node] = 1;
				+	}
				+
				+	nodes = 0;
				+
				+	for (n = 0; n < MAX_NR_NODES; n++)
				+		nodes += node_present[n];
				+
				+	return nodes;
				+}
			
 
				+
			
 
				+/*
				+ * Count how many distinct processes have at least one thread
				+ * whose current CPU belongs to 'node'.
				+ *
				+ * A count of 1 means that the node contains only a single
				+ * process. If all nodes on the system contain at most one
				+ * process then we are well-converged.
				+ */
				+static int count_node_processes(int node)
				+{
				+	int nr_found = 0;
				+	int proc;
				+
				+	for (proc = 0; proc < g->p.nr_proc; proc++) {
				+		int thr;
				+
				+		/* One matching thread is enough to count the process once: */
				+		for (thr = 0; thr < g->p.nr_threads; thr++) {
				+			struct thread_data *td = g->threads + proc*g->p.nr_threads + thr;
				+
				+			if (numa_node_of_cpu(td->curr_cpu) != node)
				+				continue;
				+
				+			nr_found++;
				+			break;
				+		}
				+	}
				+
				+	return nr_found;
				+}
			
 
				+
			
 
				+/*
				+ * Determine whether every process is compressed onto a single node.
				+ *
				+ * Sets *strong to 1 if the per-process node count is exactly 1 for
				+ * all processes; otherwise sets it to 0 and prints the observed
				+ * {min-max} node spread.
				+ */
				+static void calc_convergence_compression(int *strong)
				+{
				+	unsigned int spread_lo = -1;
				+	unsigned int spread_hi = 0;
				+	int proc;
				+
				+	for (proc = 0; proc < g->p.nr_proc; proc++) {
				+		unsigned int nodes = count_process_nodes(proc);
				+
				+		spread_lo = min(nodes, spread_lo);
				+		spread_hi = max(nodes, spread_hi);
				+	}
				+
				+	/* Strong convergence: all threads compress on a single node: */
				+	*strong = (spread_lo == 1 && spread_hi == 1);
				+
				+	if (!*strong)
				+		tprintf(" {%d-%d}", spread_lo, spread_hi);
				+}
			
 
				+
			
 
				+/*
				+ * Print per-node task/process distribution and loop skew, and track
				+ * whether the workload has converged.
				+ *
				+ * @runtime_ns_max: current runtime, used as the convergence timestamp
				+ * @convergence:    in/out: 0 while not converged, otherwise the runtime
				+ *                  at which convergence was first observed
				+ *
				+ * Converged means: every process sits on a single node and the number
				+ * of (node, process) groups equals nr_proc. With measure_convergence
				+ * set, reaching that state also requests all workers to stop.
				+ * Does nothing unless show_convergence or measure_convergence is set.
				+ */
				+static void calc_convergence(double runtime_ns_max, double *convergence)
				+{
				+	unsigned int loops_done_min, loops_done_max;
				+	int process_groups;
				+	/* Fully zeroed, so slots beyond nr_nodes are defined too: */
				+	int nodes[MAX_NR_NODES] = { 0, };
				+	int distance;
				+	int nr_min;
				+	int nr_max;
				+	int strong;
				+	int sum;
				+	int nr;
				+	int node;
				+	int cpu;
				+	int t;
				+
				+	if (!g->p.show_convergence && !g->p.measure_convergence)
				+		return;
				+
				+	/* The loops below read nodes[0..nr_nodes-1]: */
				+	BUG_ON(g->p.nr_nodes > MAX_NR_NODES);
				+
				+	loops_done_min = -1;
				+	loops_done_max = 0;
				+
				+	for (t = 0; t < g->p.nr_tasks; t++) {
				+		struct thread_data *td = g->threads + t;
				+		unsigned int loops_done;
				+
				+		cpu = td->curr_cpu;
				+
				+		/* Not all threads have written it yet: */
				+		if (cpu < 0)
				+			continue;
				+
				+		node = numa_node_of_cpu(cpu);
				+
				+		/* Never index the array with a failed or oversized node id: */
				+		BUG_ON(node < 0 || node >= MAX_NR_NODES);
				+		nodes[node]++;
				+
				+		loops_done = td->loops_done;
				+		loops_done_min = min(loops_done, loops_done_min);
				+		loops_done_max = max(loops_done, loops_done_max);
				+	}
				+
				+	nr_max = 0;
				+	nr_min = g->p.nr_tasks;
				+	sum = 0;
				+
				+	for (node = 0; node < g->p.nr_nodes; node++) {
				+		nr = nodes[node];
				+		nr_min = min(nr, nr_min);
				+		nr_max = max(nr, nr_max);
				+		sum += nr;
				+	}
				+	BUG_ON(nr_min > nr_max);
				+
				+	BUG_ON(sum > g->p.nr_tasks);
				+
				+	/*
				+	 * Count the number of distinct process groups present
				+	 * on nodes - when we are converged this will decrease
				+	 * to g->p.nr_proc:
				+	 */
				+	process_groups = 0;
				+
				+	for (node = 0; node < g->p.nr_nodes; node++) {
				+		int processes = count_node_processes(node);
				+
				+		nr = nodes[node];
				+		tprintf(" %2d/%-2d", nr, processes);
				+
				+		process_groups += processes;
				+	}
				+
				+	distance = nr_max - nr_min;
				+
				+	tprintf(" [%2d/%-2d]", distance, process_groups);
				+
				+	tprintf(" l:%3d-%-3d (%3d)",
				+		loops_done_min, loops_done_max, loops_done_max-loops_done_min);
				+
				+	if (loops_done_min && loops_done_max) {
				+		double skew = 1.0 - (double)loops_done_min/loops_done_max;
				+
				+		tprintf(" [%4.1f%%]", skew * 100.0);
				+	}
				+
				+	calc_convergence_compression(&strong);
				+
				+	if (strong && process_groups == g->p.nr_proc) {
				+		/* Newly converged: remember when, and optionally stop the run */
				+		if (!*convergence) {
				+			*convergence = runtime_ns_max;
				+			tprintf(" (%6.1fs converged)\n", *convergence/1e9);
				+			if (g->p.measure_convergence) {
				+				g->all_converged = true;
				+				g->stop_work = true;
				+			}
				+		}
				+	} else {
				+		/* Lost a previously reached convergence: reset the timestamp */
				+		if (*convergence) {
				+			tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
				+			*convergence = 0;
				+		}
				+		tprintf("\n");
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Print the progress header - percentage of loops completed and elapsed
				+ * minutes - and then hand over to calc_convergence(), which continues
				+ * the same output line with its own statistics:
				+ */
				+static void show_summary(double runtime_ns_max, int l, double *convergence)
				+{
				+	double pct_done = (double)(l+1)/g->p.nr_loops*100.0;
				+	double mins_run = runtime_ns_max/1e9 / 60.0;
				+
				+	tprintf("\r #  %5.1f%%  [%.1f mins]", pct_done, mins_run);
				+
				+	calc_convergence(runtime_ns_max, convergence);
				+
				+	/* Nothing to flush when output is suppressed: */
				+	if (g->p.show_details < 0)
				+		return;
				+
				+	fflush(stdout);
				+}
			
 
				+
			
 
				+/*
				+ * Body of one benchmark thread; __tdata is this thread's struct thread_data.
				+ *
				+ * The thread applies its CPU and memory-node binding, sets up its private
				+ * buffer, optionally takes part in the serialized startup handshake and
				+ * then runs up to g->p.nr_loops work iterations, stopping early once
				+ * g->stop_work is set or the g->p.nr_secs time limit is reached.
				+ *
				+ * On the way out it records its total runtime in td->runtime_ns and adds
				+ * the number of bytes it processed to g->bytes_done.
				+ *
				+ * Always returns NULL.
				+ */
				+static void *worker_thread(void *__tdata)
				+{
				+	struct thread_data *td = __tdata;
				+	struct timeval start0, start, stop, diff;
				+	int process_nr = td->process_nr;
				+	int thread_nr = td->thread_nr;
				+	unsigned long last_perturbance;
				+	int task_nr = td->task_nr;
				+	int details = g->p.show_details;
				+	int first_task, last_task;
				+	double convergence = 0;
				+	u64 val = td->val;
				+	double runtime_ns_max;
				+	u8 *global_data;
				+	u8 *process_data;
				+	u8 *thread_data;
				+	u64 bytes_done;
				+	long work_done;
				+	u32 l;
				+
				+	bind_to_cpumask(td->bind_cpumask);
				+	bind_to_memnode(td->bind_node);
				+
				+	set_taskname("thread %d/%d", process_nr, thread_nr);
				+
				+	global_data = g->data;
				+	process_data = td->process_data;
				+	thread_data = setup_private_data(g->p.bytes_thread);
				+
				+	bytes_done = 0;
				+
				+	/* The very last task is the one that prints the periodic summary: */
				+	last_task = 0;
				+	if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
				+		last_task = 1;
				+
				+	/* The very first task is the one that gets perturbed: */
				+	first_task = 0;
				+	if (process_nr == 0 && thread_nr == 0)
				+		first_task = 1;
				+
				+	if (details >= 2) {
				+		printf("#  thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
				+			process_nr, thread_nr, global_data, process_data, thread_data);
				+	}
				+
				+	if (g->p.serialize_startup) {
				+		pthread_mutex_lock(&g->startup_mutex);
				+		g->nr_tasks_started++;
				+		pthread_mutex_unlock(&g->startup_mutex);
				+
				+		/* Here we will wait for the main process to start us all at once: */
				+		pthread_mutex_lock(&g->start_work_mutex);
				+		g->nr_tasks_working++;
				+
				+		/*
				+		 * The last one wakes the main process.
				+		 * NOTE(review): this unlocks a mutex that was locked by the
				+		 * parent, not by this thread - confirm that the mutex setup
				+		 * in init_global_mutex() permits that.
				+		 */
				+		if (g->nr_tasks_working == g->p.nr_tasks)
				+			pthread_mutex_unlock(&g->startup_done_mutex);
				+
				+		pthread_mutex_unlock(&g->start_work_mutex);
				+	}
				+
				+	gettimeofday(&start0, NULL);
				+
				+	start = stop = start0;
				+	last_perturbance = start.tv_sec;
				+
				+	for (l = 0; l < g->p.nr_loops; l++) {
				+		start = stop;
				+
				+		if (g->stop_work)
				+			break;
				+
				+		val += do_work(global_data,  g->p.bytes_global,  process_nr, g->p.nr_proc,	l, val);
				+		val += do_work(process_data, g->p.bytes_process, thread_nr,  g->p.nr_threads,	l, val);
				+		val += do_work(thread_data,  g->p.bytes_thread,  0,          1,		l, val);
				+
				+		/* Optionally sleep while holding the process-wide lock: */
				+		if (g->p.sleep_usecs) {
				+			pthread_mutex_lock(td->process_lock);
				+			usleep(g->p.sleep_usecs);
				+			pthread_mutex_unlock(td->process_lock);
				+		}
				+		/*
				+		 * Amount of work to be done under a process-global lock:
				+		 */
				+		if (g->p.bytes_process_locked) {
				+			pthread_mutex_lock(td->process_lock);
				+			val += do_work(process_data, g->p.bytes_process_locked, thread_nr,  g->p.nr_threads,	l, val);
				+			pthread_mutex_unlock(td->process_lock);
				+		}
				+
				+		work_done = g->p.bytes_global + g->p.bytes_process +
				+			    g->p.bytes_process_locked + g->p.bytes_thread;
				+
				+		update_curr_cpu(task_nr, work_done);
				+		bytes_done += work_done;
				+
				+		/* Skip all the bookkeeping below if nothing is going to use it: */
				+		if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
				+			continue;
				+
				+		td->loops_done = l;
				+
				+		gettimeofday(&stop, NULL);
				+
				+		/* Check whether our max runtime timed out: */
				+		if (g->p.nr_secs) {
				+			timersub(&stop, &start0, &diff);
				+			if (diff.tv_sec >= g->p.nr_secs) {
				+				g->stop_work = true;
				+				break;
				+			}
				+		}
				+
				+		/* Update the summary at most once per second: */
				+		if (start.tv_sec == stop.tv_sec)
				+			continue;
				+
				+		/*
				+		 * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
				+		 * by briefly migrating it to a CPU in the other half of the system:
				+		 */
				+		if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
				+			cpu_set_t orig_mask;
				+			int target_cpu;
				+			int this_cpu;
				+
				+			last_perturbance = stop.tv_sec;
				+
				+			/*
				+			 * Depending on where we are running, move into
				+			 * the other half of the system, to create some
				+			 * real disturbance:
				+			 */
				+			this_cpu = g->threads[task_nr].curr_cpu;
				+			if (this_cpu < g->p.nr_cpus/2)
				+				target_cpu = g->p.nr_cpus-1;
				+			else
				+				target_cpu = 0;
				+
				+			orig_mask = bind_to_cpu(target_cpu);
				+
				+			/* Here we are running on the target CPU already */
				+			if (details >= 1)
				+				printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
				+
				+			bind_to_cpumask(orig_mask);
				+		}
				+
				+		if (details >= 3) {
				+			timersub(&stop, &start, &diff);
				+			/*
				+			 * Multiply in unsigned long long, like the other runtime
				+			 * calculations of this function, so that the product
				+			 * cannot overflow where tv_sec is a 32-bit type:
				+			 */
				+			runtime_ns_max = diff.tv_sec * 1000000000ULL;
				+			runtime_ns_max += diff.tv_usec * 1000ULL;
				+
				+			/* 'val' is a u64: the cast makes it match the %llx conversion */
				+			printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016llx]\n",
				+				process_nr, thread_nr, runtime_ns_max / bytes_done,
				+				(unsigned long long)val);
				+			fflush(stdout);
				+		}
				+		if (!last_task)
				+			continue;
				+
				+		/* Total runtime so far, in nanoseconds: */
				+		timersub(&stop, &start0, &diff);
				+		runtime_ns_max = diff.tv_sec * 1000000000ULL;
				+		runtime_ns_max += diff.tv_usec * 1000ULL;
				+
				+		show_summary(runtime_ns_max, l, &convergence);
				+	}
				+
				+	gettimeofday(&stop, NULL);
				+	timersub(&stop, &start0, &diff);
				+	td->runtime_ns = diff.tv_sec * 1000000000ULL;
				+	td->runtime_ns += diff.tv_usec * 1000ULL;
				+
				+	free_data(thread_data, g->p.bytes_thread);
				+
				+	/* The global byte counter is shared by all tasks: */
				+	pthread_mutex_lock(&g->stop_work_mutex);
				+	g->bytes_done += bytes_done;
				+	pthread_mutex_unlock(&g->stop_work_mutex);
				+
				+	return NULL;
				+}
			
 
				+
			
 
				+/*
				+ * Body of one forked worker process: it creates g->p.nr_threads worker
				+ * threads that share one process-wide data buffer and one lock, then
				+ * waits for all of them to finish.
				+ */
				+static void worker_process(int process_nr)
				+{
				+	int first_task_nr = process_nr*g->p.nr_threads;
				+	struct thread_data *first_td = g->threads + first_task_nr;
				+	pthread_mutex_t process_lock;
				+	pthread_t *tids;
				+	u8 *process_mem;
				+	int idx;
				+	int ret;
				+
				+	pthread_mutex_init(&process_lock, NULL);
				+	set_taskname("process %d", process_nr);
				+
				+	/*
				+	 * Take over the memory policy and the CPU binding of our first
				+	 * thread before allocating, so that the memory gets initialized
				+	 * accordingly:
				+	 */
				+	bind_to_memnode(first_td->bind_node);
				+	bind_to_cpumask(first_td->bind_cpumask);
				+
				+	tids = zalloc(g->p.nr_threads * sizeof(pthread_t));
				+	process_mem = setup_private_data(g->p.bytes_process);
				+
				+	if (g->p.show_details >= 3) {
				+		printf(" # process %2d global mem: %p, process mem: %p\n",
				+			process_nr, g->data, process_mem);
				+	}
				+
				+	/* Fill in each thread's descriptor and start it: */
				+	for (idx = 0; idx < g->p.nr_threads; idx++) {
				+		int nr = first_task_nr + idx;
				+		struct thread_data *td = g->threads + nr;
				+
				+		td->process_data = process_mem;
				+		td->process_nr   = process_nr;
				+		td->thread_nr    = idx;
				+		td->task_nr	 = nr;
				+		td->val          = rand();
				+		td->curr_cpu	 = -1;
				+		td->process_lock = &process_lock;
				+
				+		ret = pthread_create(tids + idx, NULL, worker_thread, td);
				+		BUG_ON(ret);
				+	}
				+
				+	/* Wait for every thread, in creation order: */
				+	for (idx = 0; idx < g->p.nr_threads; idx++) {
				+		ret = pthread_join(tids[idx], NULL);
				+		BUG_ON(ret);
				+	}
				+
				+	free_data(process_mem, g->p.bytes_process);
				+	free(tids);
				+}
			
 
				+
			
 
				+/*
				+ * Announce the workload that is about to run: the number of tasks, nodes
				+ * and CPUs, plus the loop count and size (in MB) of the global, process
				+ * and thread memory areas. Prints nothing if show_details is negative.
				+ */
				+static void print_summary(void)
				+{
				+	const char *task_word;
				+
				+	if (g->p.show_details < 0)
				+		return;
				+
				+	task_word = (g->p.nr_tasks == 1) ? "task" : "tasks";
				+
				+	printf("\n ###\n");
				+	printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
				+		g->p.nr_tasks, task_word, g->p.nr_nodes, g->p.nr_cpus);
				+
				+	printf(" #      %5dx %5ldMB global  shared mem operations\n",
				+		g->p.nr_loops, g->p.bytes_global/1024/1024);
				+	printf(" #      %5dx %5ldMB process shared mem operations\n",
				+		g->p.nr_loops, g->p.bytes_process/1024/1024);
				+	printf(" #      %5dx %5ldMB thread  local  mem operations\n",
				+		g->p.nr_loops, g->p.bytes_thread/1024/1024);
				+
				+	printf(" ###\n");
				+
				+	printf("\n ###\n");
				+	fflush(stdout);
				+}
			
 
				+
			
 
				+/*
				+ * Allocate g->threads[] (one entry per task) through zalloc_shared_data()
				+ * and give every task its default binding: no specific node, and a CPU
				+ * mask with all g->p.nr_cpus CPUs set.
				+ */
				+static void init_thread_data(void)
				+{
				+	ssize_t bytes = sizeof(*g->threads)*g->p.nr_tasks;
				+	struct thread_data *td;
				+	int task, cpu;
				+
				+	g->threads = zalloc_shared_data(bytes);
				+
				+	for (task = 0; task < g->p.nr_tasks; task++) {
				+		td = &g->threads[task];
				+
				+		/* By default every node is allowed: */
				+		td->bind_node = -1;
				+
				+		/* By default every CPU is allowed - build the full mask: */
				+		CPU_ZERO(&td->bind_cpumask);
				+		cpu = 0;
				+		while (cpu < g->p.nr_cpus) {
				+			CPU_SET(cpu, &td->bind_cpumask);
				+			cpu++;
				+		}
				+	}
				+}
			
 
				+
			
 
				+/* Release the g->threads[] array, sized for g->p.nr_tasks entries: */
				+static void deinit_thread_data(void)
				+{
				+	ssize_t bytes = g->p.nr_tasks * sizeof(*g->threads);
				+
				+	free_data(g->threads, bytes);
				+}
			
 
				+
			
 
				+/*
				+ * Set up the global benchmark state 'g' from the parsed options in p0:
				+ * detect the CPU and node counts, convert the memory size strings into
				+ * byte counts, allocate the global data area, initialize the mutexes and
				+ * the per-task data, and apply the CPU and node list options.
				+ *
				+ * Returns 0 on success, or -1 if no memory size option was given at all.
				+ * In the -1 case the global state is released again and 'g' is NULL.
				+ */
				+static int init(void)
				+{
				+	g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
				+
				+	/* Copy over options: */
				+	g->p = p0;
				+
				+	g->p.nr_cpus = numa_num_configured_cpus();
				+
				+	g->p.nr_nodes = numa_max_node() + 1;
				+
				+	/* char array in count_process_nodes(): */
				+	BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
				+
				+	/* Quiet mode without an explicit detail level suppresses all details: */
				+	if (g->p.show_quiet && !g->p.show_details)
				+		g->p.show_details = -1;
				+
				+	/* Some memory should be specified: */
				+	if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str) {
				+		/*
				+		 * Only 'g' itself has been allocated at this point, and the
				+		 * caller does not call deinit() after a failed init() - so
				+		 * release it here instead of leaking it:
				+		 */
				+		free_data(g, sizeof(*g));
				+		g = NULL;
				+
				+		return -1;
				+	}
				+
				+	if (g->p.mb_global_str) {
				+		g->p.mb_global = atof(g->p.mb_global_str);
				+		BUG_ON(g->p.mb_global < 0);
				+	}
				+
				+	if (g->p.mb_proc_str) {
				+		g->p.mb_proc = atof(g->p.mb_proc_str);
				+		BUG_ON(g->p.mb_proc < 0);
				+	}
				+
				+	if (g->p.mb_proc_locked_str) {
				+		g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
				+		BUG_ON(g->p.mb_proc_locked < 0);
				+		/* The locked part is worked on within the process area: */
				+		BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
				+	}
				+
				+	if (g->p.mb_thread_str) {
				+		g->p.mb_thread = atof(g->p.mb_thread_str);
				+		BUG_ON(g->p.mb_thread < 0);
				+	}
				+
				+	BUG_ON(g->p.nr_threads <= 0);
				+	BUG_ON(g->p.nr_proc <= 0);
				+
				+	g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
				+
				+	/* The sizes are given in MB, the workers operate on bytes: */
				+	g->p.bytes_global		= g->p.mb_global	*1024L*1024L;
				+	g->p.bytes_process		= g->p.mb_proc		*1024L*1024L;
				+	g->p.bytes_process_locked	= g->p.mb_proc_locked	*1024L*1024L;
				+	g->p.bytes_thread		= g->p.mb_thread	*1024L*1024L;
				+
				+	g->data = setup_shared_data(g->p.bytes_global);
				+
				+	/* Startup serialization: */
				+	init_global_mutex(&g->start_work_mutex);
				+	init_global_mutex(&g->startup_mutex);
				+	init_global_mutex(&g->startup_done_mutex);
				+	init_global_mutex(&g->stop_work_mutex);
				+
				+	init_thread_data();
				+
				+	tprintf("#\n");
				+	parse_setup_cpu_list();
				+	parse_setup_node_list();
				+	tprintf("#\n");
				+
				+	print_summary();
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Release the global data area, the per-task data and finally the global
				+ * state 'g' itself, which is NULL afterwards:
				+ */
				+static void deinit(void)
				+{
				+	/* The per-task array: */
				+	deinit_thread_data();
				+
				+	/* The globally shared work area: */
				+	free_data(g->data, g->p.bytes_global);
				+	g->data = NULL;
				+
				+	/* 'g' must go last, everything above is reached through it: */
				+	free_data(g, sizeof(*g));
				+	g = NULL;
				+}
			
 
				+
			
 
				+/*
				+ * Print one result line. In quiet mode the line carries the name, the
				+ * value, the unit and the short description; otherwise just the value
				+ * and the long description. A NULL name is printed as "main,".
				+ */
				+static void print_res(const char *name, double val,
				+		      const char *txt_unit, const char *txt_short, const char *txt_long)
				+{
				+	const char *label = name ? name : "main,";
				+
				+	if (!g->p.show_quiet) {
				+		printf(" %14.3f %s\n", val, txt_long);
				+		return;
				+	}
				+
				+	printf(" %-30s %15.3f, %-15s %s\n", label, val, txt_unit, txt_short);
				+}
			
 
				+
			
 
				+/*
				+ * Run one benchmark pass with the options currently held in p0: set up
				+ * the global state, fork g->p.nr_proc worker processes, wait for all of
				+ * them to exit and print the result lines. 'name' is passed on to
				+ * print_res() to label the results.
				+ *
				+ * Returns 0 on success and -1 if init() failed.
				+ */
				+static int __bench_numa(const char *name)
				+{
				+	struct timeval start, stop, diff;
				+	u64 runtime_ns_min, runtime_ns_sum;
				+	pid_t *pids, pid, wpid;
				+	double delta_runtime;
				+	double runtime_avg;
				+	double runtime_sec_max;
				+	double runtime_sec_min;
				+	int wait_stat;
				+	double bytes;
				+	int i, t;
				+
				+	if (init())
				+		return -1;
				+
				+	pids = zalloc(g->p.nr_proc * sizeof(*pids));
				+	pid = -1;
				+
				+	/* All threads try to acquire it, this way we can wait for them to start up: */
				+	pthread_mutex_lock(&g->start_work_mutex);
				+
				+	if (g->p.serialize_startup) {
				+		tprintf(" #\n");
				+		tprintf(" # Startup synchronization: ..."); fflush(stdout);
				+	}
				+
				+	gettimeofday(&start, NULL);
				+
				+	for (i = 0; i < g->p.nr_proc; i++) {
				+		pid = fork();
				+		dprintf(" # process %2d: PID %d\n", i, pid);
				+
				+		BUG_ON(pid < 0);
				+		if (!pid) {
				+			/* Child process: */
				+			worker_process(i);
				+
				+			exit(0);
				+		}
				+		pids[i] = pid;
				+
				+	}
				+
				+	if (g->p.serialize_startup) {
				+		double startup_sec;
				+
				+		/*
				+		 * Wait for all the threads to start up.
				+		 *
				+		 * The worker threads only count themselves in
				+		 * g->nr_tasks_started when startup is serialized, so this
				+		 * wait has to be limited to the serialized case - without
				+		 * serialization it would never terminate:
				+		 */
				+		while (g->nr_tasks_started != g->p.nr_tasks)
				+			usleep(1000);
				+
				+		BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
				+
				+		pthread_mutex_lock(&g->startup_done_mutex);
				+
				+		/* This will start all threads: */
				+		pthread_mutex_unlock(&g->start_work_mutex);
				+
				+		/* This mutex is locked - the last started thread will wake us: */
				+		pthread_mutex_lock(&g->startup_done_mutex);
				+
				+		gettimeofday(&stop, NULL);
				+
				+		timersub(&stop, &start, &diff);
				+
				+		startup_sec = diff.tv_sec * 1000000000.0;
				+		startup_sec += diff.tv_usec * 1000.0;
				+		startup_sec /= 1e9;
				+
				+		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
				+		tprintf(" #\n");
				+
				+		/* The measured runtime starts once the startup phase is over: */
				+		start = stop;
				+		pthread_mutex_unlock(&g->startup_done_mutex);
				+	} else {
				+		gettimeofday(&start, NULL);
				+	}
				+
				+	/* Parent process: */
				+
				+
				+	for (i = 0; i < g->p.nr_proc; i++) {
				+		wpid = waitpid(pids[i], &wait_stat, 0);
				+		BUG_ON(wpid < 0);
				+		BUG_ON(!WIFEXITED(wait_stat));
				+
				+	}
				+
				+	runtime_ns_sum = 0;
				+	/* -1 converts to the largest u64, so the first min() always wins: */
				+	runtime_ns_min = -1LL;
				+
				+	for (t = 0; t < g->p.nr_tasks; t++) {
				+		u64 thread_runtime_ns = g->threads[t].runtime_ns;
				+
				+		runtime_ns_sum += thread_runtime_ns;
				+		runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
				+	}
				+
				+	gettimeofday(&stop, NULL);
				+	timersub(&stop, &start, &diff);
				+
				+	BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
				+
				+	tprintf("\n ###\n");
				+	tprintf("\n");
				+
				+	/* The "max" runtime is the wall-clock time measured by the parent: */
				+	runtime_sec_max = diff.tv_sec * 1000000000.0;
				+	runtime_sec_max += diff.tv_usec * 1000.0;
				+	runtime_sec_max /= 1e9;
				+
				+	runtime_sec_min = runtime_ns_min/1e9;
				+
				+	bytes = g->bytes_done;
				+	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
				+
				+	if (g->p.measure_convergence) {
				+		print_res(name, runtime_sec_max,
				+			"secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
				+	}
				+
				+	print_res(name, runtime_sec_max,
				+		"secs,", "runtime-max/thread",	"secs slowest (max) thread-runtime");
				+
				+	print_res(name, runtime_sec_min,
				+		"secs,", "runtime-min/thread",	"secs fastest (min) thread-runtime");
				+
				+	print_res(name, runtime_avg,
				+		"secs,", "runtime-avg/thread",	"secs average thread-runtime");
				+
				+	delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
				+	print_res(name, delta_runtime / runtime_sec_max * 100.0,
				+		"%,", "spread-runtime/thread",	"% difference between max/avg runtime");
				+
				+	print_res(name, bytes / g->p.nr_tasks / 1e9,
				+		"GB,", "data/thread",		"GB data processed, per thread");
				+
				+	print_res(name, bytes / 1e9,
				+		"GB,", "data-total",		"GB data processed, total");
				+
				+	print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
				+		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
				+
				+	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
				+		"GB/sec,", "thread-speed",	"GB/sec/thread speed");
				+
				+	print_res(name, bytes / runtime_sec_max / 1e9,
				+		"GB/sec,", "total-speed",	"GB/sec total speed");
				+
				+	free(pids);
				+
				+	deinit();
				+
				+	return 0;
				+}
			
 
				+
			
 
				+#define MAX_ARGS 50
				+
				+/*
				+ * Count the entries of a NULL-terminated argument vector. The count is
				+ * required to stay below MAX_ARGS.
				+ */
				+static int command_size(const char **argv)
				+{
				+	int count;
				+
				+	for (count = 0; argv[count]; count++)
				+		;
				+
				+	BUG_ON(count >= MAX_ARGS);
				+
				+	return count;
				+}
			
 
				+
			
 
				+/*
				+ * Echo the command line that is about to be benchmarked, labelled with
				+ * 'name', and reset *p to the built-in defaults:
				+ */
				+static void init_params(struct params *p, const char *name, int argc, const char **argv)
				+{
				+	int arg;
				+
				+	printf("\n # Running %s \"perf bench numa", name);
				+	for (arg = 0; arg < argc; arg++)
				+		printf(" %s", argv[arg]);
				+	printf("\"\n");
				+
				+	/* Everything starts out as zero ... */
				+	memset(p, 0, sizeof(*p));
				+
				+	/* ... except for the following settings: */
				+	p->nr_loops			= -1;
				+	p->serialize_startup		= 1;
				+	p->init_random			= true;
				+
				+	p->data_reads			= true;
				+	p->data_writes			= true;
				+	p->data_backwards		= true;
				+	p->data_rand_walk		= true;
				+}
			
 
				+
			
 
				+/*
				+ * Run a single entry of the built-in test-suite: 'name' is its label and
				+ * 'argv' its NULL-terminated option list.
				+ *
				+ * Returns 0 on success. If option parsing leaves arguments behind or the
				+ * benchmark itself fails, usage_with_options() is called and the result
				+ * is -1.
				+ */
				+static int run_bench_numa(const char *name, const char **argv)
				+{
				+	int nr_args = command_size(argv);
				+	int leftover;
				+
				+	init_params(&p0, name, nr_args, argv);
				+
				+	/* The benchmark is only started if the options parsed cleanly: */
				+	leftover = parse_options(nr_args, argv, options, bench_numa_usage, 0);
				+	if (!leftover && !__bench_numa(name))
				+		return 0;
				+
				+	usage_with_options(numa_usage, options);
				+	return -1;
				+}
			
 
				+
			
 
				+/* Option tails shared by the entries of tests[] below: */
				+#define OPT_BW_RAM		"-s",  "20", "-zZq",    "--thp", " 1", "--no-data_rand_walk"
				+#define OPT_BW_RAM_NOTHP	OPT_BW_RAM,		"--thp", "-1"
				+
				+#define OPT_CONV		"-s", "100", "-zZ0qcm", "--thp", " 1"
				+#define OPT_CONV_NOTHP		OPT_CONV,		"--thp", "-1"
				+
				+#define OPT_BW			"-s",  "20", "-zZ0q",   "--thp", " 1"
				+#define OPT_BW_NOTHP		OPT_BW,			"--thp", "-1"
				+
				+/*
				+ * The built-in test-suite executed by "perf bench numa -a".
				+ *
				+ * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
				+ *
				+ * Each row starts with the label of the test, followed by the argument
				+ * vector that gets parsed for it. The unused remainder of a row is
				+ * zero-initialized, which provides the NULL terminator that
				+ * command_size() looks for.
				+ *
				+ * In the "PxT" labels P is the process count (-p) and T the thread
				+ * count (-t).
				+ */
				+static const char *tests[][MAX_ARGS] = {
				+   /* Basic single-stream NUMA bandwidth measurements: */
				+   { "RAM-bw-local,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
				+			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM },
				+   { "RAM-bw-local-NOTHP,",
				+			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
				+			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM_NOTHP },
				+   { "RAM-bw-remote,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
				+			  "-C" ,   "0", "-M",   "1", OPT_BW_RAM },
				+
				+   /* 2-stream NUMA bandwidth measurements: */
				+   { "RAM-bw-local-2x,",  "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
				+			   "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
				+   { "RAM-bw-remote-2x,", "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
				+			   "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
				+
				+   /* Cross-stream NUMA bandwidth measurement: */
				+   { "RAM-bw-cross,",     "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
				+			   "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
				+
				+   /* Convergence latency measurements: */
				+   { " 1x3-convergence,", "mem",  "-p",  "1", "-t",  "3", "-P",  "512", OPT_CONV },
				+   { " 1x4-convergence,", "mem",  "-p",  "1", "-t",  "4", "-P",  "512", OPT_CONV },
				+   { " 1x6-convergence,", "mem",  "-p",  "1", "-t",  "6", "-P", "1020", OPT_CONV },
				+   /* 2 processes here - with 3 this row would just repeat the 3x3 test: */
				+   { " 2x3-convergence,", "mem",  "-p",  "2", "-t",  "3", "-P", "1020", OPT_CONV },
				+   { " 3x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
				+   { " 4x4-convergence,", "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV },
				+   { " 4x4-convergence-NOTHP,",
				+			  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
				+   { " 4x6-convergence,", "mem",  "-p",  "4", "-t",  "6", "-P", "1020", OPT_CONV },
				+   { " 4x8-convergence,", "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_CONV },
				+   { " 8x4-convergence,", "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV },
				+   { " 8x4-convergence-NOTHP,",
				+			  "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
				+   { " 3x1-convergence,", "mem",  "-p",  "3", "-t",  "1", "-P",  "512", OPT_CONV },
				+   { " 4x1-convergence,", "mem",  "-p",  "4", "-t",  "1", "-P",  "512", OPT_CONV },
				+   { " 8x1-convergence,", "mem",  "-p",  "8", "-t",  "1", "-P",  "512", OPT_CONV },
				+   { "16x1-convergence,", "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_CONV },
				+   { "32x1-convergence,", "mem",  "-p", "32", "-t",  "1", "-P",  "128", OPT_CONV },
				+
				+   /* Various NUMA process/thread layout bandwidth measurements: */
				+   { " 2x1-bw-process,",  "mem",  "-p",  "2", "-t",  "1", "-P", "1024", OPT_BW },
				+   { " 3x1-bw-process,",  "mem",  "-p",  "3", "-t",  "1", "-P", "1024", OPT_BW },
				+   { " 4x1-bw-process,",  "mem",  "-p",  "4", "-t",  "1", "-P", "1024", OPT_BW },
				+   { " 8x1-bw-process,",  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW },
				+   { " 8x1-bw-process-NOTHP,",
				+			  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW_NOTHP },
				+   { "16x1-bw-process,",  "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_BW },
				+
				+   { " 4x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "4", "-T",  "256", OPT_BW },
				+   { " 8x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "8", "-T",  "256", OPT_BW },
				+   { "16x1-bw-thread,",   "mem",  "-p",  "1", "-t", "16", "-T",  "128", OPT_BW },
				+   { "32x1-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-T",   "64", OPT_BW },
				+
				+   { " 2x3-bw-thread,",	  "mem",  "-p",  "2", "-t",  "3", "-P",  "512", OPT_BW },
				+   { " 4x4-bw-thread,",	  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_BW },
				+   { " 4x6-bw-thread,",	  "mem",  "-p",  "4", "-t",  "6", "-P",  "512", OPT_BW },
				+   { " 4x8-bw-thread,",	  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW },
				+   { " 4x8-bw-thread-NOTHP,",
				+			  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW_NOTHP },
				+   { " 3x3-bw-thread,",	  "mem",  "-p",  "3", "-t",  "3", "-P",  "512", OPT_BW },
				+   { " 5x5-bw-thread,",	  "mem",  "-p",  "5", "-t",  "5", "-P",  "512", OPT_BW },
				+
				+   { "2x16-bw-thread,",   "mem",  "-p",  "2", "-t", "16", "-P",  "512", OPT_BW },
				+   { "1x32-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-P", "2048", OPT_BW },
				+
				+   { "numa02-bw,",	  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW },
				+   { "numa02-bw-NOTHP,",  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW_NOTHP },
				+   { "numa01-bw-thread,", "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW },
				+   { "numa01-bw-thread-NOTHP,",
				+			  "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW_NOTHP },
				+};
			
 
				+
			
 
				+/*
				+ * Run every entry of tests[] in order and stop at the first one that
				+ * fails. Returns 0 if all of them succeeded and -1 otherwise.
				+ */
				+static int bench_all(void)
				+{
				+	int nr_tests = ARRAY_SIZE(tests);
				+	int test;
				+	int rc;
				+
				+	/* Start with a banner that shows the 'uname -a' output: */
				+	rc = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
				+	BUG_ON(rc < 0);
				+
				+	for (test = 0; test < nr_tests; test++) {
				+		const char **entry = tests[test];
				+
				+		/* entry[0] is the label, the arguments follow it: */
				+		if (run_bench_numa(entry[0], entry + 1))
				+			return -1;
				+	}
				+
				+	printf("\n");
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Entry point of "perf bench numa mem": parse the command line into p0
				+ * and run either the complete built-in suite (when p0.run_all is set) or
				+ * a single benchmark pass.
				+ *
				+ * Returns the result of bench_all() in the run_all case and 0 after a
				+ * successful single pass. If arguments are left over after parsing or
				+ * the single pass fails, usage_with_options() is called and the result
				+ * is -1.
				+ */
				+int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused)
				+{
				+	init_params(&p0, "main,", argc, argv);
				+
				+	argc = parse_options(argc, argv, options, bench_numa_usage, 0);
				+	if (!argc) {
				+		if (p0.run_all)
				+			return bench_all();
				+
				+		if (!__bench_numa(NULL))
				+			return 0;
				+	}
				+
				+	usage_with_options(numa_usage, options);
				+	return -1;
				+}
			
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -34,9 +34,10 @@
 
				 
			
 
				 struct perf_annotate {
			
 
				 	struct perf_tool tool;
			
 
				-	bool	   force, use_tui, use_stdio;
			
 
				+	bool	   force, use_tui, use_stdio, use_gtk;
			
 
				 	bool	   full_paths;
			
 
				 	bool	   print_line;
			
 
				+	bool	   skip_missing;
			
 
				 	const char *sym_hist_filter;
			
 
				 	const char *cpu_list;
			
 
				 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
			
@@ -138,9 +139,22 @@ find_next:
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		if (use_browser > 0) {
			
 
				+		if (use_browser == 2) {
			
 
				+			int ret;
			
 
				+
			
 
				+			ret = hist_entry__gtk_annotate(he, evidx, NULL);
			
 
				+			if (!ret || !ann->skip_missing)
			
 
				+				return;
			
 
				+
			
 
				+			/* skip missing symbols */
			
 
				+			nd = rb_next(nd);
			
 
				+		} else if (use_browser == 1) {
			
 
				 			key = hist_entry__tui_annotate(he, evidx, NULL);
			
 
				 			switch (key) {
			
 
				+			case -1:
			
 
				+				if (!ann->skip_missing)
			
 
				+					return;
			
 
				+				/* fall through */
			
 
				 			case K_RIGHT:
			
 
				 				next = rb_next(nd);
			
 
				 				break;
			
@@ -224,6 +238,10 @@ static int __cmd_annotate(struct perf_annotate *ann)
 
				 		ui__error("The %s file has no samples!\n", session->filename);
			
 
				 		goto out_delete;
			
 
				 	}
			
 
				+
			
 
				+	if (use_browser == 2)
			
 
				+		perf_gtk__show_annotations();
			
 
				+
			
 
				 out_delete:
			
 
				 	/*
			
 
				 	 * Speed up the exit process, for large files this can
			
@@ -270,6 +288,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		    "be more verbose (show symbol address, etc)"),
			
 
				 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
			
 
				 		    "dump raw trace in ASCII"),
			
 
				+	OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
			
 
				 	OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
			
 
				 	OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
			
 
				 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			
@@ -280,6 +299,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		    "print matching source lines (may be slow)"),
			
 
				 	OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
			
 
				 		    "Don't shorten the displayed pathnames"),
			
 
				+	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
			
 
				+		    "Skip symbols that cannot be annotated"),
			
 
				 	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
			
 
				 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
			
 
				 		   "Look for files with symbols relative to this directory"),
			
@@ -300,6 +321,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		use_browser = 0;
			
 
				 	else if (annotate.use_tui)
			
 
				 		use_browser = 1;
			
 
				+	else if (annotate.use_gtk)
			
 
				+		use_browser = 2;
			
 
				 
			
 
				 	setup_browser(true);
			
 
				 
			
@@ -309,7 +332,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	if (symbol__init() < 0)
			
 
				 		return -1;
			
 
				 
			
 
				-	setup_sorting(annotate_usage, options);
			
 
				+	if (setup_sorting() < 0)
			
 
				+		usage_with_options(annotate_usage, options);
			
 
				 
			
 
				 	if (argc) {
			
 
				 		/*
			
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -35,6 +35,18 @@ struct bench_suite {
 
				 /* sentinel: easy for help */
			
 
				 #define suite_all { "all", "Test all benchmark suites", NULL }
			
 
				 
			
 
				+#ifdef LIBNUMA_SUPPORT
			
 
				+static struct bench_suite numa_suites[] = {
			
 
				+	{ "mem",
			
 
				+	  "Benchmark for NUMA workloads",
			
 
				+	  bench_numa },
			
 
				+	suite_all,
			
 
				+	{ NULL,
			
 
				+	  NULL,
			
 
				+	  NULL                  }
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				 static struct bench_suite sched_suites[] = {
			
 
				 	{ "messaging",
			
 
				 	  "Benchmark for scheduler and IPC mechanisms",
			
@@ -68,6 +80,11 @@ struct bench_subsys {
 
				 };
			
 
				 
			
 
				 static struct bench_subsys subsystems[] = {
			
 
				+#ifdef LIBNUMA_SUPPORT
			
 
				+	{ "numa",
			
 
				+	  "NUMA scheduling and MM behavior",
			
 
				+	  numa_suites },
			
 
				+#endif
			
 
				 	{ "sched",
			
 
				 	  "scheduler and IPC mechanism",
			
 
				 	  sched_suites },
			
@@ -159,6 +176,7 @@ static void all_suite(struct bench_subsys *subsys)	  /* FROM HERE */
 
				 		printf("# Running %s/%s benchmark...\n",
			
 
				 		       subsys->name,
			
 
				 		       suites[i].name);
			
 
				+		fflush(stdout);
			
 
				 
			
 
				 		argv[1] = suites[i].name;
			
 
				 		suites[i].fn(1, argv, NULL);
			
@@ -225,6 +243,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 				printf("# Running %s/%s benchmark...\n",
			
 
				 				       subsystems[i].name,
			
 
				 				       subsystems[i].suites[j].name);
			
 
				+			fflush(stdout);
			
 
				 			status = subsystems[i].suites[j].fn(argc - 1,
			
 
				 							    argv + 1, prefix);
			
 
				 			goto end;
			
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -14,6 +14,7 @@
 
				 #include "util/parse-options.h"
			
 
				 #include "util/strlist.h"
			
 
				 #include "util/build-id.h"
			
 
				+#include "util/session.h"
			
 
				 #include "util/symbol.h"
			
 
				 
			
 
				 static int build_id_cache__add_file(const char *filename, const char *debugdir)
			
@@ -58,19 +59,89 @@ static int build_id_cache__remove_file(const char *filename,
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				+static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
			
 
				+{
			
 
				+	char filename[PATH_MAX];
			
 
				+	u8 build_id[BUILD_ID_SIZE];
			
 
				+
			
 
				+	if (dso__build_id_filename(dso, filename, sizeof(filename)) &&
			
 
				+	    filename__read_build_id(filename, build_id,
			
 
				+				    sizeof(build_id)) != sizeof(build_id)) {
			
 
				+		if (errno == ENOENT)
			
 
				+			return false;
			
 
				+
			
 
				+		pr_warning("Problems with %s file, consider removing it from the cache\n", 
			
 
				+			   filename);
			
 
				+	} else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
			
 
				+		pr_warning("Problems with %s file, consider removing it from the cache\n", 
			
 
				+			   filename);
			
 
				+	}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp)
			
 
				+{
			
 
				+	struct perf_session *session = perf_session__new(filename, O_RDONLY,
			
 
				+							 force, false, NULL);
			
 
				+	if (session == NULL)
			
 
				+		return -1;
			
 
				+
			
 
				+	perf_session__fprintf_dsos_buildid(session, fp, dso__missing_buildid_cache, 0);
			
 
				+	perf_session__delete(session);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int build_id_cache__update_file(const char *filename,
			
 
				+				       const char *debugdir)
			
 
				+{
			
 
				+	u8 build_id[BUILD_ID_SIZE];
			
 
				+	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
			
 
				+
			
 
				+	int err;
			
 
				+
			
 
				+	if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
			
 
				+		pr_debug("Couldn't read a build-id in %s\n", filename);
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				+	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
			
 
				+	err = build_id_cache__remove_s(sbuild_id, debugdir);
			
 
				+	if (!err) {
			
 
				+		err = build_id_cache__add_s(sbuild_id, debugdir, filename,
			
 
				+					    false, false);
			
 
				+	}
			
 
				+	if (verbose)
			
 
				+		pr_info("Updating %s %s: %s\n", sbuild_id, filename,
			
 
				+			err ? "FAIL" : "Ok");
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				 int cmd_buildid_cache(int argc, const char **argv,
			
 
				 		      const char *prefix __maybe_unused)
			
 
				 {
			
 
				 	struct strlist *list;
			
 
				 	struct str_node *pos;
			
 
				+	int ret = 0;
			
 
				+	bool force = false;
			
 
				 	char debugdir[PATH_MAX];
			
 
				 	char const *add_name_list_str = NULL,
			
 
				-		   *remove_name_list_str = NULL;
			
 
				+		   *remove_name_list_str = NULL,
			
 
				+		   *missing_filename = NULL,
			
 
				+		   *update_name_list_str = NULL;
			
 
				+
			
 
				 	const struct option buildid_cache_options[] = {
			
 
				 	OPT_STRING('a', "add", &add_name_list_str,
			
 
				 		   "file list", "file(s) to add"),
			
 
				 	OPT_STRING('r', "remove", &remove_name_list_str, "file list",
			
 
				 		    "file(s) to remove"),
			
 
				+	OPT_STRING('M', "missing", &missing_filename, "file",
			
 
				+		   "to find missing build ids in the cache"),
			
 
				+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
			
 
				+	OPT_STRING('u', "update", &update_name_list_str, "file list",
			
 
				+		    "file(s) to update"),
			
 
				 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
			
 
				 	OPT_END()
			
 
				 	};
			
@@ -125,5 +196,26 @@ int cmd_buildid_cache(int argc, const char **argv,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	return 0;
			
 
				+	if (missing_filename)
			
 
				+		ret = build_id_cache__fprintf_missing(missing_filename, force, stdout);
			
 
				+
			
 
				+	if (update_name_list_str) {
			
 
				+		list = strlist__new(true, update_name_list_str);
			
 
				+		if (list) {
			
 
				+			strlist__for_each(pos, list)
			
 
				+				if (build_id_cache__update_file(pos->s, debugdir)) {
			
 
				+					if (errno == ENOENT) {
			
 
				+						pr_debug("%s wasn't in the cache\n",
			
 
				+							 pos->s);
			
 
				+						continue;
			
 
				+					}
			
 
				+					pr_warning("Couldn't update %s: %s\n",
			
 
				+						   pos->s, strerror(errno));
			
 
				+				}
			
 
				+
			
 
				+			strlist__delete(list);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				 }
			
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -44,23 +44,26 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
 
				 	return fprintf(fp, "%s\n", sbuild_id);
			
 
				 }
			
 
				 
			
 
				+static bool dso__skip_buildid(struct dso *dso, int with_hits)
			
 
				+{
			
 
				+	return with_hits && !dso->hit;
			
 
				+}
			
 
				+
			
 
				 static int perf_session__list_build_ids(bool force, bool with_hits)
			
 
				 {
			
 
				 	struct perf_session *session;
			
 
				 
			
 
				 	symbol__elf_init();
			
 
				-
			
 
				-	session = perf_session__new(input_name, O_RDONLY, force, false,
			
 
				-				    &build_id__mark_dso_hit_ops);
			
 
				-	if (session == NULL)
			
 
				-		return -1;
			
 
				-
			
 
				 	/*
			
 
				 	 * See if this is an ELF file first:
			
 
				 	 */
			
 
				-	if (filename__fprintf_build_id(session->filename, stdout))
			
 
				+	if (filename__fprintf_build_id(input_name, stdout))
			
 
				 		goto out;
			
 
				 
			
 
				+	session = perf_session__new(input_name, O_RDONLY, force, false,
			
 
				+				    &build_id__mark_dso_hit_ops);
			
 
				+	if (session == NULL)
			
 
				+		return -1;
			
 
				 	/*
			
 
				 	 * in pipe-mode, the only way to get the buildids is to parse
			
 
				 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
			
@@ -68,9 +71,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 
				 	if (with_hits || session->fd_pipe)
			
 
				 		perf_session__process_events(session, &build_id__mark_dso_hit_ops);
			
 
				 
			
 
				-	perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
			
 
				-out:
			
 
				+	perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
			
 
				 	perf_session__delete(session);
			
 
				+out:
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -23,7 +23,6 @@ static char const *input_old = "perf.data.old",
 
				 		  *input_new = "perf.data";
			
 
				 static char	  diff__default_sort_order[] = "dso,symbol";
			
 
				 static bool  force;
			
 
				-static bool show_displacement;
			
 
				 static bool show_period;
			
 
				 static bool show_formula;
			
 
				 static bool show_baseline_only;
			
@@ -146,58 +145,47 @@ static int setup_compute(const struct option *opt, const char *str,
 
				 	return -EINVAL;
			
 
				 }
			
 
				 
			
 
				-static double get_period_percent(struct hist_entry *he, u64 period)
			
 
				+double perf_diff__period_percent(struct hist_entry *he, u64 period)
			
 
				 {
			
 
				 	u64 total = he->hists->stats.total_period;
			
 
				 	return (period * 100.0) / total;
			
 
				 }
			
 
				 
			
 
				-double perf_diff__compute_delta(struct hist_entry *he)
			
 
				+double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair)
			
 
				 {
			
 
				-	struct hist_entry *pair = hist_entry__next_pair(he);
			
 
				-	double new_percent = get_period_percent(he, he->stat.period);
			
 
				-	double old_percent = pair ? get_period_percent(pair, pair->stat.period) : 0.0;
			
 
				+	double new_percent = perf_diff__period_percent(he, he->stat.period);
			
 
				+	double old_percent = perf_diff__period_percent(pair, pair->stat.period);
			
 
				 
			
 
				 	he->diff.period_ratio_delta = new_percent - old_percent;
			
 
				 	he->diff.computed = true;
			
 
				 	return he->diff.period_ratio_delta;
			
 
				 }
			
 
				 
			
 
				-double perf_diff__compute_ratio(struct hist_entry *he)
			
 
				+double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair)
			
 
				 {
			
 
				-	struct hist_entry *pair = hist_entry__next_pair(he);
			
 
				 	double new_period = he->stat.period;
			
 
				-	double old_period = pair ? pair->stat.period : 0;
			
 
				+	double old_period = pair->stat.period;
			
 
				 
			
 
				 	he->diff.computed = true;
			
 
				-	he->diff.period_ratio = pair ? (new_period / old_period) : 0;
			
 
				+	he->diff.period_ratio = new_period / old_period;
			
 
				 	return he->diff.period_ratio;
			
 
				 }
			
 
				 
			
 
				-s64 perf_diff__compute_wdiff(struct hist_entry *he)
			
 
				+s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
			
 
				 {
			
 
				-	struct hist_entry *pair = hist_entry__next_pair(he);
			
 
				 	u64 new_period = he->stat.period;
			
 
				-	u64 old_period = pair ? pair->stat.period : 0;
			
 
				+	u64 old_period = pair->stat.period;
			
 
				 
			
 
				 	he->diff.computed = true;
			
 
				-
			
 
				-	if (!pair)
			
 
				-		he->diff.wdiff = 0;
			
 
				-	else
			
 
				-		he->diff.wdiff = new_period * compute_wdiff_w2 -
			
 
				-				 old_period * compute_wdiff_w1;
			
 
				+	he->diff.wdiff = new_period * compute_wdiff_w2 -
			
 
				+			 old_period * compute_wdiff_w1;
			
 
				 
			
 
				 	return he->diff.wdiff;
			
 
				 }
			
 
				 
			
 
				-static int formula_delta(struct hist_entry *he, char *buf, size_t size)
			
 
				+static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
			
 
				+			 char *buf, size_t size)
			
 
				 {
			
 
				-	struct hist_entry *pair = hist_entry__next_pair(he);
			
 
				-
			
 
				-	if (!pair)
			
 
				-		return -1;
			
 
				-
			
 
				 	return scnprintf(buf, size,
			
 
				 			 "(%" PRIu64 " * 100 / %" PRIu64 ") - "
			
 
				 			 "(%" PRIu64 " * 100 / %" PRIu64 ")",
			
@@ -205,41 +193,36 @@ static int formula_delta(struct hist_entry *he, char *buf, size_t size)
 
				 			  pair->stat.period, pair->hists->stats.total_period);
			
 
				 }
			
 
				 
			
 
				-static int formula_ratio(struct hist_entry *he, char *buf, size_t size)
			
 
				+static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
			
 
				+			 char *buf, size_t size)
			
 
				 {
			
 
				-	struct hist_entry *pair = hist_entry__next_pair(he);
			
 
				 	double new_period = he->stat.period;
			
 
				-	double old_period = pair ? pair->stat.period : 0;
			
 
				-
			
 
				-	if (!pair)
			
 
				-		return -1;
			
 
				+	double old_period = pair->stat.period;
			
 
				 
			
 
				 	return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
			
 
				 }
			
 
				 
			
 
				-static int formula_wdiff(struct hist_entry *he, char *buf, size_t size)
			
 
				+static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
			
 
				+			 char *buf, size_t size)
			
 
				 {
			
 
				-	struct hist_entry *pair = hist_entry__next_pair(he);
			
 
				 	u64 new_period = he->stat.period;
			
 
				-	u64 old_period = pair ? pair->stat.period : 0;
			
 
				-
			
 
				-	if (!pair)
			
 
				-		return -1;
			
 
				+	u64 old_period = pair->stat.period;
			
 
				 
			
 
				 	return scnprintf(buf, size,
			
 
				 		  "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
			
 
				 		  new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
			
 
				 }
			
 
				 
			
 
				-int perf_diff__formula(char *buf, size_t size, struct hist_entry *he)
			
 
				+int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
			
 
				+		       char *buf, size_t size)
			
 
				 {
			
 
				 	switch (compute) {
			
 
				 	case COMPUTE_DELTA:
			
 
				-		return formula_delta(he, buf, size);
			
 
				+		return formula_delta(he, pair, buf, size);
			
 
				 	case COMPUTE_RATIO:
			
 
				-		return formula_ratio(he, buf, size);
			
 
				+		return formula_ratio(he, pair, buf, size);
			
 
				 	case COMPUTE_WEIGHTED_DIFF:
			
 
				-		return formula_wdiff(he, buf, size);
			
 
				+		return formula_wdiff(he, pair, buf, size);
			
 
				 	default:
			
 
				 		BUG_ON(1);
			
 
				 	}
			
@@ -292,48 +275,6 @@ static struct perf_tool tool = {
 
				 	.ordering_requires_timestamps = true,
			
 
				 };
			
 
				 
			
 
				-static void insert_hist_entry_by_name(struct rb_root *root,
			
 
				-				      struct hist_entry *he)
			
 
				-{
			
 
				-	struct rb_node **p = &root->rb_node;
			
 
				-	struct rb_node *parent = NULL;
			
 
				-	struct hist_entry *iter;
			
 
				-
			
 
				-	while (*p != NULL) {
			
 
				-		parent = *p;
			
 
				-		iter = rb_entry(parent, struct hist_entry, rb_node);
			
 
				-		if (hist_entry__cmp(he, iter) < 0)
			
 
				-			p = &(*p)->rb_left;
			
 
				-		else
			
 
				-			p = &(*p)->rb_right;
			
 
				-	}
			
 
				-
			
 
				-	rb_link_node(&he->rb_node, parent, p);
			
 
				-	rb_insert_color(&he->rb_node, root);
			
 
				-}
			
 
				-
			
 
				-static void hists__name_resort(struct hists *self, bool sort)
			
 
				-{
			
 
				-	unsigned long position = 1;
			
 
				-	struct rb_root tmp = RB_ROOT;
			
 
				-	struct rb_node *next = rb_first(&self->entries);
			
 
				-
			
 
				-	while (next != NULL) {
			
 
				-		struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
			
 
				-
			
 
				-		next = rb_next(&n->rb_node);
			
 
				-		n->position = position++;
			
 
				-
			
 
				-		if (sort) {
			
 
				-			rb_erase(&n->rb_node, &self->entries);
			
 
				-			insert_hist_entry_by_name(&tmp, n);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	if (sort)
			
 
				-		self->entries = tmp;
			
 
				-}
			
 
				-
			
 
				 static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
			
 
				 				      struct perf_evlist *evlist)
			
 
				 {
			
@@ -346,34 +287,34 @@ static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				-static void perf_evlist__resort_hists(struct perf_evlist *evlist, bool name)
			
 
				+static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
			
 
				 {
			
 
				 	struct perf_evsel *evsel;
			
 
				 
			
 
				 	list_for_each_entry(evsel, &evlist->entries, node) {
			
 
				 		struct hists *hists = &evsel->hists;
			
 
				 
			
 
				-		hists__output_resort(hists);
			
 
				-
			
 
				-		/*
			
 
				-		 * The hists__name_resort only sets possition
			
 
				-		 * if name is false.
			
 
				-		 */
			
 
				-		if (name || ((!name) && show_displacement))
			
 
				-			hists__name_resort(hists, name);
			
 
				+		hists__collapse_resort(hists);
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 static void hists__baseline_only(struct hists *hists)
			
 
				 {
			
 
				-	struct rb_node *next = rb_first(&hists->entries);
			
 
				+	struct rb_root *root;
			
 
				+	struct rb_node *next;
			
 
				 
			
 
				+	if (sort__need_collapse)
			
 
				+		root = &hists->entries_collapsed;
			
 
				+	else
			
 
				+		root = hists->entries_in;
			
 
				+
			
 
				+	next = rb_first(root);
			
 
				 	while (next != NULL) {
			
 
				-		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
			
 
				+		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
			
 
				 
			
 
				-		next = rb_next(&he->rb_node);
			
 
				+		next = rb_next(&he->rb_node_in);
			
 
				 		if (!hist_entry__next_pair(he)) {
			
 
				-			rb_erase(&he->rb_node, &hists->entries);
			
 
				+			rb_erase(&he->rb_node_in, root);
			
 
				 			hist_entry__free(he);
			
 
				 		}
			
 
				 	}
			
@@ -385,18 +326,21 @@ static void hists__precompute(struct hists *hists)
 
				 
			
 
				 	while (next != NULL) {
			
 
				 		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
			
 
				+		struct hist_entry *pair = hist_entry__next_pair(he);
			
 
				 
			
 
				 		next = rb_next(&he->rb_node);
			
 
				+		if (!pair)
			
 
				+			continue;
			
 
				 
			
 
				 		switch (compute) {
			
 
				 		case COMPUTE_DELTA:
			
 
				-			perf_diff__compute_delta(he);
			
 
				+			perf_diff__compute_delta(he, pair);
			
 
				 			break;
			
 
				 		case COMPUTE_RATIO:
			
 
				-			perf_diff__compute_ratio(he);
			
 
				+			perf_diff__compute_ratio(he, pair);
			
 
				 			break;
			
 
				 		case COMPUTE_WEIGHTED_DIFF:
			
 
				-			perf_diff__compute_wdiff(he);
			
 
				+			perf_diff__compute_wdiff(he, pair);
			
 
				 			break;
			
 
				 		default:
			
 
				 			BUG_ON(1);
			
@@ -470,19 +414,30 @@ static void insert_hist_entry_by_compute(struct rb_root *root,
 
				 
			
 
				 static void hists__compute_resort(struct hists *hists)
			
 
				 {
			
 
				-	struct rb_root tmp = RB_ROOT;
			
 
				-	struct rb_node *next = rb_first(&hists->entries);
			
 
				+	struct rb_root *root;
			
 
				+	struct rb_node *next;
			
 
				+
			
 
				+	if (sort__need_collapse)
			
 
				+		root = &hists->entries_collapsed;
			
 
				+	else
			
 
				+		root = hists->entries_in;
			
 
				+
			
 
				+	hists->entries = RB_ROOT;
			
 
				+	next = rb_first(root);
			
 
				+
			
 
				+	hists->nr_entries = 0;
			
 
				+	hists->stats.total_period = 0;
			
 
				+	hists__reset_col_len(hists);
			
 
				 
			
 
				 	while (next != NULL) {
			
 
				-		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
			
 
				+		struct hist_entry *he;
			
 
				 
			
 
				-		next = rb_next(&he->rb_node);
			
 
				+		he = rb_entry(next, struct hist_entry, rb_node_in);
			
 
				+		next = rb_next(&he->rb_node_in);
			
 
				 
			
 
				-		rb_erase(&he->rb_node, &hists->entries);
			
 
				-		insert_hist_entry_by_compute(&tmp, he, compute);
			
 
				+		insert_hist_entry_by_compute(&hists->entries, he, compute);
			
 
				+		hists__inc_nr_entries(hists, he);
			
 
				 	}
			
 
				-
			
 
				-	hists->entries = tmp;
			
 
				 }
			
 
				 
			
 
				 static void hists__process(struct hists *old, struct hists *new)
			
@@ -497,6 +452,8 @@ static void hists__process(struct hists *old, struct hists *new)
 
				 	if (sort_compute) {
			
 
				 		hists__precompute(new);
			
 
				 		hists__compute_resort(new);
			
 
				+	} else {
			
 
				+		hists__output_resort(new);
			
 
				 	}
			
 
				 
			
 
				 	hists__fprintf(new, true, 0, 0, stdout);
			
@@ -528,8 +485,8 @@ static int __cmd_diff(void)
 
				 	evlist_old = older->evlist;
			
 
				 	evlist_new = newer->evlist;
			
 
				 
			
 
				-	perf_evlist__resort_hists(evlist_old, true);
			
 
				-	perf_evlist__resort_hists(evlist_new, false);
			
 
				+	perf_evlist__collapse_resort(evlist_old);
			
 
				+	perf_evlist__collapse_resort(evlist_new);
			
 
				 
			
 
				 	list_for_each_entry(evsel, &evlist_new->entries, node) {
			
 
				 		struct perf_evsel *evsel_old;
			
@@ -562,8 +519,6 @@ static const char * const diff_usage[] = {
 
				 static const struct option options[] = {
			
 
				 	OPT_INCR('v', "verbose", &verbose,
			
 
				 		    "be more verbose (show symbol address, etc)"),
			
 
				-	OPT_BOOLEAN('M', "displacement", &show_displacement,
			
 
				-		    "Show position displacement relative to baseline"),
			
 
				 	OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
			
 
				 		    "Show only items with match in baseline"),
			
 
				 	OPT_CALLBACK('c', "compute", &compute,
			
@@ -597,40 +552,32 @@ static const struct option options[] = {
 
				 
			
 
				 static void ui_init(void)
			
 
				 {
			
 
				-	perf_hpp__init();
			
 
				-
			
 
				-	/* No overhead column. */
			
 
				-	perf_hpp__column_enable(PERF_HPP__OVERHEAD, false);
			
 
				-
			
 
				 	/*
			
 
				-	 * Display baseline/delta/ratio/displacement/
			
 
				+	 * Display baseline/delta/ratio
			
 
				 	 * formula/periods columns.
			
 
				 	 */
			
 
				-	perf_hpp__column_enable(PERF_HPP__BASELINE, true);
			
 
				+	perf_hpp__column_enable(PERF_HPP__BASELINE);
			
 
				 
			
 
				 	switch (compute) {
			
 
				 	case COMPUTE_DELTA:
			
 
				-		perf_hpp__column_enable(PERF_HPP__DELTA, true);
			
 
				+		perf_hpp__column_enable(PERF_HPP__DELTA);
			
 
				 		break;
			
 
				 	case COMPUTE_RATIO:
			
 
				-		perf_hpp__column_enable(PERF_HPP__RATIO, true);
			
 
				+		perf_hpp__column_enable(PERF_HPP__RATIO);
			
 
				 		break;
			
 
				 	case COMPUTE_WEIGHTED_DIFF:
			
 
				-		perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF, true);
			
 
				+		perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF);
			
 
				 		break;
			
 
				 	default:
			
 
				 		BUG_ON(1);
			
 
				 	};
			
 
				 
			
 
				-	if (show_displacement)
			
 
				-		perf_hpp__column_enable(PERF_HPP__DISPL, true);
			
 
				-
			
 
				 	if (show_formula)
			
 
				-		perf_hpp__column_enable(PERF_HPP__FORMULA, true);
			
 
				+		perf_hpp__column_enable(PERF_HPP__FORMULA);
			
 
				 
			
 
				 	if (show_period) {
			
 
				-		perf_hpp__column_enable(PERF_HPP__PERIOD, true);
			
 
				-		perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE, true);
			
 
				+		perf_hpp__column_enable(PERF_HPP__PERIOD);
			
 
				+		perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -658,7 +605,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 
			
 
				 	ui_init();
			
 
				 
			
 
				-	setup_sorting(diff_usage, options);
			
 
				+	if (setup_sorting() < 0)
			
 
				+		usage_with_options(diff_usage, options);
			
 
				+
			
 
				 	setup_pager();
			
 
				 
			
 
				 	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
			
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -15,39 +15,6 @@
 
				 #include "util/parse-options.h"
			
 
				 #include "util/session.h"
			
 
				 
			
 
				-struct perf_attr_details {
			
 
				-	bool freq;
			
 
				-	bool verbose;
			
 
				-};
			
 
				-
			
 
				-static int comma_printf(bool *first, const char *fmt, ...)
			
 
				-{
			
 
				-	va_list args;
			
 
				-	int ret = 0;
			
 
				-
			
 
				-	if (!*first) {
			
 
				-		ret += printf(",");
			
 
				-	} else {
			
 
				-		ret += printf(":");
			
 
				-		*first = false;
			
 
				-	}
			
 
				-
			
 
				-	va_start(args, fmt);
			
 
				-	ret += vprintf(fmt, args);
			
 
				-	va_end(args);
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				-static int __if_print(bool *first, const char *field, u64 value)
			
 
				-{
			
 
				-	if (value == 0)
			
 
				-		return 0;
			
 
				-
			
 
				-	return comma_printf(first, " %s: %" PRIu64, field, value);
			
 
				-}
			
 
				-
			
 
				-#define if_print(field) __if_print(&first, #field, pos->attr.field)
			
 
				-
			
 
				 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
			
 
				 {
			
 
				 	struct perf_session *session;
			
@@ -57,52 +24,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
 
				 	if (session == NULL)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				-	list_for_each_entry(pos, &session->evlist->entries, node) {
			
 
				-		bool first = true;
			
 
				-
			
 
				-		printf("%s", perf_evsel__name(pos));
			
 
				-
			
 
				-		if (details->verbose || details->freq) {
			
 
				-			comma_printf(&first, " sample_freq=%" PRIu64,
			
 
				-				     (u64)pos->attr.sample_freq);
			
 
				-		}
			
 
				-
			
 
				-		if (details->verbose) {
			
 
				-			if_print(type);
			
 
				-			if_print(config);
			
 
				-			if_print(config1);
			
 
				-			if_print(config2);
			
 
				-			if_print(size);
			
 
				-			if_print(sample_type);
			
 
				-			if_print(read_format);
			
 
				-			if_print(disabled);
			
 
				-			if_print(inherit);
			
 
				-			if_print(pinned);
			
 
				-			if_print(exclusive);
			
 
				-			if_print(exclude_user);
			
 
				-			if_print(exclude_kernel);
			
 
				-			if_print(exclude_hv);
			
 
				-			if_print(exclude_idle);
			
 
				-			if_print(mmap);
			
 
				-			if_print(comm);
			
 
				-			if_print(freq);
			
 
				-			if_print(inherit_stat);
			
 
				-			if_print(enable_on_exec);
			
 
				-			if_print(task);
			
 
				-			if_print(watermark);
			
 
				-			if_print(precise_ip);
			
 
				-			if_print(mmap_data);
			
 
				-			if_print(sample_id_all);
			
 
				-			if_print(exclude_host);
			
 
				-			if_print(exclude_guest);
			
 
				-			if_print(__reserved_1);
			
 
				-			if_print(wakeup_events);
			
 
				-			if_print(bp_type);
			
 
				-			if_print(branch_sample_type);
			
 
				-		}
			
 
				-
			
 
				-		putchar('\n');
			
 
				-	}
			
 
				+	list_for_each_entry(pos, &session->evlist->entries, node)
			
 
				+		perf_evsel__fprintf(pos, details, stdout);
			
 
				 
			
 
				 	perf_session__delete(session);
			
 
				 	return 0;
			
@@ -116,6 +39,8 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
			
 
				 	OPT_BOOLEAN('v', "verbose", &details.verbose,
			
 
				 		    "Show all event attr details"),
			
 
				+	OPT_BOOLEAN('g', "group", &details.event_group,
			
 
				+		    "Show event group information"),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 	const char * const evlist_usage[] = {
			
@@ -127,5 +52,10 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	if (argc)
			
 
				 		usage_with_options(evlist_usage, options);
			
 
				 
			
 
				+	if (details.event_group && (details.verbose || details.freq)) {
			
 
				+		pr_err("--group option is not compatible with other options\n");
			
 
				+		usage_with_options(evlist_usage, options);
			
 
				+	}
			
 
				+
			
 
				 	return __cmd_evlist(input_name, &details);
			
 
				 }
			
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -17,6 +17,7 @@
 
				 #include "util/debug.h"
			
 
				 
			
 
				 #include <linux/rbtree.h>
			
 
				+#include <linux/string.h>
			
 
				 
			
 
				 struct alloc_stat;
			
 
				 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
			
@@ -340,7 +341,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
 
				 			   int n_lines, int is_caller)
			
 
				 {
			
 
				 	struct rb_node *next;
			
 
				-	struct machine *machine;
			
 
				+	struct machine *machine = &session->machines.host;
			
 
				 
			
 
				 	printf("%.102s\n", graph_dotted_line);
			
 
				 	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
			
@@ -349,11 +350,6 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
 
				 
			
 
				 	next = rb_first(root);
			
 
				 
			
 
				-	machine = perf_session__find_host_machine(session);
			
 
				-	if (!machine) {
			
 
				-		pr_err("__print_result: couldn't find kernel information\n");
			
 
				-		return;
			
 
				-	}
			
 
				 	while (next && n_lines--) {
			
 
				 		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
			
 
				 						   node);
			
@@ -614,8 +610,7 @@ static struct sort_dimension *avail_sorts[] = {
 
				 	&pingpong_sort_dimension,
			
 
				 };
			
 
				 
			
 
				-#define NUM_AVAIL_SORTS	\
			
 
				-	(int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
			
 
				+#define NUM_AVAIL_SORTS	((int)ARRAY_SIZE(avail_sorts))
			
 
				 
			
 
				 static int sort_dimension__add(const char *tok, struct list_head *list)
			
 
				 {
			
@@ -624,12 +619,11 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 
				 
			
 
				 	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
			
 
				 		if (!strcmp(avail_sorts[i]->name, tok)) {
			
 
				-			sort = malloc(sizeof(*sort));
			
 
				+			sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
			
 
				 			if (!sort) {
			
 
				-				pr_err("%s: malloc failed\n", __func__);
			
 
				+				pr_err("%s: memdup failed\n", __func__);
			
 
				 				return -1;
			
 
				 			}
			
 
				-			memcpy(sort, avail_sorts[i], sizeof(*sort));
			
 
				 			list_add_tail(&sort->list, list);
			
 
				 			return 0;
			
 
				 		}
			
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -973,8 +973,7 @@ __cmd_buildid_list(const char *file_name, int argc, const char **argv)
 
				 
			
 
				 int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
			
 
				 {
			
 
				-	const char *file_name;
			
 
				-
			
 
				+	const char *file_name = NULL;
			
 
				 	const struct option kvm_options[] = {
			
 
				 		OPT_STRING('i', "input", &file_name, "file",
			
 
				 			   "Input file name"),
			
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -224,130 +224,28 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
 
				 
			
 
				 static int perf_record__open(struct perf_record *rec)
			
 
				 {
			
 
				+	char msg[512];
			
 
				 	struct perf_evsel *pos;
			
 
				 	struct perf_evlist *evlist = rec->evlist;
			
 
				 	struct perf_session *session = rec->session;
			
 
				 	struct perf_record_opts *opts = &rec->opts;
			
 
				 	int rc = 0;
			
 
				 
			
 
				-	/*
			
 
				-	 * Set the evsel leader links before we configure attributes,
			
 
				-	 * since some might depend on this info.
			
 
				-	 */
			
 
				-	if (opts->group)
			
 
				-		perf_evlist__set_leader(evlist);
			
 
				-
			
 
				-	perf_evlist__config_attrs(evlist, opts);
			
 
				+	perf_evlist__config(evlist, opts);
			
 
				 
			
 
				 	list_for_each_entry(pos, &evlist->entries, node) {
			
 
				-		struct perf_event_attr *attr = &pos->attr;
			
 
				-		/*
			
 
				-		 * Check if parse_single_tracepoint_event has already asked for
			
 
				-		 * PERF_SAMPLE_TIME.
			
 
				-		 *
			
 
				-		 * XXX this is kludgy but short term fix for problems introduced by
			
 
				-		 * eac23d1c that broke 'perf script' by having different sample_types
			
 
				-		 * when using multiple tracepoint events when we use a perf binary
			
 
				-		 * that tries to use sample_id_all on an older kernel.
			
 
				-		 *
			
 
				-		 * We need to move counter creation to perf_session, support
			
 
				-		 * different sample_types, etc.
			
 
				-		 */
			
 
				-		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
			
 
				-
			
 
				-fallback_missing_features:
			
 
				-		if (opts->exclude_guest_missing)
			
 
				-			attr->exclude_guest = attr->exclude_host = 0;
			
 
				-retry_sample_id:
			
 
				-		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
			
 
				 try_again:
			
 
				 		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			
 
				-			int err = errno;
			
 
				-
			
 
				-			if (err == EPERM || err == EACCES) {
			
 
				-				ui__error_paranoid();
			
 
				-				rc = -err;
			
 
				-				goto out;
			
 
				-			} else if (err ==  ENODEV && opts->target.cpu_list) {
			
 
				-				pr_err("No such device - did you specify"
			
 
				-				       " an out-of-range profile CPU?\n");
			
 
				-				rc = -err;
			
 
				-				goto out;
			
 
				-			} else if (err == EINVAL) {
			
 
				-				if (!opts->exclude_guest_missing &&
			
 
				-				    (attr->exclude_guest || attr->exclude_host)) {
			
 
				-					pr_debug("Old kernel, cannot exclude "
			
 
				-						 "guest or host samples.\n");
			
 
				-					opts->exclude_guest_missing = true;
			
 
				-					goto fallback_missing_features;
			
 
				-				} else if (!opts->sample_id_all_missing) {
			
 
				-					/*
			
 
				-					 * Old kernel, no attr->sample_id_type_all field
			
 
				-					 */
			
 
				-					opts->sample_id_all_missing = true;
			
 
				-					if (!opts->sample_time && !opts->raw_samples && !time_needed)
			
 
				-						attr->sample_type &= ~PERF_SAMPLE_TIME;
			
 
				-
			
 
				-					goto retry_sample_id;
			
 
				-				}
			
 
				-			}
			
 
				-
			
 
				-			/*
			
 
				-			 * If it's cycles then fall back to hrtimer
			
 
				-			 * based cpu-clock-tick sw counter, which
			
 
				-			 * is always available even if no PMU support.
			
 
				-			 *
			
 
				-			 * PPC returns ENXIO until 2.6.37 (behavior changed
			
 
				-			 * with commit b0a873e).
			
 
				-			 */
			
 
				-			if ((err == ENOENT || err == ENXIO)
			
 
				-					&& attr->type == PERF_TYPE_HARDWARE
			
 
				-					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
			
 
				-
			
 
				+			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
			
 
				 				if (verbose)
			
 
				-					ui__warning("The cycles event is not supported, "
			
 
				-						    "trying to fall back to cpu-clock-ticks\n");
			
 
				-				attr->type = PERF_TYPE_SOFTWARE;
			
 
				-				attr->config = PERF_COUNT_SW_CPU_CLOCK;
			
 
				-				if (pos->name) {
			
 
				-					free(pos->name);
			
 
				-					pos->name = NULL;
			
 
				-				}
			
 
				+					ui__warning("%s\n", msg);
			
 
				 				goto try_again;
			
 
				 			}
			
 
				 
			
 
				-			if (err == ENOENT) {
			
 
				-				ui__error("The %s event is not supported.\n",
			
 
				-					  perf_evsel__name(pos));
			
 
				-				rc = -err;
			
 
				-				goto out;
			
 
				-			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
			
 
				-				ui__error("\'precise\' request may not be supported. "
			
 
				-					  "Try removing 'p' modifier\n");
			
 
				-				rc = -err;
			
 
				-				goto out;
			
 
				-			}
			
 
				-
			
 
				-			printf("\n");
			
 
				-			error("sys_perf_event_open() syscall returned with %d "
			
 
				-			      "(%s) for event %s. /bin/dmesg may provide "
			
 
				-			      "additional information.\n",
			
 
				-			      err, strerror(err), perf_evsel__name(pos));
			
 
				-
			
 
				-#if defined(__i386__) || defined(__x86_64__)
			
 
				-			if (attr->type == PERF_TYPE_HARDWARE &&
			
 
				-			    err == EOPNOTSUPP) {
			
 
				-				pr_err("No hardware sampling interrupt available."
			
 
				-				       " No APIC? If so then you can boot the kernel"
			
 
				-				       " with the \"lapic\" boot parameter to"
			
 
				-				       " force-enable it.\n");
			
 
				-				rc = -err;
			
 
				-				goto out;
			
 
				-			}
			
 
				-#endif
			
 
				-
			
 
				-			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			
 
				-			rc = -err;
			
 
				+			rc = -errno;
			
 
				+			perf_evsel__open_strerror(pos, &opts->target,
			
 
				+						  errno, msg, sizeof(msg));
			
 
				+			ui__error("%s\n", msg);
			
 
				 			goto out;
			
 
				 		}
			
 
				 	}
			
@@ -430,10 +328,6 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 
				 {
			
 
				 	int err;
			
 
				 	struct perf_tool *tool = data;
			
 
				-
			
 
				-	if (machine__is_host(machine))
			
 
				-		return;
			
 
				-
			
 
				 	/*
			
 
				 	 *As for guest kernel when processing subcommand record&report,
			
 
				 	 *we arrange module mmap prior to guest kernel mmap and trigger
			
@@ -592,6 +486,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 
				 		goto out_delete_session;
			
 
				 	}
			
 
				 
			
 
				+	if (!evsel_list->nr_groups)
			
 
				+		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
			
 
				+
			
 
				 	/*
			
 
				 	 * perf_session__delete(session) will be called at perf_record__exit()
			
 
				 	 */
			
@@ -618,12 +515,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 
				 
			
 
				 	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
			
 
				 
			
 
				-	machine = perf_session__find_host_machine(session);
			
 
				-	if (!machine) {
			
 
				-		pr_err("Couldn't find native kernel information.\n");
			
 
				-		err = -1;
			
 
				-		goto out_delete_session;
			
 
				-	}
			
 
				+	machine = &session->machines.host;
			
 
				 
			
 
				 	if (opts->pipe_output) {
			
 
				 		err = perf_event__synthesize_attrs(tool, session,
			
@@ -676,9 +568,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 
				 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			
 
				 		       "Check /proc/modules permission or run as root.\n");
			
 
				 
			
 
				-	if (perf_guest)
			
 
				-		perf_session__process_machines(session, tool,
			
 
				-					       perf_event__synthesize_guest_os);
			
 
				+	if (perf_guest) {
			
 
				+		machines__process_guests(&session->machines,
			
 
				+					 perf_event__synthesize_guest_os, tool);
			
 
				+	}
			
 
				 
			
 
				 	if (!opts->target.system_wide)
			
 
				 		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
			
@@ -875,11 +768,10 @@ static int get_stack_size(char *str, unsigned long *_size)
 
				 }
			
 
				 #endif /* LIBUNWIND_SUPPORT */
			
 
				 
			
 
				-static int
			
 
				-parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
			
 
				-		    int unset)
			
 
				+int record_parse_callchain_opt(const struct option *opt,
			
 
				+			       const char *arg, int unset)
			
 
				 {
			
 
				-	struct perf_record *rec = (struct perf_record *)opt->value;
			
 
				+	struct perf_record_opts *opts = opt->value;
			
 
				 	char *tok, *name, *saveptr = NULL;
			
 
				 	char *buf;
			
 
				 	int ret = -1;
			
@@ -905,7 +797,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
 
				 		/* Framepointer style */
			
 
				 		if (!strncmp(name, "fp", sizeof("fp"))) {
			
 
				 			if (!strtok_r(NULL, ",", &saveptr)) {
			
 
				-				rec->opts.call_graph = CALLCHAIN_FP;
			
 
				+				opts->call_graph = CALLCHAIN_FP;
			
 
				 				ret = 0;
			
 
				 			} else
			
 
				 				pr_err("callchain: No more arguments "
			
@@ -918,20 +810,20 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
 
				 			const unsigned long default_stack_dump_size = 8192;
			
 
				 
			
 
				 			ret = 0;
			
 
				-			rec->opts.call_graph = CALLCHAIN_DWARF;
			
 
				-			rec->opts.stack_dump_size = default_stack_dump_size;
			
 
				+			opts->call_graph = CALLCHAIN_DWARF;
			
 
				+			opts->stack_dump_size = default_stack_dump_size;
			
 
				 
			
 
				 			tok = strtok_r(NULL, ",", &saveptr);
			
 
				 			if (tok) {
			
 
				 				unsigned long size = 0;
			
 
				 
			
 
				 				ret = get_stack_size(tok, &size);
			
 
				-				rec->opts.stack_dump_size = size;
			
 
				+				opts->stack_dump_size = size;
			
 
				 			}
			
 
				 
			
 
				 			if (!ret)
			
 
				 				pr_debug("callchain: stack dump size %d\n",
			
 
				-					 rec->opts.stack_dump_size);
			
 
				+					 opts->stack_dump_size);
			
 
				 #endif /* LIBUNWIND_SUPPORT */
			
 
				 		} else {
			
 
				 			pr_err("callchain: Unknown -g option "
			
@@ -944,7 +836,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
 
				 	free(buf);
			
 
				 
			
 
				 	if (!ret)
			
 
				-		pr_debug("callchain: type %d\n", rec->opts.call_graph);
			
 
				+		pr_debug("callchain: type %d\n", opts->call_graph);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
@@ -982,9 +874,9 @@ static struct perf_record record = {
 
				 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
			
 
				 
			
 
				 #ifdef LIBUNWIND_SUPPORT
			
 
				-static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
			
 
				+const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
			
 
				 #else
			
 
				-static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
			
 
				+const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
			
 
				 #endif
			
 
				 
			
 
				 /*
			
@@ -1028,9 +920,9 @@ const struct option record_options[] = {
 
				 		     "number of mmap data pages"),
			
 
				 	OPT_BOOLEAN(0, "group", &record.opts.group,
			
 
				 		    "put the counters into a counter group"),
			
 
				-	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			
 
				-			     callchain_help, &parse_callchain_opt,
			
 
				-			     "fp"),
			
 
				+	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
			
 
				+			     "mode[,dump_size]", record_callchain_help,
			
 
				+			     &record_parse_callchain_opt, "fp"),
			
 
				 	OPT_INCR('v', "verbose", &verbose,
			
 
				 		    "be more verbose (show counter open errors, etc)"),
			
 
				 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
			
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -8,6 +8,7 @@
 
				 #include "builtin.h"
			
 
				 
			
 
				 #include "util/util.h"
			
 
				+#include "util/cache.h"
			
 
				 
			
 
				 #include "util/annotate.h"
			
 
				 #include "util/color.h"
			
@@ -54,6 +55,16 @@ struct perf_report {
 
				 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
			
 
				 };
			
 
				 
			
 
				+static int perf_report_config(const char *var, const char *value, void *cb)
			
 
				+{
			
 
				+	if (!strcmp(var, "report.group")) {
			
 
				+		symbol_conf.event_group = perf_config_bool(var, value);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	return perf_default_config(var, value, cb);
			
 
				+}
			
 
				+
			
 
				 static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
			
 
				 					struct addr_location *al,
			
 
				 					struct perf_sample *sample,
			
@@ -299,6 +310,21 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
 
				 	char unit;
			
 
				 	unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE];
			
 
				 	u64 nr_events = self->stats.total_period;
			
 
				+	struct perf_evsel *evsel = hists_to_evsel(self);
			
 
				+	char buf[512];
			
 
				+	size_t size = sizeof(buf);
			
 
				+
			
 
				+	if (symbol_conf.event_group && evsel->nr_members > 1) {
			
 
				+		struct perf_evsel *pos;
			
 
				+
			
 
				+		perf_evsel__group_desc(evsel, buf, size);
			
 
				+		evname = buf;
			
 
				+
			
 
				+		for_each_group_member(pos, evsel) {
			
 
				+			nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
			
 
				+			nr_events += pos->hists.stats.total_period;
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	nr_samples = convert_unit(nr_samples, &unit);
			
 
				 	ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
			
@@ -319,6 +345,10 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 
				 		struct hists *hists = &pos->hists;
			
 
				 		const char *evname = perf_evsel__name(pos);
			
 
				 
			
 
				+		if (symbol_conf.event_group &&
			
 
				+		    !perf_evsel__is_group_leader(pos))
			
 
				+			continue;
			
 
				+
			
 
				 		hists__fprintf_nr_sample_events(hists, evname, stdout);
			
 
				 		hists__fprintf(hists, true, 0, 0, stdout);
			
 
				 		fprintf(stdout, "\n\n");
			
@@ -372,7 +402,7 @@ static int __cmd_report(struct perf_report *rep)
 
				 	if (ret)
			
 
				 		goto out_delete;
			
 
				 
			
 
				-	kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION];
			
 
				+	kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION];
			
 
				 	kernel_kmap = map__kmap(kernel_map);
			
 
				 	if (kernel_map == NULL ||
			
 
				 	    (kernel_map->dso->hit &&
			
@@ -416,8 +446,16 @@ static int __cmd_report(struct perf_report *rep)
 
				 			hists->symbol_filter_str = rep->symbol_filter_str;
			
 
				 
			
 
				 		hists__collapse_resort(hists);
			
 
				-		hists__output_resort(hists);
			
 
				 		nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
			
 
				+
			
 
				+		/* Non-group events are considered as leader */
			
 
				+		if (symbol_conf.event_group &&
			
 
				+		    !perf_evsel__is_group_leader(pos)) {
			
 
				+			struct hists *leader_hists = &pos->leader->hists;
			
 
				+
			
 
				+			hists__match(leader_hists, hists);
			
 
				+			hists__link(leader_hists, hists);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if (nr_samples == 0) {
			
@@ -425,11 +463,22 @@ static int __cmd_report(struct perf_report *rep)
 
				 		goto out_delete;
			
 
				 	}
			
 
				 
			
 
				+	list_for_each_entry(pos, &session->evlist->entries, node)
			
 
				+		hists__output_resort(&pos->hists);
			
 
				+
			
 
				 	if (use_browser > 0) {
			
 
				 		if (use_browser == 1) {
			
 
				-			perf_evlist__tui_browse_hists(session->evlist, help,
			
 
				-						      NULL,
			
 
				-						      &session->header.env);
			
 
				+			ret = perf_evlist__tui_browse_hists(session->evlist,
			
 
				+							help,
			
 
				+							NULL,
			
 
				+							&session->header.env);
			
 
				+			/*
			
 
				+			 * Usually "ret" is the last pressed key, and we only
			
 
				+			 * care if the key notifies us to switch data file.
			
 
				+			 */
			
 
				+			if (ret != K_SWITCH_INPUT_DATA)
			
 
				+				ret = 0;
			
 
				+
			
 
				 		} else if (use_browser == 2) {
			
 
				 			perf_evlist__gtk_browse_hists(session->evlist, help,
			
 
				 						      NULL);
			
@@ -595,8 +644,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	OPT_BOOLEAN(0, "stdio", &report.use_stdio,
			
 
				 		    "Use the stdio interface"),
			
 
				 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
			
 
				-		   "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
			
 
				-		   " dso_from, symbol_to, symbol_from, mispredict"),
			
 
				+		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
			
 
				+		   " dso_to, dso_from, symbol_to, symbol_from, mispredict"),
			
 
				 	OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
			
 
				 		    "Show sample percentage for different cpu modes"),
			
 
				 	OPT_STRING('p', "parent", &parent_pattern, "regex",
			
@@ -638,6 +687,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
			
 
				 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
			
 
				 		    "Show a column with the sum of periods"),
			
 
				+	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
			
 
				+		    "Show event group information together"),
			
 
				 	OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
			
 
				 		    "use branch records for histogram filling", parse_branch_mode),
			
 
				 	OPT_STRING(0, "objdump", &objdump_path, "path",
			
@@ -645,6 +696,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	OPT_END()
			
 
				 	};
			
 
				 
			
 
				+	perf_config(perf_report_config, NULL);
			
 
				+
			
 
				 	argc = parse_options(argc, argv, options, report_usage, 0);
			
 
				 
			
 
				 	if (report.use_stdio)
			
@@ -663,6 +716,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		else
			
 
				 			input_name = "perf.data";
			
 
				 	}
			
 
				+
			
 
				+	if (strcmp(input_name, "-") != 0)
			
 
				+		setup_browser(true);
			
 
				+	else {
			
 
				+		use_browser = 0;
			
 
				+		perf_hpp__column_enable(PERF_HPP__OVERHEAD);
			
 
				+		perf_hpp__init();
			
 
				+	}
			
 
				+
			
 
				+repeat:
			
 
				 	session = perf_session__new(input_name, O_RDONLY,
			
 
				 				    report.force, false, &report.tool);
			
 
				 	if (session == NULL)
			
@@ -688,14 +751,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 
			
 
				 	}
			
 
				 
			
 
				-	if (strcmp(input_name, "-") != 0)
			
 
				-		setup_browser(true);
			
 
				-	else {
			
 
				-		use_browser = 0;
			
 
				-		perf_hpp__init();
			
 
				-	}
			
 
				-
			
 
				-	setup_sorting(report_usage, options);
			
 
				+	if (setup_sorting() < 0)
			
 
				+		usage_with_options(report_usage, options);
			
 
				 
			
 
				 	/*
			
 
				 	 * Only in the newt browser we are doing integrated annotation,
			
@@ -763,6 +820,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	}
			
 
				 
			
 
				 	ret = __cmd_report(&report);
			
 
				+	if (ret == K_SWITCH_INPUT_DATA) {
			
 
				+		perf_session__delete(session);
			
 
				+		goto repeat;
			
 
				+	} else
			
 
				+		ret = 0;
			
 
				+
			
 
				 error:
			
 
				 	perf_session__delete(session);
			
 
				 	return ret;
			
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1475,9 +1475,9 @@ static int perf_sched__read_events(struct perf_sched *sched, bool destroy,
 
				 			goto out_delete;
			
 
				 		}
			
 
				 
			
 
				-		sched->nr_events      = session->hists.stats.nr_events[0];
			
 
				-		sched->nr_lost_events = session->hists.stats.total_lost;
			
 
				-		sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
			
 
				+		sched->nr_events      = session->stats.nr_events[0];
			
 
				+		sched->nr_lost_events = session->stats.total_lost;
			
 
				+		sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST];
			
 
				 	}
			
 
				 
			
 
				 	if (destroy)
			
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -692,7 +692,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 
				 			    const char *arg, int unset __maybe_unused)
			
 
				 {
			
 
				 	char *tok;
			
 
				-	int i, imax = sizeof(all_output_options) / sizeof(struct output_option);
			
 
				+	int i, imax = ARRAY_SIZE(all_output_options);
			
 
				 	int j;
			
 
				 	int rc = 0;
			
 
				 	char *str = strdup(arg);
			
@@ -909,18 +909,6 @@ static const char *ends_with(const char *str, const char *suffix)
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				-static char *ltrim(char *str)
			
 
				-{
			
 
				-	int len = strlen(str);
			
 
				-
			
 
				-	while (len && isspace(*str)) {
			
 
				-		len--;
			
 
				-		str++;
			
 
				-	}
			
 
				-
			
 
				-	return str;
			
 
				-}
			
 
				-
			
 
				 static int read_script_info(struct script_desc *desc, const char *filename)
			
 
				 {
			
 
				 	char line[BUFSIZ], *p;
			
@@ -1487,7 +1475,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			return -1;
			
 
				 	}
			
 
				 
			
 
				-	perf_session__fprintf_info(session, stdout, show_full_info);
			
 
				+	if (!script_name && !generate_script_lang)
			
 
				+		perf_session__fprintf_info(session, stdout, show_full_info);
			
 
				 
			
 
				 	if (!no_callchain)
			
 
				 		symbol_conf.use_callchain = true;
			
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,11 @@
 
				 #define CNTR_NOT_SUPPORTED	"<not supported>"
			
 
				 #define CNTR_NOT_COUNTED	"<not counted>"
			
 
				 
			
 
				+static void print_stat(int argc, const char **argv);
			
 
				+static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
			
 
				+static void print_counter(struct perf_evsel *counter, char *prefix);
			
 
				+static void print_aggr_socket(char *prefix);
			
 
				+
			
 
				 static struct perf_evlist	*evsel_list;
			
 
				 
			
 
				 static struct perf_target	target = {
			
@@ -75,6 +80,7 @@ static int			run_count			=  1;
 
				 static bool			no_inherit			= false;
			
 
				 static bool			scale				=  true;
			
 
				 static bool			no_aggr				= false;
			
 
				+static bool			aggr_socket			= false;
			
 
				 static pid_t			child_pid			= -1;
			
 
				 static bool			null_run			=  false;
			
 
				 static int			detailed_run			=  0;
			
@@ -87,6 +93,9 @@ static FILE			*output				= NULL;
 
				 static const char		*pre_cmd			= NULL;
			
 
				 static const char		*post_cmd			= NULL;
			
 
				 static bool			sync_run			= false;
			
 
				+static unsigned int		interval			= 0;
			
 
				+static struct timespec		ref_time;
			
 
				+static struct cpu_map		*sock_map;
			
 
				 
			
 
				 static volatile int done = 0;
			
 
				 
			
@@ -94,6 +103,28 @@ struct perf_stat {
 
				 	struct stats	  res_stats[3];
			
 
				 };
			
 
				 
			
 
				+static inline void diff_timespec(struct timespec *r, struct timespec *a,
			
 
				+				 struct timespec *b)
			
 
				+{
			
 
				+	r->tv_sec = a->tv_sec - b->tv_sec;
			
 
				+	if (a->tv_nsec < b->tv_nsec) {
			
 
				+		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
			
 
				+		r->tv_sec--;
			
 
				+	} else {
			
 
				+		r->tv_nsec = a->tv_nsec - b->tv_nsec ;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
			
 
				+{
			
 
				+	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
			
 
				+}
			
 
				+
			
 
				+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
			
 
				+{
			
 
				+	return perf_evsel__cpus(evsel)->nr;
			
 
				+}
			
 
				+
			
 
				 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
			
 
				 {
			
 
				 	evsel->priv = zalloc(sizeof(struct perf_stat));
			
@@ -106,14 +137,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 
				 	evsel->priv = NULL;
			
 
				 }
			
 
				 
			
 
				-static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
			
 
				+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
			
 
				 {
			
 
				-	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
			
 
				+	void *addr;
			
 
				+	size_t sz;
			
 
				+
			
 
				+	sz = sizeof(*evsel->counts) +
			
 
				+	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
			
 
				+
			
 
				+	addr = zalloc(sz);
			
 
				+	if (!addr)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	evsel->prev_raw_counts =  addr;
			
 
				+
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				-static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
			
 
				+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
			
 
				 {
			
 
				-	return perf_evsel__cpus(evsel)->nr;
			
 
				+	free(evsel->prev_raw_counts);
			
 
				+	evsel->prev_raw_counts = NULL;
			
 
				 }
			
 
				 
			
 
				 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
			
@@ -132,8 +176,6 @@ static struct stats walltime_nsecs_stats;
 
				 static int create_perf_stat_counter(struct perf_evsel *evsel)
			
 
				 {
			
 
				 	struct perf_event_attr *attr = &evsel->attr;
			
 
				-	bool exclude_guest_missing = false;
			
 
				-	int ret;
			
 
				 
			
 
				 	if (scale)
			
 
				 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			
@@ -141,38 +183,16 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 
				 
			
 
				 	attr->inherit = !no_inherit;
			
 
				 
			
 
				-retry:
			
 
				-	if (exclude_guest_missing)
			
 
				-		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
			
 
				-
			
 
				-	if (perf_target__has_cpu(&target)) {
			
 
				-		ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
			
 
				-		if (ret)
			
 
				-			goto check_ret;
			
 
				-		return 0;
			
 
				-	}
			
 
				+	if (perf_target__has_cpu(&target))
			
 
				+		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
			
 
				 
			
 
				 	if (!perf_target__has_task(&target) &&
			
 
				-	    !perf_evsel__is_group_member(evsel)) {
			
 
				+	    perf_evsel__is_group_leader(evsel)) {
			
 
				 		attr->disabled = 1;
			
 
				 		attr->enable_on_exec = 1;
			
 
				 	}
			
 
				 
			
 
				-	ret = perf_evsel__open_per_thread(evsel, evsel_list->threads);
			
 
				-	if (!ret)
			
 
				-		return 0;
			
 
				-	/* fall through */
			
 
				-check_ret:
			
 
				-	if (ret && errno == EINVAL) {
			
 
				-		if (!exclude_guest_missing &&
			
 
				-		    (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
			
 
				-			pr_debug("Old kernel, cannot exclude "
			
 
				-				 "guest or host samples.\n");
			
 
				-			exclude_guest_missing = true;
			
 
				-			goto retry;
			
 
				-		}
			
 
				-	}
			
 
				-	return ret;
			
 
				+	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -269,15 +289,79 @@ static int read_counter(struct perf_evsel *counter)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void print_interval(void)
			
 
				+{
			
 
				+	static int num_print_interval;
			
 
				+	struct perf_evsel *counter;
			
 
				+	struct perf_stat *ps;
			
 
				+	struct timespec ts, rs;
			
 
				+	char prefix[64];
			
 
				+
			
 
				+	if (no_aggr) {
			
 
				+		list_for_each_entry(counter, &evsel_list->entries, node) {
			
 
				+			ps = counter->priv;
			
 
				+			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			
 
				+			read_counter(counter);
			
 
				+		}
			
 
				+	} else {
			
 
				+		list_for_each_entry(counter, &evsel_list->entries, node) {
			
 
				+			ps = counter->priv;
			
 
				+			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			
 
				+			read_counter_aggr(counter);
			
 
				+		}
			
 
				+	}
			
 
				+	clock_gettime(CLOCK_MONOTONIC, &ts);
			
 
				+	diff_timespec(&rs, &ts, &ref_time);
			
 
				+	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
			
 
				+
			
 
				+	if (num_print_interval == 0 && !csv_output) {
			
 
				+		if (aggr_socket)
			
 
				+			fprintf(output, "#           time socket cpus             counts events\n");
			
 
				+		else if (no_aggr)
			
 
				+			fprintf(output, "#           time CPU                 counts events\n");
			
 
				+		else
			
 
				+			fprintf(output, "#           time             counts events\n");
			
 
				+	}
			
 
				+
			
 
				+	if (++num_print_interval == 25)
			
 
				+		num_print_interval = 0;
			
 
				+
			
 
				+	if (aggr_socket)
			
 
				+		print_aggr_socket(prefix);
			
 
				+	else if (no_aggr) {
			
 
				+		list_for_each_entry(counter, &evsel_list->entries, node)
			
 
				+			print_counter(counter, prefix);
			
 
				+	} else {
			
 
				+		list_for_each_entry(counter, &evsel_list->entries, node)
			
 
				+			print_counter_aggr(counter, prefix);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static int __run_perf_stat(int argc __maybe_unused, const char **argv)
			
 
				 {
			
 
				+	char msg[512];
			
 
				 	unsigned long long t0, t1;
			
 
				 	struct perf_evsel *counter;
			
 
				+	struct timespec ts;
			
 
				 	int status = 0;
			
 
				 	int child_ready_pipe[2], go_pipe[2];
			
 
				 	const bool forks = (argc > 0);
			
 
				 	char buf;
			
 
				 
			
 
				+	if (interval) {
			
 
				+		ts.tv_sec  = interval / 1000;
			
 
				+		ts.tv_nsec = (interval % 1000) * 1000000;
			
 
				+	} else {
			
 
				+		ts.tv_sec  = 1;
			
 
				+		ts.tv_nsec = 0;
			
 
				+	}
			
 
				+
			
 
				+	if (aggr_socket
			
 
				+	    && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
			
 
				+		perror("cannot build socket map");
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				 	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
			
 
				 		perror("failed to create pipes");
			
 
				 		return -1;
			
@@ -348,20 +432,13 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 
				 				continue;
			
 
				 			}
			
 
				 
			
 
				-			if (errno == EPERM || errno == EACCES) {
			
 
				-				error("You may not have permission to collect %sstats.\n"
			
 
				-				      "\t Consider tweaking"
			
 
				-				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
			
 
				-				      target.system_wide ? "system-wide " : "");
			
 
				-			} else {
			
 
				-				error("open_counter returned with %d (%s). "
			
 
				-				      "/bin/dmesg may provide additional information.\n",
			
 
				-				       errno, strerror(errno));
			
 
				-			}
			
 
				+			perf_evsel__open_strerror(counter, &target,
			
 
				+						  errno, msg, sizeof(msg));
			
 
				+			ui__error("%s\n", msg);
			
 
				+
			
 
				 			if (child_pid != -1)
			
 
				 				kill(child_pid, SIGTERM);
			
 
				 
			
 
				-			pr_err("Not all events could be opened.\n");
			
 
				 			return -1;
			
 
				 		}
			
 
				 		counter->supported = true;
			
@@ -377,14 +454,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 
				 	 * Enable counters and exec the command:
			
 
				 	 */
			
 
				 	t0 = rdclock();
			
 
				+	clock_gettime(CLOCK_MONOTONIC, &ref_time);
			
 
				 
			
 
				 	if (forks) {
			
 
				 		close(go_pipe[1]);
			
 
				+		if (interval) {
			
 
				+			while (!waitpid(child_pid, &status, WNOHANG)) {
			
 
				+				nanosleep(&ts, NULL);
			
 
				+				print_interval();
			
 
				+			}
			
 
				+		}
			
 
				 		wait(&status);
			
 
				 		if (WIFSIGNALED(status))
			
 
				 			psignal(WTERMSIG(status), argv[0]);
			
 
				 	} else {
			
 
				-		while(!done) sleep(1);
			
 
				+		while (!done) {
			
 
				+			nanosleep(&ts, NULL);
			
 
				+			if (interval)
			
 
				+				print_interval();
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	t1 = rdclock();
			
@@ -454,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 
				 	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
			
 
				 }
			
 
				 
			
 
				-static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
			
 
				+static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
			
 
				 {
			
 
				 	double msecs = avg / 1e6;
			
 
				 	char cpustr[16] = { '\0', };
			
 
				 	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
			
 
				 
			
 
				-	if (no_aggr)
			
 
				+	if (aggr_socket)
			
 
				+		sprintf(cpustr, "S%*d%s%*d%s",
			
 
				+			csv_output ? 0 : -5,
			
 
				+			cpu,
			
 
				+			csv_sep,
			
 
				+			csv_output ? 0 : 4,
			
 
				+			nr,
			
 
				+			csv_sep);
			
 
				+	else if (no_aggr)
			
 
				 		sprintf(cpustr, "CPU%*d%s",
			
 
				 			csv_output ? 0 : -4,
			
 
				 			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
			
@@ -470,7 +566,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 
				 	if (evsel->cgrp)
			
 
				 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
			
 
				 
			
 
				-	if (csv_output)
			
 
				+	if (csv_output || interval)
			
 
				 		return;
			
 
				 
			
 
				 	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
			
@@ -659,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
 
				 	fprintf(output, " of all LL-cache hits   ");
			
 
				 }
			
 
				 
			
 
				-static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
			
 
				+static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
			
 
				 {
			
 
				 	double total, ratio = 0.0;
			
 
				 	char cpustr[16] = { '\0', };
			
@@ -672,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
				 	else
			
 
				 		fmt = "%s%18.0f%s%-25s";
			
 
				 
			
 
				-	if (no_aggr)
			
 
				+	if (aggr_socket)
			
 
				+		sprintf(cpustr, "S%*d%s%*d%s",
			
 
				+			csv_output ? 0 : -5,
			
 
				+			cpu,
			
 
				+			csv_sep,
			
 
				+			csv_output ? 0 : 4,
			
 
				+			nr,
			
 
				+			csv_sep);
			
 
				+	else if (no_aggr)
			
 
				 		sprintf(cpustr, "CPU%*d%s",
			
 
				 			csv_output ? 0 : -4,
			
 
				 			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
			
@@ -684,12 +788,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
				 	if (evsel->cgrp)
			
 
				 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
			
 
				 
			
 
				-	if (csv_output)
			
 
				+	if (csv_output || interval)
			
 
				 		return;
			
 
				 
			
 
				 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
			
 
				 		total = avg_stats(&runtime_cycles_stats[cpu]);
			
 
				-
			
 
				 		if (total)
			
 
				 			ratio = avg / total;
			
 
				 
			
@@ -779,16 +882,83 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void print_aggr_socket(char *prefix)
			
 
				+{
			
 
				+	struct perf_evsel *counter;
			
 
				+	u64 ena, run, val;
			
 
				+	int cpu, s, s2, sock, nr;
			
 
				+
			
 
				+	if (!sock_map)
			
 
				+		return;
			
 
				+
			
 
				+	for (s = 0; s < sock_map->nr; s++) {
			
 
				+		sock = cpu_map__socket(sock_map, s);
			
 
				+		list_for_each_entry(counter, &evsel_list->entries, node) {
			
 
				+			val = ena = run = 0;
			
 
				+			nr = 0;
			
 
				+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
			
 
				+				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
			
 
				+				if (s2 != sock)
			
 
				+					continue;
			
 
				+				val += counter->counts->cpu[cpu].val;
			
 
				+				ena += counter->counts->cpu[cpu].ena;
			
 
				+				run += counter->counts->cpu[cpu].run;
			
 
				+				nr++;
			
 
				+			}
			
 
				+			if (prefix)
			
 
				+				fprintf(output, "%s", prefix);
			
 
				+
			
 
				+			if (run == 0 || ena == 0) {
			
 
				+				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
			
 
				+					csv_output ? 0 : -5,
			
 
				+					s,
			
 
				+					csv_sep,
			
 
				+					csv_output ? 0 : 4,
			
 
				+					nr,
			
 
				+					csv_sep,
			
 
				+					csv_output ? 0 : 18,
			
 
				+					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			
 
				+					csv_sep,
			
 
				+					csv_output ? 0 : -24,
			
 
				+					perf_evsel__name(counter));
			
 
				+				if (counter->cgrp)
			
 
				+					fprintf(output, "%s%s",
			
 
				+						csv_sep, counter->cgrp->name);
			
 
				+
			
 
				+				fputc('\n', output);
			
 
				+				continue;
			
 
				+			}
			
 
				+
			
 
				+			if (nsec_counter(counter))
			
 
				+				nsec_printout(sock, nr, counter, val);
			
 
				+			else
			
 
				+				abs_printout(sock, nr, counter, val);
			
 
				+
			
 
				+			if (!csv_output) {
			
 
				+				print_noise(counter, 1.0);
			
 
				+
			
 
				+				if (run != ena)
			
 
				+					fprintf(output, "  (%.2f%%)",
			
 
				+						100.0 * run / ena);
			
 
				+			}
			
 
				+			fputc('\n', output);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Print out the results of a single counter:
			
 
				  * aggregated counts in system-wide mode
			
 
				  */
			
 
				-static void print_counter_aggr(struct perf_evsel *counter)
			
 
				+static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
			
 
				 {
			
 
				 	struct perf_stat *ps = counter->priv;
			
 
				 	double avg = avg_stats(&ps->res_stats[0]);
			
 
				 	int scaled = counter->counts->scaled;
			
 
				 
			
 
				+	if (prefix)
			
 
				+		fprintf(output, "%s", prefix);
			
 
				+
			
 
				 	if (scaled == -1) {
			
 
				 		fprintf(output, "%*s%s%*s",
			
 
				 			csv_output ? 0 : 18,
			
@@ -805,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter)
 
				 	}
			
 
				 
			
 
				 	if (nsec_counter(counter))
			
 
				-		nsec_printout(-1, counter, avg);
			
 
				+		nsec_printout(-1, 0, counter, avg);
			
 
				 	else
			
 
				-		abs_printout(-1, counter, avg);
			
 
				+		abs_printout(-1, 0, counter, avg);
			
 
				 
			
 
				 	print_noise(counter, avg);
			
 
				 
			
@@ -831,7 +1001,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
 
				  * Print out the results of a single counter:
			
 
				  * does not use aggregated count in system-wide
			
 
				  */
			
 
				-static void print_counter(struct perf_evsel *counter)
			
 
				+static void print_counter(struct perf_evsel *counter, char *prefix)
			
 
				 {
			
 
				 	u64 ena, run, val;
			
 
				 	int cpu;
			
@@ -840,6 +1010,10 @@ static void print_counter(struct perf_evsel *counter)
 
				 		val = counter->counts->cpu[cpu].val;
			
 
				 		ena = counter->counts->cpu[cpu].ena;
			
 
				 		run = counter->counts->cpu[cpu].run;
			
 
				+
			
 
				+		if (prefix)
			
 
				+			fprintf(output, "%s", prefix);
			
 
				+
			
 
				 		if (run == 0 || ena == 0) {
			
 
				 			fprintf(output, "CPU%*d%s%*s%s%*s",
			
 
				 				csv_output ? 0 : -4,
			
@@ -859,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter)
 
				 		}
			
 
				 
			
 
				 		if (nsec_counter(counter))
			
 
				-			nsec_printout(cpu, counter, val);
			
 
				+			nsec_printout(cpu, 0, counter, val);
			
 
				 		else
			
 
				-			abs_printout(cpu, counter, val);
			
 
				+			abs_printout(cpu, 0, counter, val);
			
 
				 
			
 
				 		if (!csv_output) {
			
 
				 			print_noise(counter, 1.0);
			
@@ -899,12 +1073,14 @@ static void print_stat(int argc, const char **argv)
 
				 		fprintf(output, ":\n\n");
			
 
				 	}
			
 
				 
			
 
				-	if (no_aggr) {
			
 
				+	if (aggr_socket)
			
 
				+		print_aggr_socket(NULL);
			
 
				+	else if (no_aggr) {
			
 
				 		list_for_each_entry(counter, &evsel_list->entries, node)
			
 
				-			print_counter(counter);
			
 
				+			print_counter(counter, NULL);
			
 
				 	} else {
			
 
				 		list_for_each_entry(counter, &evsel_list->entries, node)
			
 
				-			print_counter_aggr(counter);
			
 
				+			print_counter_aggr(counter, NULL);
			
 
				 	}
			
 
				 
			
 
				 	if (!csv_output) {
			
@@ -925,7 +1101,7 @@ static volatile int signr = -1;
 
				 
			
 
				 static void skip_signal(int signo)
			
 
				 {
			
 
				-	if(child_pid == -1)
			
 
				+	if ((child_pid == -1) || interval)
			
 
				 		done = 1;
			
 
				 
			
 
				 	signr = signo;
			
@@ -1145,6 +1321,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			"command to run prior to the measured command"),
			
 
				 	OPT_STRING(0, "post", &post_cmd, "command",
			
 
				 			"command to run after to the measured command"),
			
 
				+	OPT_UINTEGER('I', "interval-print", &interval,
			
 
				+		    "print counts at regular interval in ms (>= 100)"),
			
 
				+	OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 	const char * const stat_usage[] = {
			
@@ -1231,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		usage_with_options(stat_usage, options);
			
 
				 	}
			
 
				 
			
 
				+	if (aggr_socket) {
			
 
				+		if (!perf_target__has_cpu(&target)) {
			
 
				+			fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
			
 
				+			usage_with_options(stat_usage, options);
			
 
				+		}
			
 
				+		no_aggr = true;
			
 
				+	}
			
 
				+
			
 
				 	if (add_default_attributes())
			
 
				 		goto out;
			
 
				 
			
@@ -1245,12 +1432,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		usage_with_options(stat_usage, options);
			
 
				 		return -1;
			
 
				 	}
			
 
				+	if (interval && interval < 100) {
			
 
				+		pr_err("print interval must be >= 100ms\n");
			
 
				+		usage_with_options(stat_usage, options);
			
 
				+		return -1;
			
 
				+	}
			
 
				 
			
 
				 	list_for_each_entry(pos, &evsel_list->entries, node) {
			
 
				 		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
			
 
				 		    perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
			
 
				 			goto out_free_fd;
			
 
				 	}
			
 
				+	if (interval) {
			
 
				+		list_for_each_entry(pos, &evsel_list->entries, node) {
			
 
				+			if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
			
 
				+				goto out_free_fd;
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	/*
			
 
				 	 * We dont want to block the signals - that would cause
			
@@ -1260,6 +1458,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	 */
			
 
				 	atexit(sig_atexit);
			
 
				 	signal(SIGINT,  skip_signal);
			
 
				+	signal(SIGCHLD, skip_signal);
			
 
				 	signal(SIGALRM, skip_signal);
			
 
				 	signal(SIGABRT, skip_signal);
			
 
				 
			
@@ -1272,11 +1471,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		status = run_perf_stat(argc, argv);
			
 
				 	}
			
 
				 
			
 
				-	if (status != -1)
			
 
				+	if (status != -1 && !interval)
			
 
				 		print_stat(argc, argv);
			
 
				 out_free_fd:
			
 
				-	list_for_each_entry(pos, &evsel_list->entries, node)
			
 
				+	list_for_each_entry(pos, &evsel_list->entries, node) {
			
 
				 		perf_evsel__free_stat_priv(pos);
			
 
				+		perf_evsel__free_counts(pos);
			
 
				+		perf_evsel__free_prev_raw_counts(pos);
			
 
				+	}
			
 
				 	perf_evlist__delete_maps(evsel_list);
			
 
				 out:
			
 
				 	perf_evlist__delete(evsel_list);
			
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -68,27 +68,7 @@
 
				 #include <linux/unistd.h>
			
 
				 #include <linux/types.h>
			
 
				 
			
 
				-void get_term_dimensions(struct winsize *ws)
			
 
				-{
			
 
				-	char *s = getenv("LINES");
			
 
				-
			
 
				-	if (s != NULL) {
			
 
				-		ws->ws_row = atoi(s);
			
 
				-		s = getenv("COLUMNS");
			
 
				-		if (s != NULL) {
			
 
				-			ws->ws_col = atoi(s);
			
 
				-			if (ws->ws_row && ws->ws_col)
			
 
				-				return;
			
 
				-		}
			
 
				-	}
			
 
				-#ifdef TIOCGWINSZ
			
 
				-	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
			
 
				-	    ws->ws_row && ws->ws_col)
			
 
				-		return;
			
 
				-#endif
			
 
				-	ws->ws_row = 25;
			
 
				-	ws->ws_col = 80;
			
 
				-}
			
 
				+static volatile int done;
			
 
				 
			
 
				 static void perf_top__update_print_entries(struct perf_top *top)
			
 
				 {
			
@@ -453,8 +433,10 @@ static int perf_top__key_mapped(struct perf_top *top, int c)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void perf_top__handle_keypress(struct perf_top *top, int c)
			
 
				+static bool perf_top__handle_keypress(struct perf_top *top, int c)
			
 
				 {
			
 
				+	bool ret = true;
			
 
				+
			
 
				 	if (!perf_top__key_mapped(top, c)) {
			
 
				 		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
			
 
				 		struct termios tc, save;
			
@@ -475,7 +457,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
 
				 
			
 
				 		tcsetattr(0, TCSAFLUSH, &save);
			
 
				 		if (!perf_top__key_mapped(top, c))
			
 
				-			return;
			
 
				+			return ret;
			
 
				 	}
			
 
				 
			
 
				 	switch (c) {
			
@@ -537,7 +519,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
 
				 			printf("exiting.\n");
			
 
				 			if (top->dump_symtab)
			
 
				 				perf_session__fprintf_dsos(top->session, stderr);
			
 
				-			exit(0);
			
 
				+			ret = false;
			
 
				+			break;
			
 
				 		case 's':
			
 
				 			perf_top__prompt_symbol(top, "Enter details symbol");
			
 
				 			break;
			
@@ -560,6 +543,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
 
				 		default:
			
 
				 			break;
			
 
				 	}
			
 
				+
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 static void perf_top__sort_new_samples(void *arg)
			
@@ -596,13 +581,12 @@ static void *display_thread_tui(void *arg)
 
				 	 * via --uid.
			
 
				 	 */
			
 
				 	list_for_each_entry(pos, &top->evlist->entries, node)
			
 
				-		pos->hists.uid_filter_str = top->target.uid_str;
			
 
				+		pos->hists.uid_filter_str = top->record_opts.target.uid_str;
			
 
				 
			
 
				 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
			
 
				 				      &top->session->header.env);
			
 
				 
			
 
				-	exit_browser(0);
			
 
				-	exit(0);
			
 
				+	done = 1;
			
 
				 	return NULL;
			
 
				 }
			
 
				 
			
@@ -626,7 +610,7 @@ repeat:
 
				 	/* trash return*/
			
 
				 	getc(stdin);
			
 
				 
			
 
				-	while (1) {
			
 
				+	while (!done) {
			
 
				 		perf_top__print_sym_table(top);
			
 
				 		/*
			
 
				 		 * Either timeout expired or we got an EINTR due to SIGWINCH,
			
@@ -640,15 +624,14 @@ repeat:
 
				 				continue;
			
 
				 			/* Fall trhu */
			
 
				 		default:
			
 
				-			goto process_hotkey;
			
 
				+			c = getc(stdin);
			
 
				+			tcsetattr(0, TCSAFLUSH, &save);
			
 
				+
			
 
				+			if (perf_top__handle_keypress(top, c))
			
 
				+				goto repeat;
			
 
				+			done = 1;
			
 
				 		}
			
 
				 	}
			
 
				-process_hotkey:
			
 
				-	c = getc(stdin);
			
 
				-	tcsetattr(0, TCSAFLUSH, &save);
			
 
				-
			
 
				-	perf_top__handle_keypress(top, c);
			
 
				-	goto repeat;
			
 
				 
			
 
				 	return NULL;
			
 
				 }
			
@@ -716,7 +699,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
				 		static struct intlist *seen;
			
 
				 
			
 
				 		if (!seen)
			
 
				-			seen = intlist__new();
			
 
				+			seen = intlist__new(NULL);
			
 
				 
			
 
				 		if (!intlist__has_entry(seen, event->ip.pid)) {
			
 
				 			pr_err("Can't find guest [%d]'s kernel information\n",
			
@@ -727,8 +710,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
				 	}
			
 
				 
			
 
				 	if (!machine) {
			
 
				-		pr_err("%u unprocessable samples recorded.",
			
 
				-		       top->session->hists.stats.nr_unprocessable_samples++);
			
 
				+		pr_err("%u unprocessable samples recorded.\r",
			
 
				+		       top->session->stats.nr_unprocessable_samples++);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
@@ -847,13 +830,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 
				 			++top->us_samples;
			
 
				 			if (top->hide_user_symbols)
			
 
				 				continue;
			
 
				-			machine = perf_session__find_host_machine(session);
			
 
				+			machine = &session->machines.host;
			
 
				 			break;
			
 
				 		case PERF_RECORD_MISC_KERNEL:
			
 
				 			++top->kernel_samples;
			
 
				 			if (top->hide_kernel_symbols)
			
 
				 				continue;
			
 
				-			machine = perf_session__find_host_machine(session);
			
 
				+			machine = &session->machines.host;
			
 
				 			break;
			
 
				 		case PERF_RECORD_MISC_GUEST_KERNEL:
			
 
				 			++top->guest_kernel_samples;
			
@@ -878,7 +861,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 
				 			hists__inc_nr_events(&evsel->hists, event->header.type);
			
 
				 			machine__process_event(machine, event);
			
 
				 		} else
			
 
				-			++session->hists.stats.nr_unknown_events;
			
 
				+			++session->stats.nr_unknown_events;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -890,123 +873,42 @@ static void perf_top__mmap_read(struct perf_top *top)
 
				 		perf_top__mmap_read_idx(top, i);
			
 
				 }
			
 
				 
			
 
				-static void perf_top__start_counters(struct perf_top *top)
			
 
				+static int perf_top__start_counters(struct perf_top *top)
			
 
				 {
			
 
				+	char msg[512];
			
 
				 	struct perf_evsel *counter;
			
 
				 	struct perf_evlist *evlist = top->evlist;
			
 
				+	struct perf_record_opts *opts = &top->record_opts;
			
 
				 
			
 
				-	if (top->group)
			
 
				-		perf_evlist__set_leader(evlist);
			
 
				+	perf_evlist__config(evlist, opts);
			
 
				 
			
 
				 	list_for_each_entry(counter, &evlist->entries, node) {
			
 
				-		struct perf_event_attr *attr = &counter->attr;
			
 
				-
			
 
				-		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
			
 
				-
			
 
				-		if (top->freq) {
			
 
				-			attr->sample_type |= PERF_SAMPLE_PERIOD;
			
 
				-			attr->freq	  = 1;
			
 
				-			attr->sample_freq = top->freq;
			
 
				-		}
			
 
				-
			
 
				-		if (evlist->nr_entries > 1) {
			
 
				-			attr->sample_type |= PERF_SAMPLE_ID;
			
 
				-			attr->read_format |= PERF_FORMAT_ID;
			
 
				-		}
			
 
				-
			
 
				-		if (perf_target__has_cpu(&top->target))
			
 
				-			attr->sample_type |= PERF_SAMPLE_CPU;
			
 
				-
			
 
				-		if (symbol_conf.use_callchain)
			
 
				-			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
			
 
				-
			
 
				-		attr->mmap = 1;
			
 
				-		attr->comm = 1;
			
 
				-		attr->inherit = top->inherit;
			
 
				-fallback_missing_features:
			
 
				-		if (top->exclude_guest_missing)
			
 
				-			attr->exclude_guest = attr->exclude_host = 0;
			
 
				-retry_sample_id:
			
 
				-		attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
			
 
				 try_again:
			
 
				 		if (perf_evsel__open(counter, top->evlist->cpus,
			
 
				 				     top->evlist->threads) < 0) {
			
 
				-			int err = errno;
			
 
				-
			
 
				-			if (err == EPERM || err == EACCES) {
			
 
				-				ui__error_paranoid();
			
 
				-				goto out_err;
			
 
				-			} else if (err == EINVAL) {
			
 
				-				if (!top->exclude_guest_missing &&
			
 
				-				    (attr->exclude_guest || attr->exclude_host)) {
			
 
				-					pr_debug("Old kernel, cannot exclude "
			
 
				-						 "guest or host samples.\n");
			
 
				-					top->exclude_guest_missing = true;
			
 
				-					goto fallback_missing_features;
			
 
				-				} else if (!top->sample_id_all_missing) {
			
 
				-					/*
			
 
				-					 * Old kernel, no attr->sample_id_type_all field
			
 
				-					 */
			
 
				-					top->sample_id_all_missing = true;
			
 
				-					goto retry_sample_id;
			
 
				-				}
			
 
				-			}
			
 
				-			/*
			
 
				-			 * If it's cycles then fall back to hrtimer
			
 
				-			 * based cpu-clock-tick sw counter, which
			
 
				-			 * is always available even if no PMU support:
			
 
				-			 */
			
 
				-			if ((err == ENOENT || err == ENXIO) &&
			
 
				-			    (attr->type == PERF_TYPE_HARDWARE) &&
			
 
				-			    (attr->config == PERF_COUNT_HW_CPU_CYCLES)) {
			
 
				-
			
 
				+			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
			
 
				 				if (verbose)
			
 
				-					ui__warning("Cycles event not supported,\n"
			
 
				-						    "trying to fall back to cpu-clock-ticks\n");
			
 
				-
			
 
				-				attr->type = PERF_TYPE_SOFTWARE;
			
 
				-				attr->config = PERF_COUNT_SW_CPU_CLOCK;
			
 
				-				if (counter->name) {
			
 
				-					free(counter->name);
			
 
				-					counter->name = NULL;
			
 
				-				}
			
 
				+					ui__warning("%s\n", msg);
			
 
				 				goto try_again;
			
 
				 			}
			
 
				 
			
 
				-			if (err == ENOENT) {
			
 
				-				ui__error("The %s event is not supported.\n",
			
 
				-					  perf_evsel__name(counter));
			
 
				-				goto out_err;
			
 
				-			} else if (err == EMFILE) {
			
 
				-				ui__error("Too many events are opened.\n"
			
 
				-					    "Try again after reducing the number of events\n");
			
 
				-				goto out_err;
			
 
				-			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
			
 
				-				ui__error("\'precise\' request may not be supported. "
			
 
				-					  "Try removing 'p' modifier\n");
			
 
				-				goto out_err;
			
 
				-			}
			
 
				-
			
 
				-			ui__error("The sys_perf_event_open() syscall "
			
 
				-				    "returned with %d (%s).  /bin/dmesg "
			
 
				-				    "may provide additional information.\n"
			
 
				-				    "No CONFIG_PERF_EVENTS=y kernel support "
			
 
				-				    "configured?\n", err, strerror(err));
			
 
				+			perf_evsel__open_strerror(counter, &opts->target,
			
 
				+						  errno, msg, sizeof(msg));
			
 
				+			ui__error("%s\n", msg);
			
 
				 			goto out_err;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
			
 
				+	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
			
 
				 		ui__error("Failed to mmap with %d (%s)\n",
			
 
				 			    errno, strerror(errno));
			
 
				 		goto out_err;
			
 
				 	}
			
 
				 
			
 
				-	return;
			
 
				+	return 0;
			
 
				 
			
 
				 out_err:
			
 
				-	exit_browser(0);
			
 
				-	exit(0);
			
 
				+	return -1;
			
 
				 }
			
 
				 
			
 
				 static int perf_top__setup_sample_type(struct perf_top *top)
			
@@ -1016,7 +918,7 @@ static int perf_top__setup_sample_type(struct perf_top *top)
 
				 			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
			
 
				 			return -EINVAL;
			
 
				 		}
			
 
				-	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
			
 
				+	} else if (callchain_param.mode != CHAIN_NONE) {
			
 
				 		if (callchain_register_param(&callchain_param) < 0) {
			
 
				 			ui__error("Can't register callchain params.\n");
			
 
				 			return -EINVAL;
			
@@ -1028,6 +930,7 @@ static int perf_top__setup_sample_type(struct perf_top *top)
 
				 
			
 
				 static int __cmd_top(struct perf_top *top)
			
 
				 {
			
 
				+	struct perf_record_opts *opts = &top->record_opts;
			
 
				 	pthread_t thread;
			
 
				 	int ret;
			
 
				 	/*
			
@@ -1042,26 +945,42 @@ static int __cmd_top(struct perf_top *top)
 
				 	if (ret)
			
 
				 		goto out_delete;
			
 
				 
			
 
				-	if (perf_target__has_task(&top->target))
			
 
				+	if (perf_target__has_task(&opts->target))
			
 
				 		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
			
 
				 						  perf_event__process,
			
 
				-						  &top->session->host_machine);
			
 
				+						  &top->session->machines.host);
			
 
				 	else
			
 
				 		perf_event__synthesize_threads(&top->tool, perf_event__process,
			
 
				-					       &top->session->host_machine);
			
 
				-	perf_top__start_counters(top);
			
 
				+					       &top->session->machines.host);
			
 
				+
			
 
				+	ret = perf_top__start_counters(top);
			
 
				+	if (ret)
			
 
				+		goto out_delete;
			
 
				+
			
 
				 	top->session->evlist = top->evlist;
			
 
				 	perf_session__set_id_hdr_size(top->session);
			
 
				 
			
 
				+	/*
			
 
				+	 * When perf is starting the traced process, all the events (apart from
			
 
				+	 * group members) have enable_on_exec=1 set, so don't spoil it by
			
 
				+	 * prematurely enabling them.
			
 
				+	 *
			
 
				+	 * XXX 'top' still doesn't start workloads like record, trace, but should,
			
 
				+	 * so leave the check here.
			
 
				+	 */
			
 
				+        if (!perf_target__none(&opts->target))
			
 
				+                perf_evlist__enable(top->evlist);
			
 
				+
			
 
				 	/* Wait for a minimal set of events before starting the snapshot */
			
 
				 	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
			
 
				 
			
 
				 	perf_top__mmap_read(top);
			
 
				 
			
 
				+	ret = -1;
			
 
				 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
			
 
				 							    display_thread), top)) {
			
 
				 		ui__error("Could not create display thread.\n");
			
 
				-		exit(-1);
			
 
				+		goto out_delete;
			
 
				 	}
			
 
				 
			
 
				 	if (top->realtime_prio) {
			
@@ -1070,11 +989,11 @@ static int __cmd_top(struct perf_top *top)
 
				 		param.sched_priority = top->realtime_prio;
			
 
				 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			
 
				 			ui__error("Could not set realtime priority.\n");
			
 
				-			exit(-1);
			
 
				+			goto out_delete;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	while (1) {
			
 
				+	while (!done) {
			
 
				 		u64 hits = top->samples;
			
 
				 
			
 
				 		perf_top__mmap_read(top);
			
@@ -1083,126 +1002,67 @@ static int __cmd_top(struct perf_top *top)
 
				 			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
			
 
				 	}
			
 
				 
			
 
				+	ret = 0;
			
 
				 out_delete:
			
 
				 	perf_session__delete(top->session);
			
 
				 	top->session = NULL;
			
 
				 
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 static int
			
 
				 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
			
 
				 {
			
 
				-	struct perf_top *top = (struct perf_top *)opt->value;
			
 
				-	char *tok, *tok2;
			
 
				-	char *endptr;
			
 
				-
			
 
				 	/*
			
 
				 	 * --no-call-graph
			
 
				 	 */
			
 
				-	if (unset) {
			
 
				-		top->dont_use_callchains = true;
			
 
				+	if (unset)
			
 
				 		return 0;
			
 
				-	}
			
 
				 
			
 
				 	symbol_conf.use_callchain = true;
			
 
				 
			
 
				-	if (!arg)
			
 
				-		return 0;
			
 
				-
			
 
				-	tok = strtok((char *)arg, ",");
			
 
				-	if (!tok)
			
 
				-		return -1;
			
 
				-
			
 
				-	/* get the output mode */
			
 
				-	if (!strncmp(tok, "graph", strlen(arg)))
			
 
				-		callchain_param.mode = CHAIN_GRAPH_ABS;
			
 
				-
			
 
				-	else if (!strncmp(tok, "flat", strlen(arg)))
			
 
				-		callchain_param.mode = CHAIN_FLAT;
			
 
				-
			
 
				-	else if (!strncmp(tok, "fractal", strlen(arg)))
			
 
				-		callchain_param.mode = CHAIN_GRAPH_REL;
			
 
				-
			
 
				-	else if (!strncmp(tok, "none", strlen(arg))) {
			
 
				-		callchain_param.mode = CHAIN_NONE;
			
 
				-		symbol_conf.use_callchain = false;
			
 
				-
			
 
				-		return 0;
			
 
				-	} else
			
 
				-		return -1;
			
 
				-
			
 
				-	/* get the min percentage */
			
 
				-	tok = strtok(NULL, ",");
			
 
				-	if (!tok)
			
 
				-		goto setup;
			
 
				-
			
 
				-	callchain_param.min_percent = strtod(tok, &endptr);
			
 
				-	if (tok == endptr)
			
 
				-		return -1;
			
 
				-
			
 
				-	/* get the print limit */
			
 
				-	tok2 = strtok(NULL, ",");
			
 
				-	if (!tok2)
			
 
				-		goto setup;
			
 
				-
			
 
				-	if (tok2[0] != 'c') {
			
 
				-		callchain_param.print_limit = strtod(tok2, &endptr);
			
 
				-		tok2 = strtok(NULL, ",");
			
 
				-		if (!tok2)
			
 
				-			goto setup;
			
 
				-	}
			
 
				-
			
 
				-	/* get the call chain order */
			
 
				-	if (!strcmp(tok2, "caller"))
			
 
				-		callchain_param.order = ORDER_CALLER;
			
 
				-	else if (!strcmp(tok2, "callee"))
			
 
				-		callchain_param.order = ORDER_CALLEE;
			
 
				-	else
			
 
				-		return -1;
			
 
				-setup:
			
 
				-	if (callchain_register_param(&callchain_param) < 0) {
			
 
				-		fprintf(stderr, "Can't register callchain params\n");
			
 
				-		return -1;
			
 
				-	}
			
 
				-	return 0;
			
 
				+	return record_parse_callchain_opt(opt, arg, unset);
			
 
				 }
			
 
				 
			
 
				 int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
			
 
				 {
			
 
				-	struct perf_evsel *pos;
			
 
				 	int status;
			
 
				 	char errbuf[BUFSIZ];
			
 
				 	struct perf_top top = {
			
 
				 		.count_filter	     = 5,
			
 
				 		.delay_secs	     = 2,
			
 
				-		.freq		     = 4000, /* 4 KHz */
			
 
				-		.mmap_pages	     = 128,
			
 
				-		.sym_pcnt_filter     = 5,
			
 
				-		.target		     = {
			
 
				-			.uses_mmap   = true,
			
 
				+		.record_opts = {
			
 
				+			.mmap_pages	= UINT_MAX,
			
 
				+			.user_freq	= UINT_MAX,
			
 
				+			.user_interval	= ULLONG_MAX,
			
 
				+			.freq		= 4000, /* 4 KHz */
			
 
				+			.target		     = {
			
 
				+				.uses_mmap   = true,
			
 
				+			},
			
 
				 		},
			
 
				+		.sym_pcnt_filter     = 5,
			
 
				 	};
			
 
				-	char callchain_default_opt[] = "fractal,0.5,callee";
			
 
				+	struct perf_record_opts *opts = &top.record_opts;
			
 
				+	struct perf_target *target = &opts->target;
			
 
				 	const struct option options[] = {
			
 
				 	OPT_CALLBACK('e', "event", &top.evlist, "event",
			
 
				 		     "event selector. use 'perf list' to list available events",
			
 
				 		     parse_events_option),
			
 
				-	OPT_INTEGER('c', "count", &top.default_interval,
			
 
				-		    "event period to sample"),
			
 
				-	OPT_STRING('p', "pid", &top.target.pid, "pid",
			
 
				+	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
			
 
				+	OPT_STRING('p', "pid", &target->pid, "pid",
			
 
				 		    "profile events on existing process id"),
			
 
				-	OPT_STRING('t', "tid", &top.target.tid, "tid",
			
 
				+	OPT_STRING('t', "tid", &target->tid, "tid",
			
 
				 		    "profile events on existing thread id"),
			
 
				-	OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide,
			
 
				+	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
			
 
				 			    "system-wide collection from all CPUs"),
			
 
				-	OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu",
			
 
				+	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
			
 
				 		    "list of cpus to monitor"),
			
 
				 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			
 
				 		   "file", "vmlinux pathname"),
			
 
				 	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
			
 
				 		    "hide kernel symbols"),
			
 
				-	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
			
 
				+	OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
			
 
				+		     "number of mmap data pages"),
			
 
				 	OPT_INTEGER('r', "realtime", &top.realtime_prio,
			
 
				 		    "collect data with this RT SCHED_FIFO priority"),
			
 
				 	OPT_INTEGER('d', "delay", &top.delay_secs,
			
@@ -1211,16 +1071,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 			    "dump the symbol table used for profiling"),
			
 
				 	OPT_INTEGER('f', "count-filter", &top.count_filter,
			
 
				 		    "only display functions with more events than this"),
			
 
				-	OPT_BOOLEAN('g', "group", &top.group,
			
 
				+	OPT_BOOLEAN('g', "group", &opts->group,
			
 
				 			    "put the counters into a counter group"),
			
 
				-	OPT_BOOLEAN('i', "inherit", &top.inherit,
			
 
				-		    "child tasks inherit counters"),
			
 
				+	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
			
 
				+		    "child tasks do not inherit counters"),
			
 
				 	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
			
 
				 		    "symbol to annotate"),
			
 
				-	OPT_BOOLEAN('z', "zero", &top.zero,
			
 
				-		    "zero history across updates"),
			
 
				-	OPT_INTEGER('F', "freq", &top.freq,
			
 
				-		    "profile at this frequency"),
			
 
				+	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
			
 
				+	OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"),
			
 
				 	OPT_INTEGER('E', "entries", &top.print_entries,
			
 
				 		    "display this many functions"),
			
 
				 	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
			
@@ -1233,10 +1091,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		   "sort by key(s): pid, comm, dso, symbol, parent"),
			
 
				 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
			
 
				 		    "Show a column with the number of samples"),
			
 
				-	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
			
 
				-		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
			
 
				-		     "Default: fractal,0.5,callee", &parse_callchain_opt,
			
 
				-		     callchain_default_opt),
			
 
				+	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
			
 
				+			     "mode[,dump_size]", record_callchain_help,
			
 
				+			     &parse_callchain_opt, "fp"),
			
 
				 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
			
 
				 		    "Show a column with the sum of periods"),
			
 
				 	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
			
@@ -1251,7 +1108,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 		    "Display raw encoding of assembly instructions (default)"),
			
 
				 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
			
 
				 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
			
 
				-	OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"),
			
 
				+	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
			
 
				 	OPT_END()
			
 
				 	};
			
 
				 	const char * const top_usage[] = {
			
@@ -1272,7 +1129,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	if (sort_order == default_sort_order)
			
 
				 		sort_order = "dso,symbol";
			
 
				 
			
 
				-	setup_sorting(top_usage, options);
			
 
				+	if (setup_sorting() < 0)
			
 
				+		usage_with_options(top_usage, options);
			
 
				 
			
 
				 	if (top.use_stdio)
			
 
				 		use_browser = 0;
			
@@ -1281,33 +1139,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 
			
 
				 	setup_browser(false);
			
 
				 
			
 
				-	status = perf_target__validate(&top.target);
			
 
				+	status = perf_target__validate(target);
			
 
				 	if (status) {
			
 
				-		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
			
 
				+		perf_target__strerror(target, status, errbuf, BUFSIZ);
			
 
				 		ui__warning("%s", errbuf);
			
 
				 	}
			
 
				 
			
 
				-	status = perf_target__parse_uid(&top.target);
			
 
				+	status = perf_target__parse_uid(target);
			
 
				 	if (status) {
			
 
				 		int saved_errno = errno;
			
 
				 
			
 
				-		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
			
 
				+		perf_target__strerror(target, status, errbuf, BUFSIZ);
			
 
				 		ui__error("%s", errbuf);
			
 
				 
			
 
				 		status = -saved_errno;
			
 
				 		goto out_delete_evlist;
			
 
				 	}
			
 
				 
			
 
				-	if (perf_target__none(&top.target))
			
 
				-		top.target.system_wide = true;
			
 
				+	if (perf_target__none(target))
			
 
				+		target->system_wide = true;
			
 
				 
			
 
				-	if (perf_evlist__create_maps(top.evlist, &top.target) < 0)
			
 
				+	if (perf_evlist__create_maps(top.evlist, target) < 0)
			
 
				 		usage_with_options(top_usage, options);
			
 
				 
			
 
				 	if (!top.evlist->nr_entries &&
			
 
				 	    perf_evlist__add_default(top.evlist) < 0) {
			
 
				 		ui__error("Not enough memory for event selector list\n");
			
 
				-		return -ENOMEM;
			
 
				+		goto out_delete_maps;
			
 
				 	}
			
 
				 
			
 
				 	symbol_conf.nr_events = top.evlist->nr_entries;
			
@@ -1315,24 +1173,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 	if (top.delay_secs < 1)
			
 
				 		top.delay_secs = 1;
			
 
				 
			
 
				+	if (opts->user_interval != ULLONG_MAX)
			
 
				+		opts->default_interval = opts->user_interval;
			
 
				+	if (opts->user_freq != UINT_MAX)
			
 
				+		opts->freq = opts->user_freq;
			
 
				+
			
 
				 	/*
			
 
				 	 * User specified count overrides default frequency.
			
 
				 	 */
			
 
				-	if (top.default_interval)
			
 
				-		top.freq = 0;
			
 
				-	else if (top.freq) {
			
 
				-		top.default_interval = top.freq;
			
 
				+	if (opts->default_interval)
			
 
				+		opts->freq = 0;
			
 
				+	else if (opts->freq) {
			
 
				+		opts->default_interval = opts->freq;
			
 
				 	} else {
			
 
				 		ui__error("frequency and count are zero, aborting\n");
			
 
				-		exit(EXIT_FAILURE);
			
 
				-	}
			
 
				-
			
 
				-	list_for_each_entry(pos, &top.evlist->entries, node) {
			
 
				-		/*
			
 
				-		 * Fill in the ones not specifically initialized via -c:
			
 
				-		 */
			
 
				-		if (!pos->attr.sample_period)
			
 
				-			pos->attr.sample_period = top.default_interval;
			
 
				+		status = -EINVAL;
			
 
				+		goto out_delete_maps;
			
 
				 	}
			
 
				 
			
 
				 	top.sym_evsel = perf_evlist__first(top.evlist);
			
@@ -1365,6 +1221,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
				 
			
 
				 	status = __cmd_top(&top);
			
 
				 
			
 
				+out_delete_maps:
			
 
				+	perf_evlist__delete_maps(top.evlist);
			
 
				 out_delete_evlist:
			
 
				 	perf_evlist__delete(top.evlist);
			
 
				 
			
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -455,7 +455,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
				 		goto out_delete_evlist;
			
 
				 	}
			
 
				 
			
 
				-	perf_evlist__config_attrs(evlist, &trace->opts);
			
 
				+	perf_evlist__config(evlist, &trace->opts);
			
 
				 
			
 
				 	signal(SIGCHLD, sig_handler);
			
 
				 	signal(SIGINT, sig_handler);
			
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -225,3 +225,14 @@ int main(void)
 
				 	return on_exit(NULL, NULL);
			
 
				 }
			
 
				 endef
			
 
				+
			
 
				+define SOURCE_LIBNUMA
			
 
				+#include <numa.h>
			
 
				+#include <numaif.h>
			
 
				+
			
 
				+int main(void)
			
 
				+{
			
 
				+	numa_available();
			
 
				+	return 0;
			
 
				+}
			
 
				+endef
			
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -13,7 +13,7 @@ newline := $(newline)
 
				 # what should replace a newline when escaping
			
 
				 # newlines; the default is a bizarre string.
			
 
				 #
			
 
				-nl-escape = $(or $(1),m822df3020w6a44id34bt574ctac44eb9f4n)
			
 
				+nl-escape = $(if $(1),$(1),m822df3020w6a44id34bt574ctac44eb9f4n)
			
 
				 
			
 
				 # escape-nl
			
 
				 #
			
@@ -173,9 +173,9 @@ _ge-abspath = $(if $(is-executable),$(1))
 
				 # Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default)
			
 
				 #
			
 
				 define get-executable-or-default
			
 
				-$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
			
 
				+$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1)))
			
 
				 endef
			
 
				-_ge_attempt = $(or $(get-executable),$(_gea_warn),$(call _gea_err,$(2)))
			
 
				+_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2)))
			
 
				 _gea_warn = $(warning The path '$(1)' is not executable.)
			
 
				 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
			
 
				 
			
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -328,14 +328,23 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 
				 	if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
			
 
				 		return 0;
			
 
				 
			
 
				+	status = 1;
			
 
				 	/* Check for ENOSPC and EIO errors.. */
			
 
				-	if (fflush(stdout))
			
 
				-		die("write failure on standard output: %s", strerror(errno));
			
 
				-	if (ferror(stdout))
			
 
				-		die("unknown write failure on standard output");
			
 
				-	if (fclose(stdout))
			
 
				-		die("close failed on standard output: %s", strerror(errno));
			
 
				-	return 0;
			
 
				+	if (fflush(stdout)) {
			
 
				+		fprintf(stderr, "write failure on standard output: %s", strerror(errno));
			
 
				+		goto out;
			
 
				+	}
			
 
				+	if (ferror(stdout)) {
			
 
				+		fprintf(stderr, "unknown write failure on standard output");
			
 
				+		goto out;
			
 
				+	}
			
 
				+	if (fclose(stdout)) {
			
 
				+		fprintf(stderr, "close failed on standard output: %s", strerror(errno));
			
 
				+		goto out;
			
 
				+	}
			
 
				+	status = 0;
			
 
				+out:
			
 
				+	return status;
			
 
				 }
			
 
				 
			
 
				 static void handle_internal_command(int argc, const char **argv)
			
@@ -467,7 +476,8 @@ int main(int argc, const char **argv)
 
				 		cmd += 5;
			
 
				 		argv[0] = cmd;
			
 
				 		handle_internal_command(argc, argv);
			
 
				-		die("cannot handle %s internally", cmd);
			
 
				+		fprintf(stderr, "cannot handle %s internally", cmd);
			
 
				+		goto out;
			
 
				 	}
			
 
				 
			
 
				 	/* Look for flags.. */
			
@@ -485,7 +495,7 @@ int main(int argc, const char **argv)
 
				 		printf("\n usage: %s\n\n", perf_usage_string);
			
 
				 		list_common_cmds_help();
			
 
				 		printf("\n %s\n\n", perf_more_info_string);
			
 
				-		exit(1);
			
 
				+		goto out;
			
 
				 	}
			
 
				 	cmd = argv[0];
			
 
				 
			
@@ -517,7 +527,7 @@ int main(int argc, const char **argv)
 
				 			fprintf(stderr, "Expansion of alias '%s' failed; "
			
 
				 				"'%s' is not a perf-command\n",
			
 
				 				cmd, argv[0]);
			
 
				-			exit(1);
			
 
				+			goto out;
			
 
				 		}
			
 
				 		if (!done_help) {
			
 
				 			cmd = argv[0] = help_unknown_cmd(cmd);
			
@@ -528,6 +538,6 @@ int main(int argc, const char **argv)
 
				 
			
 
				 	fprintf(stderr, "Failed to run command '%s': %s\n",
			
 
				 		cmd, strerror(errno));
			
 
				-
			
 
				+out:
			
 
				 	return 1;
			
 
				 }