
Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing into perf/core

Ingo Molnar
commit 0806ebd974

+ 11 - 0
arch/Kconfig

@@ -137,6 +137,17 @@ config HAVE_HW_BREAKPOINT
 	bool
 	depends on PERF_EVENTS
 
+config HAVE_MIXED_BREAKPOINTS_REGS
+	bool
+	depends on HAVE_HW_BREAKPOINT
+	help
+	  Depending on the arch implementation of hardware breakpoints,
+	  some of them have separate registers for data and instruction
+	  breakpoint addresses, while others have mixed registers and
+	  encode the access type in a control register.
+	  Select this option if your arch implements breakpoints in the
+	  latter fashion.
+
 config HAVE_USER_RETURN_NOTIFIER
 	bool
 

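The distinction this option draws decides whether instruction and data
breakpoints compete for one slot pool or two. Below is a minimal sketch of
how an arch header might report its slots under each model; it is not part
of this patch, and HBP_NUM_DATA/HBP_NUM_INST are hypothetical names for a
split-register arch:

static inline int hw_breakpoint_slots(int type)
{
#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
	/* One mixed pool: the type argument is ignored (x86 and sh below) */
	return HBP_NUM;
#else
	/* Split pools: report the count for the requested slot type */
	return (type == TYPE_DATA) ? HBP_NUM_DATA : HBP_NUM_INST;
#endif
}
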
+ 1 - 0
arch/sh/Kconfig

@@ -44,6 +44,7 @@ config SUPERH32
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_ARCH_KGDB
 	select HAVE_HW_BREAKPOINT
+	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS if HAVE_HW_BREAKPOINT
 	select ARCH_HIBERNATION_POSSIBLE if MMU
 

+ 7 - 3
arch/sh/include/asm/hw_breakpoint.h

@@ -46,10 +46,14 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM		2
 
+static inline int hw_breakpoint_slots(int type)
+{
+	return HBP_NUM;
+}
+
 /* arch/sh/kernel/hw_breakpoint.c */
-extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
-					 struct task_struct *tsk);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 					   unsigned long val, void *data);
 

+ 7 - 27
arch/sh/kernel/hw_breakpoint.c

@@ -119,26 +119,17 @@ static int get_hbp_len(u16 hbp_len)
 	return len_in_bytes;
 }
 
-/*
- * Check for virtual address in user space.
- */
-int arch_check_va_in_userspace(unsigned long va, u16 hbp_len)
-{
-	unsigned int len;
-
-	len = get_hbp_len(hbp_len);
-
-	return (va <= TASK_SIZE - len);
-}
-
 /*
  * Check for virtual address in kernel space.
  */
-static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
 {
 	unsigned int len;
+	unsigned long va;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	len = get_hbp_len(hbp_len);
+	va = info->address;
+	len = get_hbp_len(info->len);
 
 	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp,
-				  struct task_struct *tsk)
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	unsigned int align;
@@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 	if (info->address & align)
 		return -EINVAL;
 
-	/* Check that the virtual address is in the proper range */
-	if (tsk) {
-		if (!arch_check_va_in_userspace(info->address, info->len))
-			return -EFAULT;
-	} else {
-		if (!arch_check_va_in_kernelspace(info->address, info->len))
-			return -EFAULT;
-	}
-
 	return 0;
 }
 
@@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 		perf_bp_event(bp, args->regs);
 
 		/* Deliver the signal to userspace */
-		if (arch_check_va_in_userspace(bp->attr.bp_addr,
-					       bp->attr.bp_len)) {
+		if (!arch_check_bp_in_kernelspace(bp)) {
 			siginfo_t info;
 
 			info.si_signo = args->signr;

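Both arch implementations of arch_check_bp_in_kernelspace() now pull the
address and length out of the breakpoint's arch info instead of taking them
as parameters. A standalone sketch of the same predicate, with an
illustrative 32-bit TASK_SIZE (an assumption, not from the patch); the
first clause requires the start to sit at or above the kernel boundary and
the second also rejects ranges that wrap past the top of the address space:

#define TASK_SIZE 0xc0000000UL	/* illustrative 3G/1G split, an assumption */

static int bp_in_kernelspace(unsigned long va, unsigned int len)
{
	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}

For va = 0xbffffffe and len = 4 the first clause is false, so a range
straddling the boundary counts as user space, and hw_breakpoint_handler()
above still delivers the signal to userspace for it.
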
+ 1 - 1
arch/sh/kernel/ptrace_32.c

@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 
 	bp = thread->ptrace_bps[0];
 	if (!bp) {
-		hw_breakpoint_init(&attr);
+		ptrace_breakpoint_init(&attr);
 
 		attr.bp_addr = addr;
 		attr.bp_len = HW_BREAKPOINT_LEN_2;

+ 1 - 0
arch/x86/Kconfig

@@ -53,6 +53,7 @@ config X86
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_HW_BREAKPOINT
+	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK

+ 7 - 3
arch/x86/include/asm/hw_breakpoint.h

@@ -41,12 +41,16 @@ struct arch_hw_breakpoint {
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
+static inline int hw_breakpoint_slots(int type)
+{
+	return HBP_NUM;
+}
+
 struct perf_event;
 struct pmu;
 
-extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
-					 struct task_struct *tsk);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 					   unsigned long val, void *data);
 

+ 6 - 35
arch/x86/kernel/hw_breakpoint.c

@@ -188,26 +188,17 @@ static int get_hbp_len(u8 hbp_len)
 	return len_in_bytes;
 }
 
-/*
- * Check for virtual address in user space.
- */
-int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
-{
-	unsigned int len;
-
-	len = get_hbp_len(hbp_len);
-
-	return (va <= TASK_SIZE - len);
-}
-
 /*
  * Check for virtual address in kernel space.
  */
-static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
 {
 	unsigned int len;
+	unsigned long va;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	len = get_hbp_len(hbp_len);
+	va = info->address;
+	len = get_hbp_len(info->len);
 
 	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp,
-				  struct task_struct *tsk)
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	unsigned int align;
@@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 
 	ret = -EINVAL;
 
-	if (info->type == X86_BREAKPOINT_EXECUTE)
-		/*
-		 * Ptrace-refactoring code
-		 * For now, we'll allow instruction breakpoint only for user-space
-		 * addresses
-		 */
-		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
-			info->len != X86_BREAKPOINT_EXECUTE)
-			return ret;
-
 	switch (info->len) {
 	case X86_BREAKPOINT_LEN_1:
 		align = 0;
@@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 	if (info->address & align)
 		return -EINVAL;
 
-	/* Check that the virtual address is in the proper range */
-	if (tsk) {
-		if (!arch_check_va_in_userspace(info->address, info->len))
-			return -EFAULT;
-	} else {
-		if (!arch_check_va_in_kernelspace(info->address, info->len))
-			return -EFAULT;
-	}
-
 	return 0;
 }
 

+ 1 - 1
arch/x86/kernel/ptrace.c

@@ -688,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event_attr attr;
 
 	if (!t->ptrace_bps[nr]) {
-		hw_breakpoint_init(&attr);
+		ptrace_breakpoint_init(&attr);
 		/*
 		 * Put stub len and type to register (reserve) an inactive but
 		 * correct bp

+ 22 - 3
include/linux/hw_breakpoint.h

@@ -9,9 +9,22 @@ enum {
 };
 
 enum {
-	HW_BREAKPOINT_R = 1,
-	HW_BREAKPOINT_W = 2,
-	HW_BREAKPOINT_X = 4,
+	HW_BREAKPOINT_EMPTY	= 0,
+	HW_BREAKPOINT_R		= 1,
+	HW_BREAKPOINT_W		= 2,
+	HW_BREAKPOINT_RW	= HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+	HW_BREAKPOINT_X		= 4,
+	HW_BREAKPOINT_INVALID	= HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
+};
+
+enum bp_type_idx {
+	TYPE_INST	= 0,
+#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
+	TYPE_DATA	= 0,
+#else
+	TYPE_DATA	= 1,
+#endif
+	TYPE_MAX
+};
 
 #ifdef __KERNEL__
@@ -34,6 +47,12 @@ static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 	attr->sample_period = 1;
 }
 
+static inline void ptrace_breakpoint_init(struct perf_event_attr *attr)
+{
+	hw_breakpoint_init(attr);
+	attr->exclude_kernel = 1;
+}
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;

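ptrace_breakpoint_init() differs from hw_breakpoint_init() only in setting
exclude_kernel, which validate_hw_breakpoint() (added in
kernel/hw_breakpoint.c below) turns into a hard -EINVAL for kernel
addresses. A usage sketch mirroring the arch/sh/kernel/ptrace_32.c hunk
above; the helper name, addr parameter and bp_type choice are illustrative,
not from the patch:

static void ptrace_fill_bp_attr(struct perf_event_attr *attr,
				unsigned long addr)
{
	ptrace_breakpoint_init(attr);	/* hw_breakpoint_init() + exclude_kernel = 1 */
	attr->bp_addr = addr;		/* user-space address supplied via ptrace */
	attr->bp_len  = HW_BREAKPOINT_LEN_2;
	attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
}
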
+ 148 - 48
kernel/hw_breakpoint.c

@@ -40,23 +40,29 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
 
+
 /*
  * Constraints data
  */
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, *nr_task_bp_pinned[TYPE_MAX]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
+
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+__weak int hw_breakpoint_weight(struct perf_event *bp)
+{
+	return 1;
+}
+
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+		return TYPE_DATA;
+
+	return TYPE_INST;
+}
+
 /*
  * Report the maximum number of pinned breakpoints a task
  * can have on this cpu
  */
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
-	for (i = HBP_NUM -1; i >= 0; i--) {
+	for (i = nr_slots[type] - 1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 {
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
 	 */
 	list_for_each_entry(bp, list, event_entry) {
 		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
+			if (find_slot_idx(bp) == type)
+				count += hw_breakpoint_weight(bp);
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+		    enum bp_type_idx type)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
 	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			slots->pinned += max_task_bp_pinned(cpu);
+			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(tsk);
-		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+			slots->pinned += task_bp_pinned(tsk, type);
+		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
 	}
@@ -137,48 +158,66 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 	for_each_online_cpu(cpu) {
 		unsigned int nr;
 
-		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			nr += max_task_bp_pinned(cpu);
+			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(tsk);
+			nr += task_bp_pinned(tsk, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 
-		nr = per_cpu(nr_bp_flexible, cpu);
+		nr = per_cpu(nr_bp_flexible[type], cpu);
 
 		if (nr > slots->flexible)
 			slots->flexible = nr;
 	}
 }
 
+/*
+ * For now, continue to consider flexible as pinned, until we can
+ * ensure no flexible event can ever be scheduled before a pinned event
+ * on the same cpu.
+ */
+static void
+fetch_this_slot(struct bp_busy_slots *slots, int weight)
+{
+	slots->pinned += weight;
+}
+
 /*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+				enum bp_type_idx type, int weight)
 {
 	unsigned int *tsk_pinned;
-	int count = 0;
+	int old_count = 0;
+	int old_idx = 0;
+	int idx = 0;
 
-	count = task_bp_pinned(tsk);
+	old_count = task_bp_pinned(tsk, type);
+	old_idx = old_count - 1;
+	idx = old_idx + weight;
 
-	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
-		tsk_pinned[count]++;
-		if (count > 0)
-			tsk_pinned[count-1]--;
+		tsk_pinned[idx]++;
+		if (old_count > 0)
+			tsk_pinned[old_idx]--;
 	} else {
-		tsk_pinned[count]--;
-		if (count > 0)
-			tsk_pinned[count-1]++;
+		tsk_pinned[idx]--;
+		if (old_count > 0)
+			tsk_pinned[old_idx]++;
 	}
 }
 
 /*
  * Add/remove the given breakpoint in our constraint table
  */
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
+	       int weight)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 	/* Pinned counter task profiling */
 	if (tsk) {
 		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 			return;
 		}
 
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 		return;
 	}
 
 	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
 	else
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
 }
 
 /*
@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
+	enum bp_type_idx type;
+	int weight;
 
-	fetch_bp_busy_slots(&slots, bp);
+	/* We couldn't initialize breakpoint constraints on boot */
+	if (!constraints_initialized)
+		return -ENOMEM;
+
+	/* Basic checks */
+	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+		return -EINVAL;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+
+	fetch_bp_busy_slots(&slots, bp, type);
+	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
 		return -ENOSPC;
 
-	toggle_bp_slot(bp, true);
+	toggle_bp_slot(bp, true, type, weight);
 
 	return 0;
 }
@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
 
 static void __release_bp_slot(struct perf_event *bp)
 {
-	toggle_bp_slot(bp, false);
+	enum bp_type_idx type;
+	int weight;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+	toggle_bp_slot(bp, false, type, weight);
 }
 
 void release_bp_slot(struct perf_event *bp)
@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
 	return 0;
 }
 
+static int validate_hw_breakpoint(struct perf_event *bp)
+{
+	int ret;
+
+	ret = arch_validate_hwbkpt_settings(bp);
+	if (ret)
+		return ret;
+
+	if (arch_check_bp_in_kernelspace(bp)) {
+		if (bp->attr.exclude_kernel)
+			return -EINVAL;
+		/*
+		 * Don't let unprivileged users set a breakpoint in the trap
+		 * path to avoid trap recursion attacks.
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	return 0;
+}
+
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	if (ret)
 		return ret;
 
-	/*
-	 * Ptrace breakpoints can be temporary perf events only
-	 * meant to reserve a slot. In this case, it is created disabled and
-	 * we don't want to check the params right now (as we put a null addr)
-	 * But perf tools create events as disabled and we want to check
-	 * the params for them.
-	 * This is a quick hack that will be removed soon, once we remove
-	 * the tmp breakpoints from ptrace
-	 */
-	if (!bp->attr.disabled || !bp->overflow_handler)
-		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	ret = validate_hw_breakpoint(bp);
 
 	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
 	if (ret)
@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	if (attr->disabled)
 		goto end;
 
-	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	err = validate_hw_breakpoint(bp);
 	if (!err)
 		perf_event_enable(bp);
 
@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
+	unsigned int **task_bp_pinned;
+	int cpu, err_cpu;
+	int i;
+
+	for (i = 0; i < TYPE_MAX; i++)
+		nr_slots[i] = hw_breakpoint_slots(i);
+
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < TYPE_MAX; i++) {
+			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+						  GFP_KERNEL);
+			if (!*task_bp_pinned)
+				goto err_alloc;
+		}
+	}
+
+	constraints_initialized = 1;
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+ err_alloc:
+	for_each_possible_cpu(err_cpu) {
+		if (err_cpu == cpu)
+			break;
+		for (i = 0; i < TYPE_MAX; i++)
+			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+	}
+
+	return -ENOMEM;
 }
 core_initcall(init_hw_breakpoint);
 

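A worked example of the accounting above, under the patch's own semantics:
tsk_pinned[i] counts the tasks that pin (i + 1) slots of one type on a cpu,
and the default hw_breakpoint_weight() is 1, so weights reduce to plain
counts. The values below are illustrative:

/* Task T already pins one data slot on this cpu: tsk_pinned[0] == 1 */
unsigned int tsk_pinned[4] = { 1, 0, 0, 0 };
int old_count = 1;			/* task_bp_pinned(T, TYPE_DATA) */
int weight = 1;				/* hw_breakpoint_weight() default */
int idx = old_count - 1 + weight;

tsk_pinned[idx]++;			/* T now counted in the "pins 2" bucket */
tsk_pinned[old_count - 1]--;		/* ...and removed from the "pins 1" bucket */

__reserve_bp_slot() then refuses a request once
slots.pinned + (!!slots.flexible) exceeds nr_slots[type]; on x86
(nr_slots == 4), three existing pinned breakpoints plus one flexible plus
this request gives 5 > 4, hence -ENOSPC.
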
+ 7 - 19
kernel/trace/trace_ksym.c

@@ -34,12 +34,6 @@
 
 #include <asm/atomic.h>
 
-/*
- * For now, let us restrict the no. of symbols traced simultaneously to number
- * of available hardware breakpoint registers.
- */
-#define KSYM_TRACER_MAX HBP_NUM
-
 #define KSYM_TRACER_OP_LEN 3 /* rw- */
 
 struct trace_ksym {
@@ -53,7 +47,6 @@ struct trace_ksym {
 
 static struct trace_array *ksym_trace_array;
 
-static unsigned int ksym_filter_entry_count;
 static unsigned int ksym_tracing_enabled;
 
 static HLIST_HEAD(ksym_filter_head);
@@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	struct trace_ksym *entry;
 	int ret = -ENOMEM;
 
-	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
-		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
-		" new requests for tracing can be accepted now.\n",
-			KSYM_TRACER_MAX);
-		return -ENOSPC;
-	}
-
 	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
@@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 
 	if (IS_ERR(entry->ksym_hbp)) {
 		ret = PTR_ERR(entry->ksym_hbp);
-		printk(KERN_INFO "ksym_tracer request failed. Try again"
-					" later!!\n");
+		if (ret == -ENOSPC) {
+			printk(KERN_ERR "ksym_tracer: Maximum limit reached."
+			" No new requests for tracing can be accepted now.\n");
+		} else {
+			printk(KERN_INFO "ksym_tracer request failed. Try again"
+					 " later!!\n");
+		}
 		goto err;
 	}
 
 	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
-	ksym_filter_entry_count++;
 
 	return 0;
 
@@ -265,7 +255,6 @@ static void __ksym_trace_reset(void)
 	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
 								ksym_hlist) {
 		unregister_wide_hw_breakpoint(entry->ksym_hbp);
-		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);
@@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 				goto out_unlock;
 		}
 		/* Error or "symbol:---" case: drop it */
-		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);

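With KSYM_TRACER_MAX gone, the slot limit is enforced centrally by
__reserve_bp_slot(), and the tracer only decodes the error code. A hedged
sketch of that flow, assuming register_wide_hw_breakpoint() propagates the
core's -ENOSPC via ERR_PTR, as the hunk above relies on:

entry->ksym_hbp = register_wide_hw_breakpoint(&attr, ksym_hbp_handler);
if (IS_ERR(entry->ksym_hbp)) {
	ret = PTR_ERR(entry->ksym_hbp);
	if (ret == -ENOSPC)	/* every slot of this type is pinned */
		printk(KERN_ERR "ksym_tracer: Maximum limit reached.\n");
	goto err;
}
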
+ 2 - 4
tools/perf/Documentation/perf-trace-perl.txt

@@ -49,12 +49,10 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+ # perf record -a -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above options: -c 1 says to sample every event, -a to enable
-system-wide collection, -M to multiplex the output, and -R to collect
-raw samples.
+the above option: -a to enable system-wide collection.
 
 The format file for the sched_wakeup event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):

+ 4 - 6
tools/perf/Documentation/perf-trace-python.txt

@@ -93,7 +93,7 @@ don't care how it exited, so we'll use 'perf record' to record only
 the sys_enter events:
 
 ----
-# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+# perf record -a -e raw_syscalls:sys_enter
 
 ^C[ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
@@ -359,7 +359,7 @@ your script:
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
 
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+perf record -a -e raw_syscalls:sys_enter
 ----
 
 The 'report' script is also a shell script with the same base name as
@@ -449,12 +449,10 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+ # perf record -a -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above options: -c 1 says to sample every event, -a to enable
-system-wide collection, -M to multiplex the output, and -R to collect
-raw samples.
+the above option: -a to enable system-wide collection.
 
 The format file for the sched_wakeup event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):

+ 1 - 1
tools/perf/scripts/perl/bin/check-perf-trace-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
+perf record -a -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree

+ 1 - 1
tools/perf/scripts/perl/bin/failed-syscalls-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
+perf record -a -e raw_syscalls:sys_exit $@

+ 1 - 1
tools/perf/scripts/perl/bin/rw-by-file-record

@@ -1,3 +1,3 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
 

+ 1 - 1
tools/perf/scripts/perl/bin/rw-by-pid-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@

+ 1 - 1
tools/perf/scripts/perl/bin/rwtop-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@

+ 1 - 1
tools/perf/scripts/perl/bin/wakeup-latency-record

@@ -1,5 +1,5 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup $@
+perf record -a -e sched:sched_switch -e sched:sched_wakeup $@
 
 
 

+ 1 - 1
tools/perf/scripts/perl/bin/workqueue-stats-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
+perf record -a -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@

+ 1 - 1
tools/perf/scripts/python/bin/failed-syscalls-by-pid-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
+perf record -a -e raw_syscalls:sys_exit $@

+ 1 - 1
tools/perf/scripts/python/bin/sctop-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@

+ 1 - 1
tools/perf/scripts/python/bin/syscall-counts-by-pid-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@

+ 1 - 1
tools/perf/scripts/python/bin/syscall-counts-record

@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@

+ 0 - 89
tools/perf/util/trace-event-parse.c

@@ -691,11 +691,6 @@ static int __read_expected(enum event_type expect, const char *str,
 	return ret;
 }
 
-static int read_expected_warn(enum event_type expect, const char *str, bool warn)
-{
-	return __read_expected(expect, str, 1, warn);
-}
-
 static int read_expected(enum event_type expect, const char *str)
 {
 	return __read_expected(expect, str, 1, true);
@@ -3104,90 +3099,6 @@ static void print_args(struct print_arg *args)
 	}
 }
 
-static void parse_header_field(const char *field,
-			       int *offset, int *size, bool warn)
-{
-	char *token;
-	int type;
-
-	if (read_expected(EVENT_ITEM, "field") < 0)
-		return;
-	if (read_expected(EVENT_OP, ":") < 0)
-		return;
-
-	/* type */
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		goto fail;
-	free_token(token);
-
-	if (read_expected_warn(EVENT_ITEM, field, warn) < 0)
-		return;
-	if (read_expected(EVENT_OP, ";") < 0)
-		return;
-	if (read_expected(EVENT_ITEM, "offset") < 0)
-		return;
-	if (read_expected(EVENT_OP, ":") < 0)
-		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		goto fail;
-	*offset = atoi(token);
-	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
-		return;
-	if (read_expected(EVENT_ITEM, "size") < 0)
-		return;
-	if (read_expected(EVENT_OP, ":") < 0)
-		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
-		goto fail;
-	*size = atoi(token);
-	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
-		return;
-	type = read_token(&token);
-	if (type != EVENT_NEWLINE) {
-		/* newer versions of the kernel have a "signed" type */
-		if (type != EVENT_ITEM)
-			goto fail;
-
-		if (strcmp(token, "signed") != 0)
-			goto fail;
-
-		free_token(token);
-
-		if (read_expected(EVENT_OP, ":") < 0)
-			return;
-
-		if (read_expect_type(EVENT_ITEM, &token))
-			goto fail;
-
-		free_token(token);
-		if (read_expected(EVENT_OP, ";") < 0)
-			return;
-
-		if (read_expect_type(EVENT_NEWLINE, &token))
-			goto fail;
-	}
- fail:
-	free_token(token);
-}
-
-int parse_header_page(char *buf, unsigned long size)
-{
-	init_input_buf(buf, size);
-
-	parse_header_field("timestamp", &header_page_ts_offset,
-			   &header_page_ts_size, true);
-	parse_header_field("commit", &header_page_size_offset,
-			   &header_page_size_size, true);
-	parse_header_field("overwrite", &header_page_overwrite_offset,
-			   &header_page_overwrite_size, false);
-	parse_header_field("data", &header_page_data_offset,
-			   &header_page_data_size, true);
-
-	return 0;
-}
-
 int parse_ftrace_file(char *buf, unsigned long size)
 {
 	struct format_field *field;

+ 7 - 5
tools/perf/util/trace-event-read.c

@@ -53,6 +53,12 @@ static unsigned long	page_size;
 static ssize_t calc_data_size;
 static bool repipe;
 
+/* If it fails, the next read will report it */
+static void skip(int size)
+{
+	lseek(input_fd, size, SEEK_CUR);
+}
+
 static int do_read(int fd, void *buf, int size)
 {
 	int rsize = size;
@@ -184,7 +190,6 @@ static void read_ftrace_printk(void)
 static void read_header_files(void)
 {
 	unsigned long long size;
-	char *header_page;
 	char *header_event;
 	char buf[BUFSIZ];
 
@@ -194,10 +199,7 @@ static void read_header_files(void)
 		die("did not read header page");
 		die("did not read header page");
 
 
 	size = read8();
 	size = read8();
-	header_page = malloc_or_die(size);
-	read_or_die(header_page, size);
-	parse_header_page(header_page, size);
-	free(header_page);
+	skip(size);
 
 
 	/*
 	/*
 	 * The size field in the page is of type long,
 	 * The size field in the page is of type long,

+ 0 - 1
tools/perf/util/trace-event.h

@@ -244,7 +244,6 @@ extern int header_page_data_size;
 
 extern bool latency_format;
 
-int parse_header_page(char *buf, unsigned long size);
 int trace_parse_common_type(void *data);
 int trace_parse_common_pid(void *data);
 int parse_common_pc(void *data);