12 years ago · 4913ae3991
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -76,6 +76,15 @@ config OPTPROBES
 
				 	depends on KPROBES && HAVE_OPTPROBES
			
 
				 	depends on !PREEMPT
			
 
				 
			
 
				+config KPROBES_ON_FTRACE
			
 
				+	def_bool y
			
 
				+	depends on KPROBES && HAVE_KPROBES_ON_FTRACE
			
 
				+	depends on DYNAMIC_FTRACE_WITH_REGS
			
 
				+	help
			
 
				+	 If function tracer is enabled and the arch supports full
			
 
				+	 passing of pt_regs to function tracing, then kprobes can
			
 
				+	 optimize on top of function tracing.
			
 
				+
			
 
				 config UPROBES
			
 
				 	bool "Transparent user-space probes (EXPERIMENTAL)"
			
 
				 	depends on UPROBE_EVENT && PERF_EVENTS
			
@@ -158,6 +167,9 @@ config HAVE_KRETPROBES
 
				 config HAVE_OPTPROBES
			
 
				 	bool
			
 
				 
			
 
				+config HAVE_KPROBES_ON_FTRACE
			
 
				+	bool
			
 
				+
			
 
				 config HAVE_NMI_WATCHDOG
			
 
				 	bool
			
 
				 #
			
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,10 +40,12 @@ config X86
 
				 	select HAVE_DMA_CONTIGUOUS if !SWIOTLB
			
 
				 	select HAVE_KRETPROBES
			
 
				 	select HAVE_OPTPROBES
			
 
				+	select HAVE_KPROBES_ON_FTRACE
			
 
				 	select HAVE_FTRACE_MCOUNT_RECORD
			
 
				 	select HAVE_FENTRY if X86_64
			
 
				 	select HAVE_C_RECORDMCOUNT
			
 
				 	select HAVE_DYNAMIC_FTRACE
			
 
				+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
			
 
				 	select HAVE_FUNCTION_TRACER
			
 
				 	select HAVE_FUNCTION_GRAPH_TRACER
			
 
				 	select HAVE_FUNCTION_GRAPH_FP_TEST
			
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
 
				 
			
 
				 #ifdef CONFIG_DYNAMIC_FTRACE
			
 
				 #define ARCH_SUPPORTS_FTRACE_OPS 1
			
 
				-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				 #endif
			
 
				 
			
 
				 #ifndef __ASSEMBLY__
			
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC)		+= trace_clock.o
 
				 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
			
 
				 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
			
 
				 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
			
 
				-obj-$(CONFIG_KPROBES)		+= kprobes.o
			
 
				-obj-$(CONFIG_OPTPROBES)		+= kprobes-opt.o
			
 
				+obj-y				+= kprobes/
			
 
				 obj-$(CONFIG_MODULES)		+= module.o
			
 
				 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
			
 
				 obj-$(CONFIG_KGDB)		+= kgdb.o
			
--- a/arch/x86/kernel/kprobes/Makefile
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
 
				+#
			
 
				+# Makefile for kernel probes
			
 
				+#
			
 
				+
			
 
				+obj-$(CONFIG_KPROBES)		+= core.o
			
 
				+obj-$(CONFIG_OPTPROBES)		+= opt.o
			
 
				+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
			
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
 
				 	return addr;
			
 
				 }
			
 
				 #endif
			
 
				+
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE	/* implemented in kprobes/ftrace.c */
			
 
				+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+			   struct kprobe_ctlblk *kcb);
			
 
				+#else	/* !CONFIG_KPROBES_ON_FTRACE */
			
 
				+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+				  struct kprobe_ctlblk *kcb)
			
 
				+{
			
 
				+	return 0;	/* nothing skipped; the caller still has to single-step */
			
 
				+}
			
 
				+#endif	/* CONFIG_KPROBES_ON_FTRACE */
			
 
				 #endif
			
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
 
				 #include <asm/insn.h>
			
 
				 #include <asm/debugreg.h>
			
 
				 
			
 
				-#include "kprobes-common.h"
			
 
				+#include "common.h"
			
 
				 
			
 
				 void jprobe_return_end(void);
			
 
				 
			
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
				 	 * Groups, and some special opcodes can not boost.
			
 
				 	 * This is non-const and volatile to keep gcc from statically
			
 
				 	 * optimizing it out, as variable_test_bit makes gcc think only
			
 
				-	 * *(unsigned long*) is used. 
			
 
				+	 * *(unsigned long*) is used.
			
 
				 	 */
			
 
				 static volatile u32 twobyte_is_boostable[256 / 32] = {
			
 
				 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
			
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
 
				 	struct __arch_relative_insn {
			
 
				 		u8 op;
			
 
				 		s32 raddr;
			
 
				-	} __attribute__((packed)) *insn;
			
 
				+	} __packed *insn;
			
 
				 
			
 
				 	insn = (struct __arch_relative_insn *)from;
			
 
				 	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
			
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 
				 	return 1;
			
 
				 }
			
 
				 
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				-				      struct kprobe_ctlblk *kcb)
			
 
				-{
			
 
				-	/*
			
 
				-	 * Emulate singlestep (and also recover regs->ip)
			
 
				-	 * as if there is a 5byte nop
			
 
				-	 */
			
 
				-	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
			
 
				-	if (unlikely(p->post_handler)) {
			
 
				-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
			
 
				-		p->post_handler(p, regs, 0);
			
 
				-	}
			
 
				-	__this_cpu_write(current_kprobe, NULL);
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 /*
			
 
				  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
			
 
				  * remain disabled throughout this function.
			
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 
				 	} else if (kprobe_running()) {
			
 
				 		p = __this_cpu_read(current_kprobe);
			
 
				 		if (p->break_handler && p->break_handler(p, regs)) {
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				-			if (kprobe_ftrace(p)) {
			
 
				-				skip_singlestep(p, regs, kcb);
			
 
				-				return 1;
			
 
				-			}
			
 
				-#endif
			
 
				-			setup_singlestep(p, regs, kcb, 0);
			
 
				+			if (!skip_singlestep(p, regs, kcb))
			
 
				+				setup_singlestep(p, regs, kcb, 0);
			
 
				 			return 1;
			
 
				 		}
			
 
				 	} /* else: not a kprobe fault; let the kernel handle it */
			
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				-/* Ftrace callback handler for kprobes */
			
 
				-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
			
 
				-				     struct ftrace_ops *ops, struct pt_regs *regs)
			
 
				-{
			
 
				-	struct kprobe *p;
			
 
				-	struct kprobe_ctlblk *kcb;
			
 
				-	unsigned long flags;
			
 
				-
			
 
				-	/* Disable irq for emulating a breakpoint and avoiding preempt */
			
 
				-	local_irq_save(flags);
			
 
				-
			
 
				-	p = get_kprobe((kprobe_opcode_t *)ip);
			
 
				-	if (unlikely(!p) || kprobe_disabled(p))
			
 
				-		goto end;
			
 
				-
			
 
				-	kcb = get_kprobe_ctlblk();
			
 
				-	if (kprobe_running()) {
			
 
				-		kprobes_inc_nmissed_count(p);
			
 
				-	} else {
			
 
				-		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
			
 
				-		regs->ip = ip + sizeof(kprobe_opcode_t);
			
 
				-
			
 
				-		__this_cpu_write(current_kprobe, p);
			
 
				-		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
			
 
				-		if (!p->pre_handler || !p->pre_handler(p, regs))
			
 
				-			skip_singlestep(p, regs, kcb);
			
 
				-		/*
			
 
				-		 * If pre_handler returns !0, it sets regs->ip and
			
 
				-		 * resets current kprobe.
			
 
				-		 */
			
 
				-	}
			
 
				-end:
			
 
				-	local_irq_restore(flags);
			
 
				-}
			
 
				-
			
 
				-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
			
 
				-{
			
 
				-	p->ainsn.insn = NULL;
			
 
				-	p->ainsn.boostable = -1;
			
 
				-	return 0;
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 int __init arch_init_kprobes(void)
			
 
				 {
			
 
				 	return arch_init_optprobes();
			
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
 
				+/*
			
 
				+ * Dynamic Ftrace based Kprobes Optimization
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2 of the License, or
			
 
				+ * (at your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write to the Free Software
			
 
				+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
			
 
				+ *
			
 
				+ * Copyright (C) Hitachi Ltd., 2012
			
 
				+ */
			
 
				+#include <linux/kprobes.h>
			
 
				+#include <linux/ptrace.h>
			
 
				+#include <linux/hardirq.h>
			
 
				+#include <linux/preempt.h>
			
 
				+#include <linux/ftrace.h>
			
 
				+
			
 
				+#include "common.h"
			
 
				+
			
 
				+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+			     struct kprobe_ctlblk *kcb)
			
 
				+{
			
 
				+	/*
			
 
				+	 * Emulate the single-step (and also recover regs->ip) as if the
			
 
				+	 * probed location held a 5-byte nop: resume right after it.
			
 
				+	 */
			
 
				+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
			
 
				+	if (unlikely(p->post_handler)) {
			
 
				+		kcb->kprobe_status = KPROBE_HIT_SSDONE;	/* set before the post handler runs */
			
 
				+		p->post_handler(p, regs, 0);
			
 
				+	}
			
 
				+	__this_cpu_write(current_kprobe, NULL);	/* no kprobe is current on this CPU any more */
			
 
				+	return 1;	/* always reports that the step was skipped */
			
 
				+}
			
 
				+
			
 
				+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
			
 
				+			      struct kprobe_ctlblk *kcb)
			
 
				+{
			
 
				+	if (kprobe_ftrace(p))	/* NOTE(review): presumably true for KPROBE_FLAG_FTRACE probes - confirm */
			
 
				+		return __skip_singlestep(p, regs, kcb);	/* step emulated; returns 1 */
			
 
				+	else
			
 
				+		return 0;	/* not handled here; caller sets up a real single-step */
			
 
				+}
			
 
				+
			
 
				+/* Ftrace callback for kprobes: run the handlers of the kprobe found at @ip */
			
 
				+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
			
 
				+				     struct ftrace_ops *ops, struct pt_regs *regs)
			
 
				+{
			
 
				+	struct kprobe *p;
			
 
				+	struct kprobe_ctlblk *kcb;
			
 
				+	unsigned long flags;
			
 
				+
			
 
				+	/* Disable irq for emulating a breakpoint and avoiding preempt */
			
 
				+	local_irq_save(flags);
			
 
				+
			
 
				+	p = get_kprobe((kprobe_opcode_t *)ip);
			
 
				+	if (unlikely(!p) || kprobe_disabled(p))
			
 
				+		goto end;	/* no probe at ip, or it is disabled */
			
 
				+
			
 
				+	kcb = get_kprobe_ctlblk();
			
 
				+	if (kprobe_running()) {
			
 
				+		kprobes_inc_nmissed_count(p);	/* a kprobe is already running: only count a miss */
			
 
				+	} else {
			
 
				+		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
			
 
				+		regs->ip = ip + sizeof(kprobe_opcode_t);
			
 
				+
			
 
				+		__this_cpu_write(current_kprobe, p);
			
 
				+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
			
 
				+		if (!p->pre_handler || !p->pre_handler(p, regs))
			
 
				+			__skip_singlestep(p, regs, kcb);	/* also runs post_handler and clears current_kprobe */
			
 
				+		/*
			
 
				+		 * If pre_handler returns !0, it sets regs->ip and
			
 
				+		 * resets current kprobe.
			
 
				+		 */
			
 
				+	}
			
 
				+end:
			
 
				+	local_irq_restore(flags);
			
 
				+}
			
 
				+
			
 
				+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
			
 
				+{
			
 
				+	p->ainsn.insn = NULL;	/* NOTE(review): presumably no insn slot is needed as the step is emulated - confirm */
			
 
				+	p->ainsn.boostable = -1;	/* NOTE(review): -1 presumably means "not boostable" - confirm */
			
 
				+	return 0;	/* cannot fail */
			
 
				+}
			
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
 
				 #include <asm/insn.h>
			
 
				 #include <asm/debugreg.h>
			
 
				 
			
 
				-#include "kprobes-common.h"
			
 
				+#include "common.h"
			
 
				 
			
 
				 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
			
 
				 {
			
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
 
				  * SAVE_REGS - The ftrace_ops wants regs saved at each function called
			
 
				  *            and passed to the callback. If this flag is set, but the
			
 
				  *            architecture does not support passing regs
			
 
				- *            (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
			
 
				+ *            (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
			
 
				  *            ftrace_ops will fail to register, unless the next flag
			
 
				  *            is set.
			
 
				  * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
			
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
 
				 #endif
			
 
				 
			
 
				 #ifndef FTRACE_REGS_ADDR
			
 
				-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
			
 
				 #else
			
 
				 # define FTRACE_REGS_ADDR FTRACE_ADDR
			
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
 
				  */
			
 
				 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
			
 
				 
			
 
				-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 /**
			
 
				  * ftrace_modify_call - convert from one addr to another (no nop)
			
 
				  * @rec: the mcount call site record
			
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -49,7 +49,6 @@ struct trace_entry {
 
				 	unsigned char		flags;
			
 
				 	unsigned char		preempt_count;
			
 
				 	int			pid;
			
 
				-	int			padding;
			
 
				 };
			
 
				 
			
 
				 #define FTRACE_MAX_EVENT						\
			
@@ -272,7 +271,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 
				 extern int trace_add_event_call(struct ftrace_event_call *call);
			
 
				 extern void trace_remove_event_call(struct ftrace_event_call *call);
			
 
				 
			
 
				-#define is_signed_type(type)	(((type)(-1)) < 0)
			
 
				+#define is_signed_type(type)	(((type)(-1)) < (type)0)
			
 
				 
			
 
				 int trace_set_clr_event(const char *system, const char *event, int set);
			
 
				 
			
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -180,10 +180,10 @@ extern void irq_exit(void);
 
				 
			
 
				 #define nmi_enter()						\
			
 
				 	do {							\
			
 
				+		lockdep_off();					\
			
 
				 		ftrace_nmi_enter();				\
			
 
				 		BUG_ON(in_nmi());				\
			
 
				 		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
			
 
				-		lockdep_off();					\
			
 
				 		rcu_nmi_enter();				\
			
 
				 		trace_hardirq_enter();				\
			
 
				 	} while (0)
			
@@ -192,10 +192,10 @@ extern void irq_exit(void);
 
				 	do {							\
			
 
				 		trace_hardirq_exit();				\
			
 
				 		rcu_nmi_exit();					\
			
 
				-		lockdep_on();					\
			
 
				 		BUG_ON(!in_nmi());				\
			
 
				 		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
			
 
				 		ftrace_nmi_exit();				\
			
 
				+		lockdep_on();					\
			
 
				 	} while (0)
			
 
				 
			
 
				 #endif /* LINUX_HARDIRQ_H */
			
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -49,16 +49,6 @@
 
				 #define KPROBE_REENTER		0x00000004
			
 
				 #define KPROBE_HIT_SSDONE	0x00000008
			
 
				 
			
 
				-/*
			
 
				- * If function tracer is enabled and the arch supports full
			
 
				- * passing of pt_regs to function tracing, then kprobes can
			
 
				- * optimize on top of function tracing.
			
 
				- */
			
 
				-#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
			
 
				-	&& defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
			
 
				-# define KPROBES_CAN_USE_FTRACE
			
 
				-#endif
			
 
				-
			
 
				 /* Attach to insert probes on any functions which should be ignored*/
			
 
				 #define __kprobes	__attribute__((__section__(".kprobes.text")))
			
 
				 
			
@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 
				 #endif
			
 
				 
			
 
				 #endif /* CONFIG_OPTPROBES */
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE
			
 
				 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
			
 
				 				  struct ftrace_ops *ops, struct pt_regs *regs);
			
 
				 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
			
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 
				 }
			
 
				 #endif /* CONFIG_OPTPROBES */
			
 
				 
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE
			
 
				 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
			
 
				 	.func = kprobe_ftrace_handler,
			
 
				 	.flags = FTRACE_OPS_FL_SAVE_REGS,
			
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
 
				 			   (unsigned long)p->addr, 1, 0);
			
 
				 	WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
			
 
				 }
			
 
				-#else	/* !KPROBES_CAN_USE_FTRACE */
			
 
				+#else	/* !CONFIG_KPROBES_ON_FTRACE */
			
 
				 #define prepare_kprobe(p)	arch_prepare_kprobe(p)
			
 
				 #define arm_kprobe_ftrace(p)	do {} while (0)
			
 
				 #define disarm_kprobe_ftrace(p)	do {} while (0)
			
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 
				 	 */
			
 
				 	ftrace_addr = ftrace_location((unsigned long)p->addr);
			
 
				 	if (ftrace_addr) {
			
 
				-#ifdef KPROBES_CAN_USE_FTRACE
			
 
				+#ifdef CONFIG_KPROBES_ON_FTRACE
			
 
				 		/* Given address is not on the instruction boundary */
			
 
				 		if ((unsigned long)p->addr != ftrace_addr)
			
 
				 			return -EILSEQ;
			
 
				 		p->flags |= KPROBE_FLAG_FTRACE;
			
 
				-#else	/* !KPROBES_CAN_USE_FTRACE */
			
 
				+#else	/* !CONFIG_KPROBES_ON_FTRACE */
			
 
				 		return -EINVAL;
			
 
				 #endif
			
 
				 	}
			
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
 
				 	help
			
 
				 	  See Documentation/trace/ftrace-design.txt
			
 
				 
			
 
				+config HAVE_DYNAMIC_FTRACE_WITH_REGS
			
 
				+	bool
			
 
				+
			
 
				 config HAVE_FTRACE_MCOUNT_RECORD
			
 
				 	bool
			
 
				 	help
			
@@ -434,6 +437,11 @@ config DYNAMIC_FTRACE
 
				 	  were made. If so, it runs stop_machine (stops all CPUS)
			
 
				 	  and modifies the code to jump over the call to ftrace.
			
 
				 
			
 
				+config DYNAMIC_FTRACE_WITH_REGS
			
 
				+	def_bool y
			
 
				+	depends on DYNAMIC_FTRACE
			
 
				+	depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
			
 
				+
			
 
				 config FUNCTION_PROFILER
			
 
				 	bool "Kernel function profiler"
			
 
				 	depends on FUNCTION_TRACER
			
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
 
				 		return;
			
 
				 
			
 
				 	local_irq_save(flags);
			
 
				-	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
			
 
				+	buf = this_cpu_ptr(bt->msg_data);
			
 
				 	va_start(args, fmt);
			
 
				 	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
			
 
				 	va_end(args);
			
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 
				 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
			
 
				 #endif
			
 
				 
			
 
				+/*
			
 
				+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
			
 
				+ * can use rcu_dereference_raw() is that elements removed from this list
			
 
				+ * are simply leaked, so there is no need to interact with a grace-period
			
 
				+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
			
 
				+ * concurrent insertions into the ftrace_global_list.
			
 
				+ *
			
 
				+ * Silly Alpha and silly pointer-speculation compiler optimizations!
			
 
				+ */
			
 
				+#define do_for_each_ftrace_op(op, list)			\
			
 
				+	op = rcu_dereference_raw(list);			\
			
 
				+	do
			
 
				+
			
 
				+/* Closes do_for_each_ftrace_op(); the body runs once before the first test.
			
 
				+ * Optimized for just a single item in the list (as that is the normal case).
			
 
				+ */
			
 
				+#define while_for_each_ftrace_op(op)				\
			
 
				+	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
			
 
				+	       unlikely((op) != &ftrace_list_end))
			
 
				+
			
 
				 /**
			
 
				  * ftrace_nr_registered_ops - return number of ops registered
			
 
				  *
			
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
 
				 	return cnt;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
			
 
				- * can use rcu_dereference_raw() is that elements removed from this list
			
 
				- * are simply leaked, so there is no need to interact with a grace-period
			
 
				- * mechanism.  The rcu_dereference_raw() calls are needed to handle
			
 
				- * concurrent insertions into the ftrace_global_list.
			
 
				- *
			
 
				- * Silly Alpha and silly pointer-speculation compiler optimizations!
			
 
				- */
			
 
				 static void
			
 
				 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
			
 
				 			struct ftrace_ops *op, struct pt_regs *regs)
			
 
				 {
			
 
				-	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
			
 
				+	int bit;
			
 
				+
			
 
				+	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
			
 
				+	if (bit < 0)
			
 
				 		return;
			
 
				 
			
 
				-	trace_recursion_set(TRACE_GLOBAL_BIT);
			
 
				-	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
			
 
				-	while (op != &ftrace_list_end) {
			
 
				+	do_for_each_ftrace_op(op, ftrace_global_list) {
			
 
				 		op->func(ip, parent_ip, op, regs);
			
 
				-		op = rcu_dereference_raw(op->next); /*see above*/
			
 
				-	};
			
 
				-	trace_recursion_clear(TRACE_GLOBAL_BIT);
			
 
				+	} while_for_each_ftrace_op(op);
			
 
				+
			
 
				+	trace_clear_recursion(bit);
			
 
				 }
			
 
				 
			
 
				 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
			
@@ -221,10 +233,24 @@ static void update_global_ops(void)
 
				 	 * registered callers.
			
 
				 	 */
			
 
				 	if (ftrace_global_list == &ftrace_list_end ||
			
 
				-	    ftrace_global_list->next == &ftrace_list_end)
			
 
				+	    ftrace_global_list->next == &ftrace_list_end) {
			
 
				 		func = ftrace_global_list->func;
			
 
				-	else
			
 
				+		/*
			
 
				+		 * As we are calling the function directly,
			
 
				+		 * if it does not have recursion protection,
			
 
				+		 * the function_trace_op needs to be updated
			
 
				+		 * accordingly.
			
 
				+		 */
			
 
				+		if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
			
 
				+			global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
			
 
				+		else
			
 
				+			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
			
 
				+	} else {
			
 
				 		func = ftrace_global_list_func;
			
 
				+		/* The list has its own recursion protection. */
			
 
				+		global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
			
 
				+	}
			
 
				+
			
 
				 
			
 
				 	/* If we filter on pids, update to use the pid function */
			
 
				 	if (!list_empty(&ftrace_pids)) {
			
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 
				 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 	/*
			
 
				 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
			
 
				 	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
			
@@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 
				 	 */
			
 
				 	preempt_disable_notrace();
			
 
				 	trace_recursion_set(TRACE_CONTROL_BIT);
			
 
				-	op = rcu_dereference_raw(ftrace_control_list);
			
 
				-	while (op != &ftrace_list_end) {
			
 
				+	do_for_each_ftrace_op(op, ftrace_control_list) {
			
 
				 		if (!ftrace_function_local_disabled(op) &&
			
 
				 		    ftrace_ops_test(op, ip))
			
 
				 			op->func(ip, parent_ip, op, regs);
			
 
				-
			
 
				-		op = rcu_dereference_raw(op->next);
			
 
				-	};
			
 
				+	} while_for_each_ftrace_op(op);
			
 
				 	trace_recursion_clear(TRACE_CONTROL_BIT);
			
 
				 	preempt_enable_notrace();
			
 
				 }
			
@@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 
				 		       struct ftrace_ops *ignored, struct pt_regs *regs)
			
 
				 {
			
 
				 	struct ftrace_ops *op;
			
 
				+	int bit;
			
 
				 
			
 
				 	if (function_trace_stop)
			
 
				 		return;
			
 
				 
			
 
				-	if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
			
 
				+	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
			
 
				+	if (bit < 0)
			
 
				 		return;
			
 
				 
			
 
				-	trace_recursion_set(TRACE_INTERNAL_BIT);
			
 
				 	/*
			
 
				 	 * Some of the ops may be dynamically allocated,
			
 
				 	 * they must be freed after a synchronize_sched().
			
 
				 	 */
			
 
				 	preempt_disable_notrace();
			
 
				-	op = rcu_dereference_raw(ftrace_ops_list);
			
 
				-	while (op != &ftrace_list_end) {
			
 
				+	do_for_each_ftrace_op(op, ftrace_ops_list) {
			
 
				 		if (ftrace_ops_test(op, ip))
			
 
				 			op->func(ip, parent_ip, op, regs);
			
 
				-		op = rcu_dereference_raw(op->next);
			
 
				-	};
			
 
				+	} while_for_each_ftrace_op(op);
			
 
				 	preempt_enable_notrace();
			
 
				-	trace_recursion_clear(TRACE_INTERNAL_BIT);
			
 
				+	trace_clear_recursion(bit);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 
				  * Archs are to support both the regs and ftrace_ops at the same time.
			
 
				  * If they support ftrace_ops, it is assumed they support regs.
			
 
				  * If call backs want to use regs, they must either check for regs
			
 
				- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
			
 
				- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
			
 
				+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
			
 
				+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
			
 
				  * An architecture can pass partial regs with ftrace_ops and still
			
 
				  * set the ARCH_SUPPORT_FTARCE_OPS.
			
 
				  */
			
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,8 +3,10 @@
 
				  *
			
 
				  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
			
 
				  */
			
 
				+#include <linux/ftrace_event.h>
			
 
				 #include <linux/ring_buffer.h>
			
 
				 #include <linux/trace_clock.h>
			
 
				+#include <linux/trace_seq.h>
			
 
				 #include <linux/spinlock.h>
			
 
				 #include <linux/debugfs.h>
			
 
				 #include <linux/uaccess.h>
			
@@ -21,7 +23,6 @@
 
				 #include <linux/fs.h>
			
 
				 
			
 
				 #include <asm/local.h>
			
 
				-#include "trace.h"
			
 
				 
			
 
				 static void update_pages_handler(struct work_struct *work);
			
 
				 
			
@@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
				 
			
 
				 #ifdef CONFIG_TRACING
			
 
				 
			
 
				-#define TRACE_RECURSIVE_DEPTH 16
			
 
				+/*
			
 
				+ * The lock and unlock are done within a preempt disable section.
			
 
				+ * The current_context per_cpu variable can only be modified
			
 
				+ * by the current task between lock and unlock. But it can
			
 
				+ * be modified more than once via an interrupt. To pass this
			
 
				+ * information from the lock to the unlock without having to
			
 
				+ * access the 'in_interrupt()' functions again (which do show
			
 
				+ * a bit of overhead in something as critical as function tracing),
			
 
				+ * we use a bitmask trick.
			
 
				+ *
			
 
				+ *  bit 0 =  NMI context
			
 
				+ *  bit 1 =  IRQ context
			
 
				+ *  bit 2 =  SoftIRQ context
			
 
				+ *  bit 3 =  normal context.
			
 
				+ *
			
 
				+ * This works because this is the order of contexts that can
			
 
				+ * preempt other contexts. A SoftIRQ never preempts an IRQ
			
 
				+ * context.
			
 
				+ *
			
 
				+ * When the context is determined, the corresponding bit is
			
 
				+ * checked and set (if it was set, then a recursion of that context
			
 
				+ * happened).
			
 
				+ *
			
 
				+ * On unlock, we need to clear this bit. To do so, just subtract
			
 
				+ * 1 from the current_context and AND it to itself.
			
 
				+ *
			
 
				+ * (binary)
			
 
				+ *  101 - 1 = 100
			
 
				+ *  101 & 100 = 100 (clearing bit zero)
			
 
				+ *
			
 
				+ *  1010 - 1 = 1001
			
 
				+ *  1010 & 1001 = 1000 (clearing bit 1)
			
 
				+ *
			
 
				+ * The least significant bit can be cleared this way, and it
			
 
				+ * just so happens that it is the same bit corresponding to
			
 
				+ * the current context.
			
 
				+ */
			
 
				+static DEFINE_PER_CPU(unsigned int, current_context);
			
 
				 
			
 
				-/* Keep this code out of the fast path cache */
			
 
				-static noinline void trace_recursive_fail(void)
			
 
				+static __always_inline int trace_recursive_lock(void)
			
 
				 {
			
 
				-	/* Disable all tracing before we do anything else */
			
 
				-	tracing_off_permanent();
			
 
				+	unsigned int val = this_cpu_read(current_context);
			
 
				+	int bit;
			
 
				 
			
 
				-	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
			
 
				-		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
			
 
				-		    trace_recursion_buffer(),
			
 
				-		    hardirq_count() >> HARDIRQ_SHIFT,
			
 
				-		    softirq_count() >> SOFTIRQ_SHIFT,
			
 
				-		    in_nmi());
			
 
				-
			
 
				-	WARN_ON_ONCE(1);
			
 
				-}
			
 
				-
			
 
				-static inline int trace_recursive_lock(void)
			
 
				-{
			
 
				-	trace_recursion_inc();
			
 
				+	if (in_interrupt()) {
			
 
				+		if (in_nmi())
			
 
				+			bit = 0;
			
 
				+		else if (in_irq())
			
 
				+			bit = 1;
			
 
				+		else
			
 
				+			bit = 2;
			
 
				+	} else
			
 
				+		bit = 3;
			
 
				 
			
 
				-	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
			
 
				-		return 0;
			
 
				+	if (unlikely(val & (1 << bit)))
			
 
				+		return 1;
			
 
				 
			
 
				-	trace_recursive_fail();
			
 
				+	val |= (1 << bit);
			
 
				+	this_cpu_write(current_context, val);
			
 
				 
			
 
				-	return -1;
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				-static inline void trace_recursive_unlock(void)
			
 
				+static __always_inline void trace_recursive_unlock(void)
			
 
				 {
			
 
				-	WARN_ON_ONCE(!trace_recursion_buffer());
			
 
				+	unsigned int val = this_cpu_read(current_context);
			
 
				 
			
 
				-	trace_recursion_dec();
			
 
				+	val--;
			
 
				+	val &= this_cpu_read(current_context);
			
 
				+	this_cpu_write(current_context, val);
			
 
				 }
			
 
				 
			
 
				 #else
			
@@ -3425,7 +3461,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 
				 	/* check for end of page padding */
			
 
				 	if ((iter->head >= rb_page_size(iter->head_page)) &&
			
 
				 	    (iter->head_page != cpu_buffer->commit_page))
			
 
				-		rb_advance_iter(iter);
			
 
				+		rb_inc_iter(iter);
			
 
				 }
			
 
				 
			
 
				 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
			
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -709,10 +709,14 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
				 		return;
			
 
				 
			
 
				 	WARN_ON_ONCE(!irqs_disabled());
			
 
				-	if (!current_trace->use_max_tr) {
			
 
				-		WARN_ON_ONCE(1);
			
 
				+
			
 
				+	/* If we disabled the tracer, stop now */
			
 
				+	if (current_trace == &nop_trace)
			
 
				 		return;
			
 
				-	}
			
 
				+
			
 
				+	if (WARN_ON_ONCE(!current_trace->use_max_tr))
			
 
				+		return;
			
 
				+
			
 
				 	arch_spin_lock(&ftrace_max_lock);
			
 
				 
			
 
				 	tr->buffer = max_tr.buffer;
			
@@ -922,6 +926,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
 
				 {
			
 
				 	struct ring_buffer *buffer = tr->buffer;
			
 
				 
			
 
				+	if (!buffer)
			
 
				+		return;
			
 
				+
			
 
				 	ring_buffer_record_disable(buffer);
			
 
				 
			
 
				 	/* Make sure all commits have finished */
			
@@ -936,6 +943,9 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 
				 	struct ring_buffer *buffer = tr->buffer;
			
 
				 	int cpu;
			
 
				 
			
 
				+	if (!buffer)
			
 
				+		return;
			
 
				+
			
 
				 	ring_buffer_record_disable(buffer);
			
 
				 
			
 
				 	/* Make sure all commits have finished */
			
@@ -1167,7 +1177,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
				 
			
 
				 	entry->preempt_count		= pc & 0xff;
			
 
				 	entry->pid			= (tsk) ? tsk->pid : 0;
			
 
				-	entry->padding			= 0;
			
 
				 	entry->flags =
			
 
				 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
			
 
				 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
			
@@ -1517,7 +1526,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 
				 static char *get_trace_buf(void)
			
 
				 {
			
 
				 	struct trace_buffer_struct *percpu_buffer;
			
 
				-	struct trace_buffer_struct *buffer;
			
 
				 
			
 
				 	/*
			
 
				 	 * If we have allocated per cpu buffers, then we do not
			
@@ -1535,9 +1543,7 @@ static char *get_trace_buf(void)
 
				 	if (!percpu_buffer)
			
 
				 		return NULL;
			
 
				 
			
 
				-	buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
			
 
				-
			
 
				-	return buffer->buffer;
			
 
				+	return this_cpu_ptr(&percpu_buffer->buffer[0]);
			
 
				 }
			
 
				 
			
 
				 static int alloc_percpu_trace_buffer(void)
			
@@ -3183,6 +3189,7 @@ static int tracing_set_tracer(const char *buf)
 
				 	static struct trace_option_dentry *topts;
			
 
				 	struct trace_array *tr = &global_trace;
			
 
				 	struct tracer *t;
			
 
				+	bool had_max_tr;
			
 
				 	int ret = 0;
			
 
				 
			
 
				 	mutex_lock(&trace_types_lock);
			
@@ -3209,7 +3216,19 @@ static int tracing_set_tracer(const char *buf)
 
				 	trace_branch_disable();
			
 
				 	if (current_trace && current_trace->reset)
			
 
				 		current_trace->reset(tr);
			
 
				-	if (current_trace && current_trace->use_max_tr) {
			
 
				+
			
 
				+	had_max_tr = current_trace && current_trace->use_max_tr;
			
 
				+	current_trace = &nop_trace;
			
 
				+
			
 
				+	if (had_max_tr && !t->use_max_tr) {
			
 
				+		/*
			
 
				+		 * We need to make sure that the update_max_tr sees that
			
 
				+		 * current_trace changed to nop_trace to keep it from
			
 
				+		 * swapping the buffers after we resize it.
			
 
				+		 * The update_max_tr is called from interrupts disabled
			
 
				+		 * so a synchronize_sched() is sufficient.
			
 
				+		 */
			
 
				+		synchronize_sched();
			
 
				 		/*
			
 
				 		 * We don't free the ring buffer. instead, resize it because
			
 
				 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
			
@@ -3220,10 +3239,8 @@ static int tracing_set_tracer(const char *buf)
 
				 	}
			
 
				 	destroy_trace_option_files(topts);
			
 
				 
			
 
				-	current_trace = &nop_trace;
			
 
				-
			
 
				 	topts = create_trace_option_files(t);
			
 
				-	if (t->use_max_tr) {
			
 
				+	if (t->use_max_tr && !had_max_tr) {
			
 
				 		/* we need to make per cpu buffer sizes equivalent */
			
 
				 		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
			
 
				 						   RING_BUFFER_ALL_CPUS);
			
@@ -4037,8 +4054,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 
				 	 * Reset the buffer so that it doesn't have incomparable timestamps.
			
 
				 	 */
			
 
				 	tracing_reset_online_cpus(&global_trace);
			
 
				-	if (max_tr.buffer)
			
 
				-		tracing_reset_online_cpus(&max_tr);
			
 
				+	tracing_reset_online_cpus(&max_tr);
			
 
				 
			
 
				 	mutex_unlock(&trace_types_lock);
			
 
				 
			
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -291,16 +291,57 @@ struct tracer {
 
				 
			
 
				 
			
 
				 /* Only current can touch trace_recursion */
			
 
				-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
			
 
				-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
			
 
				 
			
 
				-/* Ring buffer has the 10 LSB bits to count */
			
 
				-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
			
 
				-
			
 
				-/* for function tracing recursion */
			
 
				-#define TRACE_INTERNAL_BIT		(1<<11)
			
 
				-#define TRACE_GLOBAL_BIT		(1<<12)
			
 
				-#define TRACE_CONTROL_BIT		(1<<13)
			
 
				+/*
			
 
				+ * For function tracing recursion:
			
 
				+ *  The order of these bits is important.
			
 
				+ *
			
 
				+ *  When function tracing occurs, the following steps are made:
			
 
				+ *   If arch does not support a ftrace feature:
			
 
				+ *    call internal function (uses INTERNAL bits) which calls...
			
 
				+ *   If callback is registered to the "global" list, the list
			
 
				+ *    function is called and recursion checks the GLOBAL bits.
			
 
				+ *    then this function calls...
			
 
				+ *   The function callback, which can use the FTRACE bits to
			
 
				+ *    check for recursion.
			
 
				+ *
			
 
				+ * Now if the arch does not support a feature, and it calls
			
 
				+ * the global list function which calls the ftrace callback
			
 
				+ * all three of these steps will do a recursion protection.
			
 
				+ * There's no reason to do one if the previous caller already
			
 
				+ * did. The recursion that we are protecting against will
			
 
				+ * go through the same steps again.
			
 
				+ *
			
 
				+ * To prevent the multiple recursion checks, if a recursion
			
 
				+ * bit is set that is higher than the MAX bit of the current
			
 
				+ * check, then we know that the check was made by the previous
			
 
				+ * caller, and we can skip the current check.
			
 
				+ */
			
 
				+enum {
			
 
				+	TRACE_BUFFER_BIT,
			
 
				+	TRACE_BUFFER_NMI_BIT,
			
 
				+	TRACE_BUFFER_IRQ_BIT,
			
 
				+	TRACE_BUFFER_SIRQ_BIT,
			
 
				+
			
 
				+	/* Start of function recursion bits */
			
 
				+	TRACE_FTRACE_BIT,
			
 
				+	TRACE_FTRACE_NMI_BIT,
			
 
				+	TRACE_FTRACE_IRQ_BIT,
			
 
				+	TRACE_FTRACE_SIRQ_BIT,
			
 
				+
			
 
				+	/* GLOBAL_BITs must be greater than FTRACE_BITs */
			
 
				+	TRACE_GLOBAL_BIT,
			
 
				+	TRACE_GLOBAL_NMI_BIT,
			
 
				+	TRACE_GLOBAL_IRQ_BIT,
			
 
				+	TRACE_GLOBAL_SIRQ_BIT,
			
 
				+
			
 
				+	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
			
 
				+	TRACE_INTERNAL_BIT,
			
 
				+	TRACE_INTERNAL_NMI_BIT,
			
 
				+	TRACE_INTERNAL_IRQ_BIT,
			
 
				+	TRACE_INTERNAL_SIRQ_BIT,
			
 
				+
			
 
				+	TRACE_CONTROL_BIT,
			
 
				 
			
 
				 /*
			
 
				  * Abuse of the trace_recursion.
			
@@ -309,11 +350,77 @@ struct tracer {
 
				  * was called in irq context but we have irq tracing off. Since this
			
 
				  * can only be modified by current, we can reuse trace_recursion.
			
 
				  */
			
 
				-#define TRACE_IRQ_BIT			(1<<13)
			
 
				+	TRACE_IRQ_BIT,
			
 
				+};
			
 
				+
			
 
				+#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
			
 
				+#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
			
 
				+#define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
			
 
				+
			
 
				+#define TRACE_CONTEXT_BITS	4
			
 
				+
			
 
				+#define TRACE_FTRACE_START	TRACE_FTRACE_BIT
			
 
				+#define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
			
 
				+
			
 
				+#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
			
 
				+#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
			
 
				+
			
 
				+#define TRACE_LIST_START	TRACE_INTERNAL_BIT
			
 
				+#define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
			
 
				+
			
 
				+#define TRACE_CONTEXT_MASK	TRACE_LIST_MAX
			
 
				+
			
 
				+static __always_inline int trace_get_context_bit(void)
			
 
				+{
			
 
				+	int bit;
			
 
				 
			
 
				-#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
			
 
				-#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
			
 
				-#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
			
 
				+	if (in_interrupt()) {
			
 
				+		if (in_nmi())
			
 
				+			bit = 0;
			
 
				+
			
 
				+		else if (in_irq())
			
 
				+			bit = 1;
			
 
				+		else
			
 
				+			bit = 2;
			
 
				+	} else
			
 
				+		bit = 3;
			
 
				+
			
 
				+	return bit;
			
 
				+}
			
 
				+
			
 
				+static __always_inline int trace_test_and_set_recursion(int start, int max)
			
 
				+{
			
 
				+	unsigned int val = current->trace_recursion;
			
 
				+	int bit;
			
 
				+
			
 
				+	/* A previous recursion check was made */
			
 
				+	if ((val & TRACE_CONTEXT_MASK) > max)
			
 
				+		return 0;
			
 
				+
			
 
				+	bit = trace_get_context_bit() + start;
			
 
				+	if (unlikely(val & (1 << bit)))
			
 
				+		return -1;
			
 
				+
			
 
				+	val |= 1 << bit;
			
 
				+	current->trace_recursion = val;
			
 
				+	barrier();
			
 
				+
			
 
				+	return bit;
			
 
				+}
			
 
				+
			
 
				+static __always_inline void trace_clear_recursion(int bit)
			
 
				+{
			
 
				+	unsigned int val = current->trace_recursion;
			
 
				+
			
 
				+	if (!bit)
			
 
				+		return;
			
 
				+
			
 
				+	bit = 1 << bit;
			
 
				+	val &= ~bit;
			
 
				+
			
 
				+	barrier();
			
 
				+	current->trace_recursion = val;
			
 
				+}
			
 
				 
			
 
				 #define TRACE_PIPE_ALL_CPU	-1
			
 
				 
			
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -21,8 +21,6 @@
 
				 #include <linux/ktime.h>
			
 
				 #include <linux/trace_clock.h>
			
 
				 
			
 
				-#include "trace.h"
			
 
				-
			
 
				 /*
			
 
				  * trace_clock_local(): the simplest and least coherent tracing clock.
			
 
				  *
			
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
 
				 	__common_field(unsigned char, flags);
			
 
				 	__common_field(unsigned char, preempt_count);
			
 
				 	__common_field(int, pid);
			
 
				-	__common_field(int, padding);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr)
 
				 	tracing_reset_online_cpus(tr);
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
			
 
				-				 struct ftrace_ops *op, struct pt_regs *pt_regs)
			
 
				-{
			
 
				-	struct trace_array *tr = func_trace;
			
 
				-	struct trace_array_cpu *data;
			
 
				-	unsigned long flags;
			
 
				-	long disabled;
			
 
				-	int cpu;
			
 
				-	int pc;
			
 
				-
			
 
				-	if (unlikely(!ftrace_function_enabled))
			
 
				-		return;
			
 
				-
			
 
				-	pc = preempt_count();
			
 
				-	preempt_disable_notrace();
			
 
				-	local_save_flags(flags);
			
 
				-	cpu = raw_smp_processor_id();
			
 
				-	data = tr->data[cpu];
			
 
				-	disabled = atomic_inc_return(&data->disabled);
			
 
				-
			
 
				-	if (likely(disabled == 1))
			
 
				-		trace_function(tr, ip, parent_ip, flags, pc);
			
 
				-
			
 
				-	atomic_dec(&data->disabled);
			
 
				-	preempt_enable_notrace();
			
 
				-}
			
 
				-
			
 
				 /* Our option */
			
 
				 enum {
			
 
				 	TRACE_FUNC_OPT_STACK	= 0x1,
			
@@ -85,34 +57,34 @@ static struct tracer_flags func_flags;
 
				 static void
			
 
				 function_trace_call(unsigned long ip, unsigned long parent_ip,
			
 
				 		    struct ftrace_ops *op, struct pt_regs *pt_regs)
			
 
				-
			
 
				 {
			
 
				 	struct trace_array *tr = func_trace;
			
 
				 	struct trace_array_cpu *data;
			
 
				 	unsigned long flags;
			
 
				-	long disabled;
			
 
				+	unsigned int bit;
			
 
				 	int cpu;
			
 
				 	int pc;
			
 
				 
			
 
				 	if (unlikely(!ftrace_function_enabled))
			
 
				 		return;
			
 
				 
			
 
				-	/*
			
 
				-	 * Need to use raw, since this must be called before the
			
 
				-	 * recursive protection is performed.
			
 
				-	 */
			
 
				-	local_irq_save(flags);
			
 
				-	cpu = raw_smp_processor_id();
			
 
				-	data = tr->data[cpu];
			
 
				-	disabled = atomic_inc_return(&data->disabled);
			
 
				+	pc = preempt_count();
			
 
				+	preempt_disable_notrace();
			
 
				 
			
 
				-	if (likely(disabled == 1)) {
			
 
				-		pc = preempt_count();
			
 
				+	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
			
 
				+	if (bit < 0)
			
 
				+		goto out;
			
 
				+
			
 
				+	cpu = smp_processor_id();
			
 
				+	data = tr->data[cpu];
			
 
				+	if (!atomic_read(&data->disabled)) {
			
 
				+		local_save_flags(flags);
			
 
				 		trace_function(tr, ip, parent_ip, flags, pc);
			
 
				 	}
			
 
				+	trace_clear_recursion(bit);
			
 
				 
			
 
				-	atomic_dec(&data->disabled);
			
 
				-	local_irq_restore(flags);
			
 
				+ out:
			
 
				+	preempt_enable_notrace();
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void)
 
				 {
			
 
				 	ftrace_function_enabled = 0;
			
 
				 
			
 
				-	if (trace_flags & TRACE_ITER_PREEMPTONLY)
			
 
				-		trace_ops.func = function_trace_call_preempt_only;
			
 
				-	else
			
 
				-		trace_ops.func = function_trace_call;
			
 
				-
			
 
				 	if (func_flags.val & TRACE_FUNC_OPT_STACK)
			
 
				 		register_ftrace_function(&trace_stack_ops);
			
 
				 	else
			
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -47,6 +47,8 @@ struct fgraph_data {
 
				 #define TRACE_GRAPH_PRINT_ABS_TIME	0x20
			
 
				 #define TRACE_GRAPH_PRINT_IRQS		0x40
			
 
				 
			
 
				+static unsigned int max_depth;
			
 
				+
			
 
				 static struct tracer_opt trace_opts[] = {
			
 
				 	/* Display overruns? (for self-debug purpose) */
			
 
				 	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
			
@@ -250,8 +252,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 
				 		return 0;
			
 
				 
			
 
				 	/* trace it when it is-nested-in or is a function enabled. */
			
 
				-	if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
			
 
				-	      ftrace_graph_ignore_irqs())
			
 
				+	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
			
 
				+	     ftrace_graph_ignore_irqs()) ||
			
 
				+	    (max_depth && trace->depth >= max_depth))
			
 
				 		return 0;
			
 
				 
			
 
				 	local_irq_save(flags);
			
@@ -1457,6 +1460,59 @@ static struct tracer graph_trace __read_mostly = {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				+
			
 
				+static ssize_t
			
 
				+graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
			
 
				+		  loff_t *ppos)
			
 
				+{
			
 
				+	unsigned long val;
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	max_depth = val;
			
 
				+
			
 
				+	*ppos += cnt;
			
 
				+
			
 
				+	return cnt;
			
 
				+}
			
 
				+
			
 
				+static ssize_t
			
 
				+graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
			
 
				+		 loff_t *ppos)
			
 
				+{
			
 
				+	char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
			
 
				+	int n;
			
 
				+
			
 
				+	n = sprintf(buf, "%d\n", max_depth);
			
 
				+
			
 
				+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
			
 
				+}
			
 
				+
			
 
				+static const struct file_operations graph_depth_fops = {
			
 
				+	.open		= tracing_open_generic,
			
 
				+	.write		= graph_depth_write,
			
 
				+	.read		= graph_depth_read,
			
 
				+	.llseek		= generic_file_llseek,
			
 
				+};
			
 
				+
			
 
				+static __init int init_graph_debugfs(void)
			
 
				+{
			
 
				+	struct dentry *d_tracer;
			
 
				+
			
 
				+	d_tracer = tracing_init_dentry();
			
 
				+	if (!d_tracer)
			
 
				+		return 0;
			
 
				+
			
 
				+	trace_create_file("max_graph_depth", 0644, d_tracer,
			
 
				+			  NULL, &graph_depth_fops);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+fs_initcall(init_graph_debugfs);
			
 
				+
			
 
				 static __init int init_graph_trace(void)
			
 
				 {
			
 
				 	max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
			
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
 
				 	 * The ftrace infrastructure should provide the recursion
			
 
				 	 * protection. If not, this will crash the kernel!
			
 
				 	 */
			
 
				-	trace_selftest_recursion_cnt++;
			
 
				+	if (trace_selftest_recursion_cnt++ > 10)
			
 
				+		return;
			
 
				 	DYN_FTRACE_TEST_NAME();
			
 
				 }
			
 
				 
			
@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void)
 
				 	char *func_name;
			
 
				 	int len;
			
 
				 	int ret;
			
 
				-	int cnt;
			
 
				 
			
 
				 	/* The previous test PASSED */
			
 
				 	pr_cont("PASSED\n");
			
@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void)
 
				 
			
 
				 	unregister_ftrace_function(&test_recsafe_probe);
			
 
				 
			
 
				-	/*
			
 
				-	 * If arch supports all ftrace features, and no other task
			
 
				-	 * was on the list, we should be fine.
			
 
				-	 */
			
 
				-	if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
			
 
				-		cnt = 2; /* Should have recursed */
			
 
				-	else
			
 
				-		cnt = 1;
			
 
				-
			
 
				 	ret = -1;
			
 
				-	if (trace_selftest_recursion_cnt != cnt) {
			
 
				-		pr_cont("*callback not called expected %d times (%d)* ",
			
 
				-			cnt, trace_selftest_recursion_cnt);
			
 
				+	if (trace_selftest_recursion_cnt != 2) {
			
 
				+		pr_cont("*callback not called expected 2 times (%d)* ",
			
 
				+			trace_selftest_recursion_cnt);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
@@ -568,7 +559,7 @@ trace_selftest_function_regs(void)
 
				 	int ret;
			
 
				 	int supported = 0;
			
 
				 
			
 
				-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
			
 
				+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
			
 
				 	supported = 1;
			
 
				 #endif
			
 
				 
			
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 
				 	return syscalls_metadata[nr];
			
 
				 }
			
 
				 
			
 
				-enum print_line_t
			
 
				+static enum print_line_t
			
 
				 print_syscall_enter(struct trace_iterator *iter, int flags,
			
 
				 		    struct trace_event *event)
			
 
				 {
			
@@ -130,7 +130,7 @@ end:
 
				 	return TRACE_TYPE_HANDLED;
			
 
				 }
			
 
				 
			
 
				-enum print_line_t
			
 
				+static enum print_line_t
			
 
				 print_syscall_exit(struct trace_iterator *iter, int flags,
			
 
				 		   struct trace_event *event)
			
 
				 {
			
@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
			
 
				+static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
			
 
				 {
			
 
				 	struct syscall_trace_enter *entry;
			
 
				 	struct syscall_metadata *sys_data;
			
@@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 
				 		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
			
 
				 }
			
 
				 
			
 
				-void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
			
 
				+static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
			
 
				 {
			
 
				 	struct syscall_trace_exit *entry;
			
 
				 	struct syscall_metadata *sys_data;
			
@@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 
				 		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
			
 
				 }
			
 
				 
			
 
				-int reg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				+static int reg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int num;
			
@@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void unreg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				+static void unreg_event_syscall_enter(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int num;
			
 
				 
			
@@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
 
				 	mutex_unlock(&syscall_trace_lock);
			
 
				 }
			
 
				 
			
 
				-int reg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				+static int reg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int num;
			
@@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void unreg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				+static void unreg_event_syscall_exit(struct ftrace_event_call *call)
			
 
				 {
			
 
				 	int num;
			
 
				 
			
@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
 
				 	return (unsigned long)sys_call_table[nr];
			
 
				 }
			
 
				 
			
 
				-int __init init_ftrace_syscalls(void)
			
 
				+static int __init init_ftrace_syscalls(void)
			
 
				 {
			
 
				 	struct syscall_metadata *meta;
			
 
				 	unsigned long addr;
			
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -258,6 +258,10 @@ static int create_trace_uprobe(int argc, char **argv)
 
				 		goto fail_address_parse;
			
 
				 
			
 
				 	inode = igrab(path.dentry->d_inode);
			
 
				+	if (!S_ISREG(inode->i_mode)) {
			
 
				+		ret = -EINVAL;
			
 
				+		goto fail_address_parse;
			
 
				+	}
			
 
				 
			
 
				 	argc -= 2;
			
 
				 	argv += 2;
			
@@ -356,7 +360,7 @@ fail_address_parse:
 
				 	if (inode)
			
 
				 		iput(inode);
			
 
				 
			
 
				-	pr_info("Failed to parse address.\n");
			
 
				+	pr_info("Failed to parse address or file.\n");
			
 
				 
			
 
				 	return ret;
			
 
				 }