Просмотр исходного кода

Merge branches 'tracing/branch-tracer', 'tracing/ftrace', 'tracing/function-graph-tracer', 'tracing/markers', 'tracing/powerpc', 'tracing/stack-tracer' and 'tracing/tracepoints' into tracing/core

Ingo Molnar 16 лет назад

+ 6 - 2
Documentation/ftrace.txt

@@ -1251,7 +1251,11 @@ These are the only wild cards which are supported.
 
 
   <match>*<match> will not work.
   <match>*<match> will not work.
 
 
- # echo hrtimer_* > /debug/tracing/set_ftrace_filter
+Note: It is better to use quotes to enclose the wild cards, otherwise
+  the shell may expand the parameters into names of files in the local
+  directory.
+
+ # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
 
 
 Produces:
 Produces:
 
 
@@ -1306,7 +1310,7 @@ Again, now we want to append.
  # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
  # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
  # cat /debug/tracing/set_ftrace_filter
  # cat /debug/tracing/set_ftrace_filter
 sys_nanosleep
 sys_nanosleep
- # echo hrtimer_* >> /debug/tracing/set_ftrace_filter
+ # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter
  # cat /debug/tracing/set_ftrace_filter
  # cat /debug/tracing/set_ftrace_filter
 hrtimer_run_queues
 hrtimer_run_queues
 hrtimer_run_pending
 hrtimer_run_pending

+ 10 - 5
Documentation/markers.txt

@@ -51,11 +51,16 @@ to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
 is done through marker_probe_unregister(); it will disarm the probe.
 is done through marker_probe_unregister(); it will disarm the probe.
-marker_synchronize_unregister() must be called before the end of the module exit
-function to make sure there is no caller left using the probe. This, and the
-fact that preemption is disabled around the probe call, make sure that probe
-removal and module unload are safe. See the "Probe example" section below for a
-sample probe module.
+
+marker_synchronize_unregister() must be called between probe unregistration and
+the first occurrence of
+- the end of module exit function,
+  to make sure there is no caller left using the probe;
+- the free of any resource used by the probes,
+  to make sure the probes won't be accessing invalid data.
+This, and the fact that preemption is disabled around the probe call, make sure
+that probe removal and module unload are safe. See the "Probe example" section
+below for a sample probe module.
 
 
 The marker mechanism supports inserting multiple instances of the same marker.
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
 Markers can be put in inline functions, inlined static functions, and

+ 2 - 2
Documentation/tracepoints.txt

@@ -45,7 +45,7 @@ In include/trace/subsys.h :
 #include <linux/tracepoint.h>
 #include <linux/tracepoint.h>
 
 
 DECLARE_TRACE(subsys_eventname,
 DECLARE_TRACE(subsys_eventname,
-	TPPTOTO(int firstarg, struct task_struct *p),
+	TPPROTO(int firstarg, struct task_struct *p),
 	TPARGS(firstarg, p));
 	TPARGS(firstarg, p));
 
 
 In subsys/file.c (where the tracing statement must be added) :
 In subsys/file.c (where the tracing statement must be added) :
@@ -66,7 +66,7 @@ Where :
     - subsys is the name of your subsystem.
     - subsys is the name of your subsystem.
     - eventname is the name of the event to trace.
     - eventname is the name of the event to trace.
 
 
-- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the
+- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
   function called by this tracepoint.
   function called by this tracepoint.
 
 
 - TPARGS(firstarg, p) are the parameters names, same as found in the
 - TPARGS(firstarg, p) are the parameters names, same as found in the

+ 1 - 0
arch/powerpc/kernel/Makefile

@@ -17,6 +17,7 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
 CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
 CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
 CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
 CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
 CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
 
 
 ifdef CONFIG_DYNAMIC_FTRACE
 ifdef CONFIG_DYNAMIC_FTRACE
 # dynamic ftrace setup.
 # dynamic ftrace setup.

+ 9 - 31
arch/powerpc/kernel/entry_32.S

@@ -1162,39 +1162,17 @@ machine_check_in_rtas:
 #ifdef CONFIG_DYNAMIC_FTRACE
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
 _GLOBAL(_mcount)
-	stwu	r1,-48(r1)
-	stw	r3, 12(r1)
-	stw	r4, 16(r1)
-	stw	r5, 20(r1)
-	stw	r6, 24(r1)
-	mflr	r3
-	stw	r7, 28(r1)
-	mfcr	r5
-	stw	r8, 32(r1)
-	stw	r9, 36(r1)
-	stw	r10,40(r1)
-	stw	r3, 44(r1)
-	stw	r5, 8(r1)
-	subi	r3, r3, MCOUNT_INSN_SIZE
-	.globl mcount_call
-mcount_call:
-	bl	ftrace_stub
-	nop
-	lwz	r6, 8(r1)
-	lwz	r0, 44(r1)
-	lwz	r3, 12(r1)
+	/*
+	 * It is required that _mcount on PPC32 must preserve the
+	 * link register. But we have r0 to play with. We use r0
+	 * to push the return address back to the caller of mcount
+	 * into the ctr register, restore the link register and
+	 * then jump back using the ctr register.
+	 */
+	mflr	r0
 	mtctr	r0
 	mtctr	r0
-	lwz	r4, 16(r1)
-	mtcr	r6
-	lwz	r5, 20(r1)
-	lwz	r6, 24(r1)
-	lwz	r0, 52(r1)
-	lwz	r7, 28(r1)
-	lwz	r8, 32(r1)
+	lwz	r0, 4(r1)
 	mtlr	r0
 	mtlr	r0
-	lwz	r9, 36(r1)
-	lwz	r10,40(r1)
-	addi	r1, r1, 48
 	bctr
 	bctr
 
 
 _GLOBAL(ftrace_caller)
 _GLOBAL(ftrace_caller)

+ 0 - 12
arch/powerpc/kernel/entry_64.S

@@ -888,18 +888,6 @@ _GLOBAL(enter_prom)
 #ifdef CONFIG_DYNAMIC_FTRACE
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
 _GLOBAL(_mcount)
-	/* Taken from output of objdump from lib64/glibc */
-	mflr	r3
-	stdu	r1, -112(r1)
-	std	r3, 128(r1)
-	subi	r3, r3, MCOUNT_INSN_SIZE
-	.globl mcount_call
-mcount_call:
-	bl	ftrace_stub
-	nop
-	ld	r0, 128(r1)
-	mtlr	r0
-	addi	r1, r1, 112
 	blr
 	blr
 
 
 _GLOBAL(ftrace_caller)
 _GLOBAL(ftrace_caller)

+ 85 - 97
arch/powerpc/kernel/ftrace.c

@@ -114,19 +114,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
  */
  */
 static int test_24bit_addr(unsigned long ip, unsigned long addr)
 static int test_24bit_addr(unsigned long ip, unsigned long addr)
 {
 {
-	long diff;
 
 
-	/*
-	 * Can we get to addr from ip in 24 bits?
-	 *  (26 really, since we mulitply by 4 for 4 byte alignment)
-	 */
-	diff = addr - ip;
-
-	/*
-	 * Return true if diff is less than 1 << 25
-	 *  and greater than -1 << 26.
-	 */
-	return (diff < (1 << 25)) && (diff > (-1 << 26));
+	/* use the create_branch to verify that this offset can be branched */
+	return create_branch((unsigned int *)ip, addr, 0);
 }
 }
 
 
 static int is_bl_op(unsigned int op)
 static int is_bl_op(unsigned int op)
@@ -134,11 +124,6 @@ static int is_bl_op(unsigned int op)
 	return (op & 0xfc000003) == 0x48000001;
 	return (op & 0xfc000003) == 0x48000001;
 }
 }
 
 
-static int test_offset(unsigned long offset)
-{
-	return (offset + 0x2000000 > 0x3ffffff) || ((offset & 3) != 0);
-}
-
 static unsigned long find_bl_target(unsigned long ip, unsigned int op)
 static unsigned long find_bl_target(unsigned long ip, unsigned int op)
 {
 {
 	static int offset;
 	static int offset;
@@ -151,37 +136,30 @@ static unsigned long find_bl_target(unsigned long ip, unsigned int op)
 	return ip + (long)offset;
 	return ip + (long)offset;
 }
 }
 
 
-static unsigned int branch_offset(unsigned long offset)
-{
-	/* return "bl ip+offset" */
-	return 0x48000001 | (offset & 0x03fffffc);
-}
-
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PPC64
 static int
 static int
 __ftrace_make_nop(struct module *mod,
 __ftrace_make_nop(struct module *mod,
 		  struct dyn_ftrace *rec, unsigned long addr)
 		  struct dyn_ftrace *rec, unsigned long addr)
 {
 {
-	unsigned char replaced[MCOUNT_INSN_SIZE * 2];
-	unsigned int *op = (unsigned *)&replaced;
-	unsigned char jmp[8];
-	unsigned long *ptr = (unsigned long *)&jmp;
+	unsigned int op;
+	unsigned int jmp[5];
+	unsigned long ptr;
 	unsigned long ip = rec->ip;
 	unsigned long ip = rec->ip;
 	unsigned long tramp;
 	unsigned long tramp;
 	int offset;
 	int offset;
 
 
 	/* read where this goes */
 	/* read where this goes */
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
 		return -EFAULT;
 		return -EFAULT;
 
 
 	/* Make sure that that this is still a 24bit jump */
 	/* Make sure that that this is still a 24bit jump */
-	if (!is_bl_op(*op)) {
-		printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+	if (!is_bl_op(op)) {
+		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
 	/* lets find where the pointer goes */
 	/* lets find where the pointer goes */
-	tramp = find_bl_target(ip, *op);
+	tramp = find_bl_target(ip, op);
 
 
 	/*
 	/*
 	 * On PPC64 the trampoline looks like:
 	 * On PPC64 the trampoline looks like:
@@ -200,19 +178,25 @@ __ftrace_make_nop(struct module *mod,
 	DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
 	DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
 
 
 	/* Find where the trampoline jumps to */
 	/* Find where the trampoline jumps to */
-	if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
 		printk(KERN_ERR "Failed to read %lx\n", tramp);
 		printk(KERN_ERR "Failed to read %lx\n", tramp);
 		return -EFAULT;
 		return -EFAULT;
 	}
 	}
 
 
-	DEBUGP(" %08x %08x",
-	       (unsigned)(*ptr >> 32),
-	       (unsigned)*ptr);
+	DEBUGP(" %08x %08x", jmp[0], jmp[1]);
+
+	/* verify that this is what we expect it to be */
+	if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
+	    ((jmp[1] & 0xffff0000) != 0x398c0000) ||
+	    (jmp[2] != 0xf8410028) ||
+	    (jmp[3] != 0xe96c0020) ||
+	    (jmp[4] != 0xe84c0028)) {
+		printk(KERN_ERR "Not a trampoline\n");
+		return -EINVAL;
+	}
 
 
-	offset = (unsigned)jmp[2] << 24 |
-		(unsigned)jmp[3] << 16 |
-		(unsigned)jmp[6] << 8 |
-		(unsigned)jmp[7];
+	offset = (unsigned)((unsigned short)jmp[0]) << 16 |
+		(unsigned)((unsigned short)jmp[1]);
 
 
 	DEBUGP(" %x ", offset);
 	DEBUGP(" %x ", offset);
 
 
@@ -225,13 +209,13 @@ __ftrace_make_nop(struct module *mod,
 		return -EFAULT;
 		return -EFAULT;
 	}
 	}
 
 
-	DEBUGP(" %08x %08x\n",
-	       (unsigned)(*ptr >> 32),
-	       (unsigned)*ptr);
+	DEBUGP(" %08x %08x\n", jmp[0], jmp[1]);
+
+	ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
 
 
 	/* This should match what was called */
 	/* This should match what was called */
-	if (*ptr != GET_ADDR(addr)) {
-		printk(KERN_ERR "addr does not match %lx\n", *ptr);
+	if (ptr != GET_ADDR(addr)) {
+		printk(KERN_ERR "addr does not match %lx\n", ptr);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
@@ -240,11 +224,11 @@ __ftrace_make_nop(struct module *mod,
 	 *  0xe8, 0x41, 0x00, 0x28   ld r2,40(r1)
 	 *  0xe8, 0x41, 0x00, 0x28   ld r2,40(r1)
 	 * This needs to be turned to a nop too.
 	 * This needs to be turned to a nop too.
 	 */
 	 */
-	if (probe_kernel_read(replaced, (void *)(ip+4), MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
 		return -EFAULT;
 		return -EFAULT;
 
 
-	if (*op != 0xe8410028) {
-		printk(KERN_ERR "Next line is not ld! (%08x)\n", *op);
+	if (op != 0xe8410028) {
+		printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
@@ -261,11 +245,14 @@ __ftrace_make_nop(struct module *mod,
 	 *   ld r2,40(r1)
 	 *   ld r2,40(r1)
 	 *  1:
 	 *  1:
 	 */
 	 */
-	op[0] = 0x48000008;	/* b +8 */
+	op = 0x48000008;	/* b +8 */
 
 
-	if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
 		return -EPERM;
 		return -EPERM;
 
 
+
+	flush_icache_range(ip, ip + 8);
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -274,46 +261,52 @@ static int
 __ftrace_make_nop(struct module *mod,
 __ftrace_make_nop(struct module *mod,
 		  struct dyn_ftrace *rec, unsigned long addr)
 		  struct dyn_ftrace *rec, unsigned long addr)
 {
 {
-	unsigned char replaced[MCOUNT_INSN_SIZE];
-	unsigned int *op = (unsigned *)&replaced;
-	unsigned char jmp[8];
-	unsigned int *ptr = (unsigned int *)&jmp;
+	unsigned int op;
+	unsigned int jmp[4];
 	unsigned long ip = rec->ip;
 	unsigned long ip = rec->ip;
 	unsigned long tramp;
 	unsigned long tramp;
-	int offset;
 
 
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
 		return -EFAULT;
 		return -EFAULT;
 
 
 	/* Make sure that that this is still a 24bit jump */
 	/* Make sure that that this is still a 24bit jump */
-	if (!is_bl_op(*op)) {
-		printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+	if (!is_bl_op(op)) {
+		printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
 	/* lets find where the pointer goes */
 	/* lets find where the pointer goes */
-	tramp = find_bl_target(ip, *op);
+	tramp = find_bl_target(ip, op);
 
 
 	/*
 	/*
 	 * On PPC32 the trampoline looks like:
 	 * On PPC32 the trampoline looks like:
-	 * lis r11,sym@ha
-	 * addi r11,r11,sym@l
-	 * mtctr r11
-	 * bctr
+	 *  0x3d, 0x60, 0x00, 0x00  lis r11,sym@ha
+	 *  0x39, 0x6b, 0x00, 0x00  addi r11,r11,sym@l
+	 *  0x7d, 0x69, 0x03, 0xa6  mtctr r11
+	 *  0x4e, 0x80, 0x04, 0x20  bctr
 	 */
 	 */
 
 
 	DEBUGP("ip:%lx jumps to %lx", ip, tramp);
 	DEBUGP("ip:%lx jumps to %lx", ip, tramp);
 
 
 	/* Find where the trampoline jumps to */
 	/* Find where the trampoline jumps to */
-	if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
 		printk(KERN_ERR "Failed to read %lx\n", tramp);
 		printk(KERN_ERR "Failed to read %lx\n", tramp);
 		return -EFAULT;
 		return -EFAULT;
 	}
 	}
 
 
-	DEBUGP(" %08x %08x ", ptr[0], ptr[1]);
+	DEBUGP(" %08x %08x ", jmp[0], jmp[1]);
+
+	/* verify that this is what we expect it to be */
+	if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
+	    ((jmp[1] & 0xffff0000) != 0x396b0000) ||
+	    (jmp[2] != 0x7d6903a6) ||
+	    (jmp[3] != 0x4e800420)) {
+		printk(KERN_ERR "Not a trampoline\n");
+		return -EINVAL;
+	}
 
 
-	tramp = (ptr[1] & 0xffff) |
-		((ptr[0] & 0xffff) << 16);
+	tramp = (jmp[1] & 0xffff) |
+		((jmp[0] & 0xffff) << 16);
 	if (tramp & 0x8000)
 	if (tramp & 0x8000)
 		tramp -= 0x10000;
 		tramp -= 0x10000;
 
 
@@ -326,11 +319,13 @@ __ftrace_make_nop(struct module *mod,
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	op[0] = PPC_NOP_INSTR;
+	op = PPC_NOP_INSTR;
 
 
-	if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
 		return -EPERM;
 		return -EPERM;
 
 
+	flush_icache_range(ip, ip + 8);
+
 	return 0;
 	return 0;
 }
 }
 #endif /* PPC64 */
 #endif /* PPC64 */
@@ -384,13 +379,11 @@ int ftrace_make_nop(struct module *mod,
 static int
 static int
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 {
-	unsigned char replaced[MCOUNT_INSN_SIZE * 2];
-	unsigned int *op = (unsigned *)&replaced;
+	unsigned int op[2];
 	unsigned long ip = rec->ip;
 	unsigned long ip = rec->ip;
-	unsigned long offset;
 
 
 	/* read where this goes */
 	/* read where this goes */
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE * 2))
+	if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
 		return -EFAULT;
 		return -EFAULT;
 
 
 	/*
 	/*
@@ -409,43 +402,40 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	/* now calculate a jump to the ftrace caller trampoline */
-	offset = rec->arch.mod->arch.tramp - ip;
-
-	if (test_offset(offset)) {
-		printk(KERN_ERR "REL24 %li out of range!\n",
-		       (long int)offset);
+	/* create the branch to the trampoline */
+	op[0] = create_branch((unsigned int *)ip,
+			      rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+	if (!op[0]) {
+		printk(KERN_ERR "REL24 out of range!\n");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	/* Set to "bl addr" */
-	op[0] = branch_offset(offset);
 	/* ld r2,40(r1) */
 	/* ld r2,40(r1) */
 	op[1] = 0xe8410028;
 	op[1] = 0xe8410028;
 
 
 	DEBUGP("write to %lx\n", rec->ip);
 	DEBUGP("write to %lx\n", rec->ip);
 
 
-	if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE * 2))
+	if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
 		return -EPERM;
 		return -EPERM;
 
 
+	flush_icache_range(ip, ip + 8);
+
 	return 0;
 	return 0;
 }
 }
 #else
 #else
 static int
 static int
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 {
-	unsigned char replaced[MCOUNT_INSN_SIZE];
-	unsigned int *op = (unsigned *)&replaced;
+	unsigned int op;
 	unsigned long ip = rec->ip;
 	unsigned long ip = rec->ip;
-	unsigned long offset;
 
 
 	/* read where this goes */
 	/* read where this goes */
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
 		return -EFAULT;
 		return -EFAULT;
 
 
 	/* It should be pointing to a nop */
 	/* It should be pointing to a nop */
-	if (op[0] != PPC_NOP_INSTR) {
-		printk(KERN_ERR "Expected NOP but have %x\n", op[0]);
+	if (op != PPC_NOP_INSTR) {
+		printk(KERN_ERR "Expected NOP but have %x\n", op);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
@@ -455,23 +445,21 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	/* now calculate a jump to the ftrace caller trampoline */
-	offset = rec->arch.mod->arch.tramp - ip;
-
-	if (test_offset(offset)) {
-		printk(KERN_ERR "REL24 %li out of range!\n",
-		       (long int)offset);
+	/* create the branch to the trampoline */
+	op = create_branch((unsigned int *)ip,
+			   rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+	if (!op) {
+		printk(KERN_ERR "REL24 out of range!\n");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	/* Set to "bl addr" */
-	op[0] = branch_offset(offset);
-
 	DEBUGP("write to %lx\n", rec->ip);
 	DEBUGP("write to %lx\n", rec->ip);
 
 
-	if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
 		return -EPERM;
 		return -EPERM;
 
 
+	flush_icache_range(ip, ip + 8);
+
 	return 0;
 	return 0;
 }
 }
 #endif /* CONFIG_PPC64 */
 #endif /* CONFIG_PPC64 */

+ 3 - 0
arch/powerpc/lib/Makefile

@@ -6,6 +6,9 @@ ifeq ($(CONFIG_PPC64),y)
 EXTRA_CFLAGS		+= -mno-minimal-toc
 EXTRA_CFLAGS		+= -mno-minimal-toc
 endif
 endif
 
 
+CFLAGS_REMOVE_code-patching.o = -pg
+CFLAGS_REMOVE_feature-fixups.o = -pg
+
 obj-y			:= string.o alloc.o \
 obj-y			:= string.o alloc.o \
 			   checksum_$(CONFIG_WORD_SIZE).o
 			   checksum_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o crtsavres.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o crtsavres.o

+ 1 - 1
fs/seq_file.c

@@ -387,7 +387,7 @@ char *mangle_path(char *s, char *p, char *esc)
 	}
 	}
 	return NULL;
 	return NULL;
 }
 }
-EXPORT_SYMBOL_GPL(mangle_path);
+EXPORT_SYMBOL(mangle_path);
 
 
 /*
 /*
  * return the absolute path of 'dentry' residing in mount 'mnt'.
  * return the absolute path of 'dentry' residing in mount 'mnt'.

+ 4 - 2
include/linux/marker.h

@@ -211,8 +211,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
 
 
 /*
 /*
  * marker_synchronize_unregister must be called between the last marker probe
  * marker_synchronize_unregister must be called between the last marker probe
- * unregistration and the end of module exit to make sure there is no caller
- * executing a probe when it is freed.
+ * unregistration and the first one of
+ * - the end of module exit function
+ * - the free of any resource used by the probes
+ * to ensure the code and data are valid for any possibly running probes.
  */
  */
 #define marker_synchronize_unregister() synchronize_sched()
 #define marker_synchronize_unregister() synchronize_sched()
 
 

+ 10 - 17
kernel/trace/ftrace.c

@@ -786,7 +786,6 @@ enum {
 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
 
 
 struct ftrace_iterator {
 struct ftrace_iterator {
-	loff_t			pos;
 	struct ftrace_page	*pg;
 	struct ftrace_page	*pg;
 	unsigned		idx;
 	unsigned		idx;
 	unsigned		flags;
 	unsigned		flags;
@@ -811,6 +810,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 			iter->pg = iter->pg->next;
 			iter->pg = iter->pg->next;
 			iter->idx = 0;
 			iter->idx = 0;
 			goto retry;
 			goto retry;
+		} else {
+			iter->idx = -1;
 		}
 		}
 	} else {
 	} else {
 		rec = &iter->pg->records[iter->idx++];
 		rec = &iter->pg->records[iter->idx++];
@@ -833,8 +834,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 	}
 	}
 	spin_unlock(&ftrace_lock);
 	spin_unlock(&ftrace_lock);
 
 
-	iter->pos = *pos;
-
 	return rec;
 	return rec;
 }
 }
 
 
@@ -842,13 +841,15 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 {
 {
 	struct ftrace_iterator *iter = m->private;
 	struct ftrace_iterator *iter = m->private;
 	void *p = NULL;
 	void *p = NULL;
-	loff_t l = -1;
 
 
-	if (*pos > iter->pos)
-		*pos = iter->pos;
+	if (*pos > 0) {
+		if (iter->idx < 0)
+			return p;
+		(*pos)--;
+		iter->idx--;
+	}
 
 
-	l = *pos;
-	p = t_next(m, p, &l);
+	p = t_next(m, p, pos);
 
 
 	return p;
 	return p;
 }
 }
@@ -859,21 +860,15 @@ static void t_stop(struct seq_file *m, void *p)
 
 
 static int t_show(struct seq_file *m, void *v)
 static int t_show(struct seq_file *m, void *v)
 {
 {
-	struct ftrace_iterator *iter = m->private;
 	struct dyn_ftrace *rec = v;
 	struct dyn_ftrace *rec = v;
 	char str[KSYM_SYMBOL_LEN];
 	char str[KSYM_SYMBOL_LEN];
-	int ret = 0;
 
 
 	if (!rec)
 	if (!rec)
 		return 0;
 		return 0;
 
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
 
 
-	ret = seq_printf(m, "%s\n", str);
-	if (ret < 0) {
-		iter->pos--;
-		iter->idx--;
-	}
+	seq_printf(m, "%s\n", str);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -899,7 +894,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
 	iter->pg = ftrace_pages_start;
 	iter->pg = ftrace_pages_start;
-	iter->pos = 0;
 
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
 	if (!ret) {
@@ -986,7 +980,6 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 
 
 	if (file->f_mode & FMODE_READ) {
 	if (file->f_mode & FMODE_READ) {
 		iter->pg = ftrace_pages_start;
 		iter->pg = ftrace_pages_start;
-		iter->pos = 0;
 		iter->flags = enable ? FTRACE_ITER_FILTER :
 		iter->flags = enable ? FTRACE_ITER_FILTER :
 			FTRACE_ITER_NOTRACE;
 			FTRACE_ITER_NOTRACE;
 
 

+ 2 - 0
kernel/trace/trace.c

@@ -983,6 +983,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   struct trace_array_cpu *data,
 		   unsigned long flags, int pc)
 		   unsigned long flags, int pc)
 {
 {
+#ifdef CONFIG_STACKTRACE
 	struct ring_buffer_event *event;
 	struct ring_buffer_event *event;
 	struct userstack_entry *entry;
 	struct userstack_entry *entry;
 	struct stack_trace trace;
 	struct stack_trace trace;
@@ -1008,6 +1009,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 
 
 	save_stack_trace_user(&trace);
 	save_stack_trace_user(&trace);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
 }
 }
 
 
 void __trace_userstack(struct trace_array *tr,
 void __trace_userstack(struct trace_array *tr,

+ 1 - 0
kernel/trace/trace_branch.c

@@ -6,6 +6,7 @@
 #include <linux/kallsyms.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/spinlock.h>
+#include <linux/irqflags.h>
 #include <linux/debugfs.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/module.h>

+ 246 - 21
kernel/trace/trace_functions_graph.c

@@ -15,15 +15,24 @@
 
 
 #define TRACE_GRAPH_INDENT	2
 #define TRACE_GRAPH_INDENT	2
 
 
+/* Flag options */
 #define TRACE_GRAPH_PRINT_OVERRUN	0x1
 #define TRACE_GRAPH_PRINT_OVERRUN	0x1
+#define TRACE_GRAPH_PRINT_CPU		0x2
+#define TRACE_GRAPH_PRINT_OVERHEAD	0x4
+
 static struct tracer_opt trace_opts[] = {
 static struct tracer_opt trace_opts[] = {
-	/* Display overruns or not */
-	{ TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+	/* Display overruns ? */
+	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+	/* Display CPU ? */
+	{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
+	/* Display Overhead ? */
+	{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
 	{ } /* Empty entry */
 	{ } /* Empty entry */
 };
 };
 
 
 static struct tracer_flags tracer_flags = {
 static struct tracer_flags tracer_flags = {
-	.val = 0, /* Don't display overruns by default */
+	/* Don't display overruns by default */
+	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
 	.opts = trace_opts
 	.opts = trace_opts
 };
 };
 
 
@@ -52,37 +61,208 @@ static void graph_trace_reset(struct trace_array *tr)
 	unregister_ftrace_graph();
 	unregister_ftrace_graph();
 }
 }
 
 
+static inline int log10_cpu(int nb)
+{
+	if (nb / 100)
+		return 3;
+	if (nb / 10)
+		return 2;
+	return 1;
+}
+
+static enum print_line_t
+print_graph_cpu(struct trace_seq *s, int cpu)
+{
+	int i;
+	int ret;
+	int log10_this = log10_cpu(cpu);
+	int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+
+
+	/*
+	 * Start with a space character - to make it stand out
+	 * to the right a bit when trace output is pasted into
+	 * email:
+	 */
+	ret = trace_seq_printf(s, " ");
+
+	/*
+	 * Tricky - we space the CPU field according to the max
+	 * number of online CPUs. On a 2-cpu system it would take
+	 * a maximum of 1 digit - on a 128 cpu system it would
+	 * take up to 3 digits:
+	 */
+	for (i = 0; i < log10_all - log10_this; i++) {
+		ret = trace_seq_printf(s, " ");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+	ret = trace_seq_printf(s, "%d) ", cpu);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+
 /* If the pid changed since the last trace, output this event */
 /* If the pid changed since the last trace, output this event */
 static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
 static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
 {
 {
-	char *comm;
+	char *comm, *prev_comm;
+	pid_t prev_pid;
+	int ret;
 
 
 	if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
 	if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
 		return 1;
 		return 1;
 
 
+	prev_pid = last_pid[cpu];
 	last_pid[cpu] = pid;
 	last_pid[cpu] = pid;
+
 	comm = trace_find_cmdline(pid);
 	comm = trace_find_cmdline(pid);
+	prev_comm = trace_find_cmdline(prev_pid);
 
 
-	return trace_seq_printf(s, "\nCPU[%03d]"
-				    " ------------8<---------- thread %s-%d"
-				    " ------------8<----------\n\n",
-				    cpu, comm, pid);
+/*
+ * Context-switch trace line:
+
+ ------------------------------------------
+ | 1)  migration/0--1  =>  sshd-1755
+ ------------------------------------------
+
+ */
+	ret = trace_seq_printf(s,
+		" ------------------------------------------\n");
+	ret += trace_seq_printf(s, " | %d)  %s-%d  =>  %s-%d\n",
+				  cpu, prev_comm, prev_pid, comm, pid);
+	ret += trace_seq_printf(s,
+		" ------------------------------------------\n\n");
+	return ret;
 }
 }
 
 
+static bool
+trace_branch_is_leaf(struct trace_iterator *iter,
+		struct ftrace_graph_ent_entry *curr)
+{
+	struct ring_buffer_iter *ring_iter;
+	struct ring_buffer_event *event;
+	struct ftrace_graph_ret_entry *next;
+
+	ring_iter = iter->buffer_iter[iter->cpu];
+
+	if (!ring_iter)
+		return false;
+
+	event = ring_buffer_iter_peek(ring_iter, NULL);
+
+	if (!event)
+		return false;
+
+	next = ring_buffer_event_data(event);
+
+	if (next->ent.type != TRACE_GRAPH_RET)
+		return false;
+
+	if (curr->ent.pid != next->ent.pid ||
+			curr->graph_ent.func != next->ret.func)
+		return false;
+
+	return true;
+}
+
+
+static inline int
+print_graph_duration(unsigned long long duration, struct trace_seq *s)
+{
+	unsigned long nsecs_rem = do_div(duration, 1000);
+	return trace_seq_printf(s, "%4llu.%3lu us |  ", duration, nsecs_rem);
+}
+
+/* Signal an execution-time overhead in the output */
+static int
+print_graph_overhead(unsigned long long duration, struct trace_seq *s)
+{
+	/* Duration exceeded 100 usecs (duration is in nsecs) */
+	if (duration > 100000ULL)
+		return trace_seq_printf(s, "! ");
+
+	/* Duration exceeded 10 usecs (duration is in nsecs) */
+	if (duration > 10000ULL)
+		return trace_seq_printf(s, "+ ");
+
+	return trace_seq_printf(s, "  ");
+}
+
+/* Case of a leaf function on its call entry */
 static enum print_line_t
 static enum print_line_t
-print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s,
-		  struct trace_entry *ent, int cpu)
+print_graph_entry_leaf(struct trace_iterator *iter,
+		struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
 {
 {
-	int i;
+	struct ftrace_graph_ret_entry *ret_entry;
+	struct ftrace_graph_ret *graph_ret;
+	struct ring_buffer_event *event;
+	struct ftrace_graph_ent *call;
+	unsigned long long duration;
 	int ret;
 	int ret;
+	int i;
 
 
-	if (!verif_pid(s, ent->pid, cpu))
+	event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+	ret_entry = ring_buffer_event_data(event);
+	graph_ret = &ret_entry->ret;
+	call = &entry->graph_ent;
+	duration = graph_ret->rettime - graph_ret->calltime;
+
+	/* Must not exceed 8 characters: 9999.999 us */
+	if (duration > 10000000ULL)
+		duration = 9999999ULL;
+
+	/* Overhead */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+		ret = print_graph_overhead(duration, s);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	/* Duration */
+	ret = print_graph_duration(duration, s);
+	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_PARTIAL_LINE;
 
 
-	ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
+	/* Function */
+	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
+		ret = trace_seq_printf(s, " ");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	ret = seq_print_ip_sym(s, call->func, 0);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	ret = trace_seq_printf(s, "();\n");
 	if (!ret)
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_PARTIAL_LINE;
 
 
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
+			struct trace_seq *s)
+{
+	int i;
+	int ret;
+	struct ftrace_graph_ent *call = &entry->graph_ent;
+
+	/* No overhead */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+		ret = trace_seq_printf(s, "  ");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	/* No time */
+	ret = trace_seq_printf(s, "            |  ");
+
+	/* Function */
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
 		ret = trace_seq_printf(s, " ");
 		ret = trace_seq_printf(s, " ");
 		if (!ret)
 		if (!ret)
@@ -96,37 +276,82 @@ print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s,
 	ret = trace_seq_printf(s, "() {\n");
 	ret = trace_seq_printf(s, "() {\n");
 	if (!ret)
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_PARTIAL_LINE;
+
 	return TRACE_TYPE_HANDLED;
 	return TRACE_TYPE_HANDLED;
 }
 }
 
 
+static enum print_line_t
+print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
+			struct trace_iterator *iter, int cpu)
+{
+	int ret;
+	struct trace_entry *ent = iter->ent;
+
+	/* Pid */
+	if (!verif_pid(s, ent->pid, cpu))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Cpu */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+		ret = print_graph_cpu(s, cpu);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	if (trace_branch_is_leaf(iter, field))
+		return print_graph_entry_leaf(iter, field, s);
+	else
+		return print_graph_entry_nested(field, s);
+
+}
+
 static enum print_line_t
 static enum print_line_t
 print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 		   struct trace_entry *ent, int cpu)
 		   struct trace_entry *ent, int cpu)
 {
 {
 	int i;
 	int i;
 	int ret;
 	int ret;
+	unsigned long long duration = trace->rettime - trace->calltime;
+
+	/* Must not exceed 8 characters: xxxx.yyy us */
+	if (duration > 10000000ULL)
+		duration = 9999999ULL;
 
 
+	/* Pid */
 	if (!verif_pid(s, ent->pid, cpu))
 	if (!verif_pid(s, ent->pid, cpu))
 		return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_PARTIAL_LINE;
 
 
-	ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
+	/* Cpu */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+		ret = print_graph_cpu(s, cpu);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	/* Overhead */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+		ret = print_graph_overhead(duration, s);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	/* Duration */
+	ret = print_graph_duration(duration, s);
 	if (!ret)
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_PARTIAL_LINE;
 
 
+	/* Closing brace */
 	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
 	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
 		ret = trace_seq_printf(s, " ");
 		ret = trace_seq_printf(s, " ");
 		if (!ret)
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
 	}
 
 
-	ret = trace_seq_printf(s, "} ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ret = trace_seq_printf(s, "%llu\n", trace->rettime - trace->calltime);
+	ret = trace_seq_printf(s, "}\n");
 	if (!ret)
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 		return TRACE_TYPE_PARTIAL_LINE;
 
 
+	/* Overrun */
 	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
 	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
 		ret = trace_seq_printf(s, " (Overruns: %lu)\n",
 		ret = trace_seq_printf(s, " (Overruns: %lu)\n",
 					trace->overrun);
 					trace->overrun);
@@ -146,7 +371,7 @@ print_graph_function(struct trace_iterator *iter)
 	case TRACE_GRAPH_ENT: {
 	case TRACE_GRAPH_ENT: {
 		struct ftrace_graph_ent_entry *field;
 		struct ftrace_graph_ent_entry *field;
 		trace_assign_type(field, entry);
 		trace_assign_type(field, entry);
-		return print_graph_entry(&field->graph_ent, s, entry,
+		return print_graph_entry(field, s, iter,
 					 iter->cpu);
 					 iter->cpu);
 	}
 	}
 	case TRACE_GRAPH_RET: {
 	case TRACE_GRAPH_RET: {
@@ -160,7 +385,7 @@ print_graph_function(struct trace_iterator *iter)
 }
 }
 
 
 static struct tracer graph_trace __read_mostly = {
 static struct tracer graph_trace __read_mostly = {
-	.name	     = "function-graph",
+	.name	     = "function_graph",
 	.init	     = graph_trace_init,
 	.init	     = graph_trace_init,
 	.reset	     = graph_trace_reset,
 	.reset	     = graph_trace_reset,
 	.print_line = print_graph_function,
 	.print_line = print_graph_function,