فهرست منبع

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
Linus Torvalds 13 سال پیش
والد
کامیت
83c2f912b4

+ 8 - 0
Documentation/kernel-parameters.txt

@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	stacktrace	[FTRACE]
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 			Enabled the stack tracer on boot up.
 
 
+	stacktrace_filter=[function-list]
+			[FTRACE] Limit the functions that the stack tracer
+			will trace at boot up. function-list is a comma-separated
+			list of functions. This list can be changed at run
+			time by the stack_trace_filter file in the debugfs
+			tracing directory. Note, this enables stack tracing
+			and the stacktrace above is not needed.
+
 	sti=		[PARISC,HW]
 	sti=		[PARISC,HW]
 			Format: <num>
 			Format: <num>
 			Set the STI (builtin display/keyboard on the HP-PARISC
 			Set the STI (builtin display/keyboard on the HP-PARISC

+ 22 - 0
arch/x86/include/asm/debugreg.h

@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
 
 
 extern void hw_breakpoint_restore(void);
 extern void hw_breakpoint_restore(void);
 
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(int, debug_stack_usage);
+static inline void debug_stack_usage_inc(void)
+{
+	__get_cpu_var(debug_stack_usage)++;
+}
+static inline void debug_stack_usage_dec(void)
+{
+	__get_cpu_var(debug_stack_usage)--;
+}
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
+#else /* !X86_64 */
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
+static inline void debug_stack_usage_inc(void) { }
+static inline void debug_stack_usage_dec(void) { }
+#endif /* X86_64 */
+
+
 #endif	/* __KERNEL__ */
 #endif	/* __KERNEL__ */
 
 
 #endif /* _ASM_X86_DEBUGREG_H */
 #endif /* _ASM_X86_DEBUGREG_H */

+ 12 - 0
arch/x86/include/asm/desc.h

@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 
 extern struct desc_ptr idt_descr;
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
 extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
 
 
 struct gdt_page {
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 	desc->limit = (limit >> 16) & 0xf;
 	desc->limit = (limit >> 16) & 0xf;
 }
 }
 
 
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
 static inline void _set_gate(int gate, unsigned type, void *addr,
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
 {

+ 24 - 0
arch/x86/kernel/cpu/common.c

@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) nmi_idt_table };
 
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
 		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
  */
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
 
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+	return __get_cpu_var(debug_stack_usage) ||
+		(addr <= __get_cpu_var(debug_stack_addr) &&
+		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+	load_idt((const struct desc_ptr *)&idt_descr);
+}
+
 #else	/* CONFIG_X86_64 */
 #else	/* CONFIG_X86_64 */
 
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
 			estacks += exception_stack_sizes[v];
 			estacks += exception_stack_sizes[v];
 			oist->ist[v] = t->x86_tss.ist[v] =
 			oist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
 					(unsigned long)estacks;
+			if (v == DEBUG_STACK-1)
+				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
 		}
 		}
 	}
 	}
 
 

+ 185 - 33
arch/x86/kernel/entry_64.S

@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
 	CFI_ENDPROC
 	CFI_ENDPROC
 END(error_exit)
 END(error_exit)
 
 
+/*
+ * Test if a given stack is an NMI stack or not.
+ */
+	.macro test_in_nmi reg stack nmi_ret normal_ret
+	cmpq %\reg, \stack
+	ja \normal_ret
+	subq $EXCEPTION_STKSZ, %\reg
+	cmpq %\reg, \stack
+	jb \normal_ret
+	jmp \nmi_ret
+	.endm
 
 
 	/* runs on exception stack */
 	/* runs on exception stack */
 ENTRY(nmi)
 ENTRY(nmi)
 	INTR_FRAME
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1
+	/*
+	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
+	 * the iretq it performs will take us out of NMI context.
+	 * This means that we can have nested NMIs where the next
+	 * NMI is using the top of the stack of the previous NMI. We
+	 * can't let it execute because the nested NMI will corrupt the
+	 * stack of the previous NMI. NMI handlers are not re-entrant
+	 * anyway.
+	 *
+	 * To handle this case we do the following:
+	 *  Check a special location on the stack that contains
+	 *  a variable that is set when NMIs are executing.
+	 *  The interrupted task's stack is also checked to see if it
+	 *  is an NMI stack.
+	 *  If the variable is not set and the stack is not the NMI
+	 *  stack then:
+	 *    o Set the special variable on the stack
+	 *    o Copy the interrupt frame into a "saved" location on the stack
+	 *    o Copy the interrupt frame into a "copy" location on the stack
+	 *    o Continue processing the NMI
+	 *  If the variable is set or the previous stack is the NMI stack:
+	 *    o Modify the "copy" location to jump to repeat_nmi
+	 *    o return back to the first NMI
+	 *
+	 * Now on exit of the first NMI, we first clear the stack variable.
+	 * The NMI stack will tell any nested NMIs at that point that it is
+	 * nested. Then we pop the stack normally with iret, and if there was
+	 * a nested NMI that updated the copy interrupt stack frame, a
+	 * jump will be made to the repeat_nmi code that will handle the second
+	 * NMI.
+	 */
+
+	/* Use %rdx as our temp variable throughout */
+	pushq_cfi %rdx
+
+	/*
+	 * Check the special variable on the stack to see if NMIs are
+	 * executing.
+	 */
+	cmp $1, -8(%rsp)
+	je nested_nmi
+
+	/*
+	 * Now test if the previous stack was an NMI stack.
+	 * We need the double check. We check the NMI stack to satisfy the
+	 * race when the first NMI clears the variable before returning.
+	 * We check the variable because the first NMI could be in a
+	 * breakpoint routine using a breakpoint stack.
+	 */
+	lea 6*8(%rsp), %rdx
+	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+
+nested_nmi:
+	/*
+	 * Do nothing if we interrupted the fixup in repeat_nmi.
+	 * It's about to repeat the NMI handler, so we are fine
+	 * with ignoring this one.
+	 */
+	movq $repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja 1f
+	movq $end_repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja nested_nmi_out
+
+1:
+	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
+	leaq -6*8(%rsp), %rdx
+	movq %rdx, %rsp
+	CFI_ADJUST_CFA_OFFSET 6*8
+	pushq_cfi $__KERNEL_DS
+	pushq_cfi %rdx
+	pushfq_cfi
+	pushq_cfi $__KERNEL_CS
+	pushq_cfi $repeat_nmi
+
+	/* Put stack back */
+	addq $(11*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -11*8
+
+nested_nmi_out:
+	popq_cfi %rdx
+
+	/* No need to check faults here */
+	INTERRUPT_RETURN
+
+first_nmi:
+	/*
+	 * Because nested NMIs will use the pushed location where we
+	 * stored rdx, we must keep that space available.
+	 * Here's what our stack frame will look like:
+	 * +-------------------------+
+	 * | original SS             |
+	 * | original Return RSP     |
+	 * | original RFLAGS         |
+	 * | original CS             |
+	 * | original RIP            |
+	 * +-------------------------+
+	 * | temp storage for rdx    |
+	 * +-------------------------+
+	 * | NMI executing variable  |
+	 * +-------------------------+
+	 * | Saved SS                |
+	 * | Saved Return RSP        |
+	 * | Saved RFLAGS            |
+	 * | Saved CS                |
+	 * | Saved RIP               |
+	 * +-------------------------+
+	 * | copied SS               |
+	 * | copied Return RSP       |
+	 * | copied RFLAGS           |
+	 * | copied CS               |
+	 * | copied RIP              |
+	 * +-------------------------+
+	 * | pt_regs                 |
+	 * +-------------------------+
+	 *
+	 * The saved RIP is used to fix up the copied RIP that a nested
+	 * NMI may overwrite. The original stack frame and the temp storage
+	 * are also used by nested NMIs and cannot be trusted on exit.
+	 */
+	/* Set the NMI executing variable on the stack. */
+	pushq_cfi $1
+
+	/* Copy the stack frame to the Saved frame */
+	.rept 5
+	pushq_cfi 6*8(%rsp)
+	.endr
+
+	/* Make another copy, this one may be modified by nested NMIs */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	/* Do not pop rdx, nested NMIs will corrupt it */
+	movq 11*8(%rsp), %rdx
+
+	/*
+	 * Everything below this point can be preempted by a nested
+	 * NMI if the first NMI took an exception. Repeated NMIs
+	 * caused by an exception and nested NMI will start here, and
+	 * can still be preempted by another NMI.
+	 */
+restart_nmi:
+	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
 	subq $ORIG_RAX-R15, %rsp
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	/*
+	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * as we should not be calling schedule in NMI context, even with
+	 * normal interrupts enabled. An NMI should not be setting
+	 * NEED_RESCHED or anything that normal interrupts and exceptions
+	 * might do.
+	 */
 	call save_paranoid
 	call save_paranoid
 	DEFAULT_FRAME 0
 	DEFAULT_FRAME 0
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	movq $-1,%rsi
 	call do_nmi
 	call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* paranoidexit; without TRACE_IRQS_OFF */
-	/* ebx:	no swapgs flag */
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	testl %ebx,%ebx				/* swapgs needed? */
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz nmi_restore
 	jnz nmi_restore
-	testl $3,CS(%rsp)
-	jnz nmi_userspace
 nmi_swapgs:
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 nmi_restore:
 	RESTORE_ALL 8
 	RESTORE_ALL 8
+	/* Clear the NMI executing stack variable */
+	movq $0, 10*8(%rsp)
 	jmp irq_return
 	jmp irq_return
-nmi_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz nmi_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz nmi_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi 			/* arg2: oldset */
-	movq %rsp,%rdi 			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	jmp nmi_userspace
-nmi_schedule:
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	jmp nmi_userspace
-	CFI_ENDPROC
-#else
-	jmp paranoid_exit
 	CFI_ENDPROC
 	CFI_ENDPROC
-#endif
 END(nmi)
 END(nmi)
 
 
+	/*
+	 * If an NMI hit an iret because of an exception or breakpoint,
+	 * it can lose its NMI context, and a nested NMI may come in.
+	 * In that case, the nested NMI will change the preempted NMI's
+	 * stack to jump to here when it does the final iret.
+	 */
+repeat_nmi:
+	INTR_FRAME
+	/* Update the stack variable to say we are still in NMI */
+	movq $1, 5*8(%rsp)
+
+	/* copy the saved stack back to copy stack */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	jmp restart_nmi
+	CFI_ENDPROC
+end_repeat_nmi:
+
 ENTRY(ignore_sysret)
 ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax
 	mov $-ENOSYS,%eax

+ 4 - 0
arch/x86/kernel/head_64.S

@@ -417,6 +417,10 @@ ENTRY(phys_base)
 ENTRY(idt_table)
 ENTRY(idt_table)
 	.skip IDT_ENTRIES * 16
 	.skip IDT_ENTRIES * 16
 
 
+	.align L1_CACHE_BYTES
+ENTRY(nmi_idt_table)
+	.skip IDT_ENTRIES * 16
+
 	__PAGE_ALIGNED_BSS
 	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
 ENTRY(empty_zero_page)

+ 102 - 0
arch/x86/kernel/nmi.c

@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		unknown_nmi_error(reason, regs);
 		unknown_nmi_error(reason, regs);
 }
 }
 
 
+/*
+ * An NMI can hit a breakpoint, which will cause it to lose its
+ * NMI context with the CPU when the breakpoint does an iret.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * For i386, NMIs use the same stack as the kernel, and we can
+ * add a workaround to the iret problem in C. Simply have 3 states
+ * the NMI can be in.
+ *
+ *  1) not running
+ *  2) executing
+ *  3) latched
+ *
+ * When no NMI is in progress, it is in the "not running" state.
+ * When an NMI comes in, it goes into the "executing" state.
+ * Normally, if another NMI is triggered, it does not interrupt
+ * the running NMI and the HW will simply latch it so that when
+ * the first NMI finishes, it will restart the second NMI.
+ * (Note, the latch is binary, thus multiple NMIs triggering,
+ *  when one is running, are ignored. Only one NMI is restarted.)
+ *
+ * If an NMI hits a breakpoint that executes an iret, another
+ * NMI can preempt it. We do not want to allow this new NMI
+ * to run, but we want to execute it when the first one finishes.
+ * We set the state to "latched", and the first NMI will perform
+ * a cmpxchg on the state, and if it doesn't successfully
+ * reset the state to "not running" it will restart the next
+ * NMI.
+ */
+enum nmi_states {
+	NMI_NOT_RUNNING,
+	NMI_EXECUTING,
+	NMI_LATCHED,
+};
+static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+
+#define nmi_nesting_preprocess(regs)					\
+	do {								\
+		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
+			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
+			return;						\
+		}							\
+	nmi_restart:							\
+		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
+	} while (0)
+
+#define nmi_nesting_postprocess()					\
+	do {								\
+		if (cmpxchg(&__get_cpu_var(nmi_state),			\
+		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
+			goto nmi_restart;				\
+	} while (0)
+#else /* x86_64 */
+/*
+ * In x86_64 things are a bit more difficult. This has the same problem
+ * where an NMI hitting a breakpoint that calls iret will remove the
+ * NMI context, allowing a nested NMI to enter. What makes this more
+ * difficult is that both NMIs and breakpoints have their own stack.
+ * When a new NMI or breakpoint is executed, the stack is set to a fixed
+ * point. If an NMI is nested, it will have its stack set at that same
+ * fixed address that the first NMI had, and will start corrupting the
+ * stack. This is handled in entry_64.S, but the same problem exists with
+ * the breakpoint stack.
+ *
+ * If a breakpoint is being processed and the debug stack is being used,
+ * and an NMI comes in and also hits a breakpoint, the stack pointer
+ * will be set to the same fixed address as the breakpoint that was
+ * interrupted, causing that stack to be corrupted. To handle this case,
+ * check if the stack that was interrupted is the debug stack, and if
+ * so, change the IDT so that new breakpoints will use the current stack
+ * and not switch to the fixed address. On return of the NMI, switch back
+ * to the original IDT.
+ */
+static DEFINE_PER_CPU(int, update_debug_stack);
+
+static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+{
+	/*
+	 * If we interrupted a breakpoint, it is possible that
+	 * the nmi handler will have breakpoints too. We need to
+	 * change the IDT such that breakpoints that happen here
+	 * continue to use the NMI stack.
+	 */
+	if (unlikely(is_debug_stack(regs->sp))) {
+		debug_stack_set_zero();
+		__get_cpu_var(update_debug_stack) = 1;
+	}
+}
+
+static inline void nmi_nesting_postprocess(void)
+{
+	if (unlikely(__get_cpu_var(update_debug_stack)))
+		debug_stack_reset();
+}
+#endif
+
 dotraplinkage notrace __kprobes void
 dotraplinkage notrace __kprobes void
 do_nmi(struct pt_regs *regs, long error_code)
 do_nmi(struct pt_regs *regs, long error_code)
 {
 {
+	nmi_nesting_preprocess(regs);
+
 	nmi_enter();
 	nmi_enter();
 
 
 	inc_irq_stat(__nmi_count);
 	inc_irq_stat(__nmi_count);
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
 		default_do_nmi(regs);
 		default_do_nmi(regs);
 
 
 	nmi_exit();
 	nmi_exit();
+
+	/* On i386, may loop back to preprocess */
+	nmi_nesting_postprocess();
 }
 }
 
 
 void stop_nmi(void)
 void stop_nmi(void)

+ 20 - 0
arch/x86/kernel/traps.c

@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 			== NOTIFY_STOP)
 			== NOTIFY_STOP)
 		return;
 		return;
 
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 }
 }
 
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 							SIGTRAP) == NOTIFY_STOP)
 							SIGTRAP) == NOTIFY_STOP)
 		return;
 		return;
 
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
+
 	/* It's safe to allow irq's after DR6 has been saved */
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 	preempt_conditional_sti(regs);
 
 
@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 				error_code, 1);
 				error_code, 1);
 		preempt_conditional_cli(regs);
 		preempt_conditional_cli(regs);
+		debug_stack_usage_dec();
 		return;
 		return;
 	}
 	}
 
 
@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
 		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 
 
 	return;
 	return;
 }
 }
@@ -718,4 +732,10 @@ void __init trap_init(void)
 	cpu_init();
 	cpu_init();
 
 
 	x86_init.irqs.trap_init();
 	x86_init.irqs.trap_init();
+
+#ifdef CONFIG_X86_64
+	memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+	set_nmi_gate(1, &debug);
+	set_nmi_gate(3, &int3);
+#endif
 }
 }

+ 5 - 0
include/linux/compiler-gcc.h

@@ -50,6 +50,11 @@
 # define inline		inline		__attribute__((always_inline))
 # define inline		inline		__attribute__((always_inline))
 # define __inline__	__inline__	__attribute__((always_inline))
 # define __inline__	__inline__	__attribute__((always_inline))
 # define __inline	__inline	__attribute__((always_inline))
 # define __inline	__inline	__attribute__((always_inline))
+#else
+/* A lot of inline functions can cause havoc with function tracing */
+# define inline		inline		notrace
+# define __inline__	__inline__	notrace
+# define __inline	__inline	notrace
 #endif
 #endif
 
 
 #define __deprecated			__attribute__((deprecated))
 #define __deprecated			__attribute__((deprecated))

+ 72 - 5
include/linux/ftrace.h

@@ -133,6 +133,8 @@ struct ftrace_func_command {
 int ftrace_arch_code_modify_prepare(void);
 int ftrace_arch_code_modify_prepare(void);
 int ftrace_arch_code_modify_post_process(void);
 int ftrace_arch_code_modify_post_process(void);
 
 
+void ftrace_bug(int err, unsigned long ip);
+
 struct seq_file;
 struct seq_file;
 
 
 struct ftrace_probe_ops {
 struct ftrace_probe_ops {
@@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
 
 
 enum {
 enum {
 	FTRACE_FL_ENABLED	= (1 << 30),
 	FTRACE_FL_ENABLED	= (1 << 30),
-	FTRACE_FL_FREE		= (1 << 31),
 };
 };
 
 
 #define FTRACE_FL_MASK		(0x3UL << 30)
 #define FTRACE_FL_MASK		(0x3UL << 30)
@@ -172,10 +173,7 @@ struct dyn_ftrace {
 		unsigned long		ip; /* address of mcount call-site */
 		unsigned long		ip; /* address of mcount call-site */
 		struct dyn_ftrace	*freelist;
 		struct dyn_ftrace	*freelist;
 	};
 	};
-	union {
-		unsigned long		flags;
-		struct dyn_ftrace	*newlist;
-	};
+	unsigned long		flags;
 	struct dyn_arch_ftrace		arch;
 	struct dyn_arch_ftrace		arch;
 };
 };
 
 
@@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
 
 
+enum {
+	FTRACE_UPDATE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_START_FUNC_RET		= (1 << 3),
+	FTRACE_STOP_FUNC_RET		= (1 << 4),
+};
+
+enum {
+	FTRACE_UPDATE_IGNORE,
+	FTRACE_UPDATE_MAKE_CALL,
+	FTRACE_UPDATE_MAKE_NOP,
+};
+
+enum {
+	FTRACE_ITER_FILTER	= (1 << 0),
+	FTRACE_ITER_NOTRACE	= (1 << 1),
+	FTRACE_ITER_PRINTALL	= (1 << 2),
+	FTRACE_ITER_DO_HASH	= (1 << 3),
+	FTRACE_ITER_HASH	= (1 << 4),
+	FTRACE_ITER_ENABLED	= (1 << 5),
+};
+
+void arch_ftrace_update_code(int command);
+
+struct ftrace_rec_iter;
+
+struct ftrace_rec_iter *ftrace_rec_iter_start(void);
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
+
+int ftrace_update_record(struct dyn_ftrace *rec, int enable);
+int ftrace_test_record(struct dyn_ftrace *rec, int enable);
+void ftrace_run_stop_machine(int command);
+int ftrace_location(unsigned long ip);
+
+extern ftrace_func_t ftrace_trace_function;
+
+int ftrace_regex_open(struct ftrace_ops *ops, int flag,
+		  struct inode *inode, struct file *file);
+ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos);
+ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+int ftrace_regex_release(struct inode *inode, struct file *file);
+
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
+
 /* defined in arch */
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
 extern int ftrace_dyn_arch_init(void *data);
@@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end)
 {
 {
 	return 0;
 	return 0;
 }
 }
+
+/*
+ * Again users of functions that have ftrace_ops may not
+ * have them defined when ftrace is not enabled, but these
+ * functions may still be called. Use a macro instead of inline.
+ */
+#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+
+static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+{
+	return -ENODEV;
+}
+static inline int
+ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 
 /* totally disable ftrace - can not re-enable after this */
 /* totally disable ftrace - can not re-enable after this */

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 423 - 190
kernel/trace/ftrace.c


+ 142 - 141
kernel/trace/trace_events_filter.c

@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
 
 
+static int create_filter_start(char *filter_str, bool set_str,
+			       struct filter_parse_state **psp,
+			       struct event_filter **filterp)
+{
+	struct event_filter *filter;
+	struct filter_parse_state *ps = NULL;
+	int err = 0;
+
+	WARN_ON_ONCE(*psp || *filterp);
+
+	/* allocate everything, and if any fails, free all and fail */
+	filter = __alloc_filter();
+	if (filter && set_str)
+		err = replace_filter_string(filter, filter_str);
+
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+
+	if (!filter || !ps || err) {
+		kfree(ps);
+		__free_filter(filter);
+		return -ENOMEM;
+	}
+
+	/* we're committed to creating a new filter */
+	*filterp = filter;
+	*psp = ps;
+
+	parse_init(ps, filter_ops, filter_str);
+	err = filter_parse(ps);
+	if (err && set_str)
+		append_filter_err(ps, filter);
+	return err;
+}
+
+static void create_filter_finish(struct filter_parse_state *ps)
+{
+	if (ps) {
+		filter_opstack_clear(ps);
+		postfix_clear(ps);
+		kfree(ps);
+	}
+}
+
+/**
+ * create_filter - create a filter for a ftrace_event_call
+ * @call: ftrace_event_call to create a filter for
+ * @filter_str: filter string
+ * @set_str: remember @filter_str and enable detailed error in filter
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Creates a filter for @call with @filter_str.  If @set_str is %true,
+ * @filter_str is copied and recorded in the new filter.
+ *
+ * On success, returns 0 and *@filterp points to the new filter.  On
+ * failure, returns -errno and *@filterp may point to %NULL or to a new
+ * filter.  In the latter case, the returned filter contains error
+ * information if @set_str is %true and the caller is responsible for
+ * freeing it.
+ */
+static int create_filter(struct ftrace_event_call *call,
+			 char *filter_str, bool set_str,
+			 struct event_filter **filterp)
+{
+	struct event_filter *filter = NULL;
+	struct filter_parse_state *ps = NULL;
+	int err;
+
+	err = create_filter_start(filter_str, set_str, &ps, &filter);
+	if (!err) {
+		err = replace_preds(call, filter, ps, filter_str, false);
+		if (err && set_str)
+			append_filter_err(ps, filter);
+	}
+	create_filter_finish(ps);
+
+	*filterp = filter;
+	return err;
+}
+
+/**
+ * create_system_filter - create a filter for an event_subsystem
+ * @system: event_subsystem to create a filter for
+ * @filter_str: filter string
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Identical to create_filter() except that it creates a subsystem filter
+ * and always remembers @filter_str.
+ */
+static int create_system_filter(struct event_subsystem *system,
+				char *filter_str, struct event_filter **filterp)
+{
+	struct event_filter *filter = NULL;
+	struct filter_parse_state *ps = NULL;
+	int err;
+
+	err = create_filter_start(filter_str, true, &ps, &filter);
+	if (!err) {
+		err = replace_system_preds(system, ps, filter_str);
+		if (!err) {
+			/* System filters just show a default message */
+			kfree(filter->filter_string);
+			filter->filter_string = NULL;
+		} else {
+			append_filter_err(ps, filter);
+		}
+	}
+	create_filter_finish(ps);
+
+	*filterp = filter;
+	return err;
+}
+
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 {
 {
-	struct filter_parse_state *ps;
 	struct event_filter *filter;
 	struct event_filter *filter;
-	struct event_filter *tmp;
 	int err = 0;
 	int err = 0;
 
 
 	mutex_lock(&event_mutex);
 	mutex_lock(&event_mutex);
@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 		goto out_unlock;
 		goto out_unlock;
 	}
 	}
 
 
-	err = -ENOMEM;
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto out_unlock;
-
-	filter = __alloc_filter();
-	if (!filter) {
-		kfree(ps);
-		goto out_unlock;
-	}
-
-	replace_filter_string(filter, filter_string);
-
-	parse_init(ps, filter_ops, filter_string);
-	err = filter_parse(ps);
-	if (err) {
-		append_filter_err(ps, filter);
-		goto out;
-	}
+	err = create_filter(call, filter_string, true, &filter);
 
 
-	err = replace_preds(call, filter, ps, filter_string, false);
-	if (err) {
-		filter_disable(call);
-		append_filter_err(ps, filter);
-	} else
-		call->flags |= TRACE_EVENT_FL_FILTERED;
-out:
 	/*
 	/*
 	 * Always swap the call filter with the new filter
 	 * Always swap the call filter with the new filter
 	 * even if there was an error. If there was an error
 	 * even if there was an error. If there was an error
 	 * in the filter, we disable the filter and show the error
 	 * in the filter, we disable the filter and show the error
 	 * string
 	 * string
 	 */
 	 */
-	tmp = call->filter;
-	rcu_assign_pointer(call->filter, filter);
-	if (tmp) {
-		/* Make sure the call is done with the filter */
-		synchronize_sched();
-		__free_filter(tmp);
+	if (filter) {
+		struct event_filter *tmp = call->filter;
+
+		if (!err)
+			call->flags |= TRACE_EVENT_FL_FILTERED;
+		else
+			filter_disable(call);
+
+		rcu_assign_pointer(call->filter, filter);
+
+		if (tmp) {
+			/* Make sure the call is done with the filter */
+			synchronize_sched();
+			__free_filter(tmp);
+		}
 	}
 	}
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
 out_unlock:
 out_unlock:
 	mutex_unlock(&event_mutex);
 	mutex_unlock(&event_mutex);
 
 
@@ -1811,7 +1902,6 @@ out_unlock:
 int apply_subsystem_event_filter(struct event_subsystem *system,
 int apply_subsystem_event_filter(struct event_subsystem *system,
 				 char *filter_string)
 				 char *filter_string)
 {
 {
-	struct filter_parse_state *ps;
 	struct event_filter *filter;
 	struct event_filter *filter;
 	int err = 0;
 	int err = 0;
 
 
@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 		goto out_unlock;
 		goto out_unlock;
 	}
 	}
 
 
-	err = -ENOMEM;
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto out_unlock;
-
-	filter = __alloc_filter();
-	if (!filter)
-		goto out;
-
-	/* System filters just show a default message */
-	kfree(filter->filter_string);
-	filter->filter_string = NULL;
-
-	/*
-	 * No event actually uses the system filter
-	 * we can free it without synchronize_sched().
-	 */
-	__free_filter(system->filter);
-	system->filter = filter;
-
-	parse_init(ps, filter_ops, filter_string);
-	err = filter_parse(ps);
-	if (err)
-		goto err_filter;
-
-	err = replace_system_preds(system, ps, filter_string);
-	if (err)
-		goto err_filter;
-
-out:
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
+	err = create_system_filter(system, filter_string, &filter);
+	if (filter) {
+		/*
+		 * No event actually uses the system filter
+		 * we can free it without synchronize_sched().
+		 */
+		__free_filter(system->filter);
+		system->filter = filter;
+	}
 out_unlock:
 out_unlock:
 	mutex_unlock(&event_mutex);
 	mutex_unlock(&event_mutex);
 
 
 	return err;
 	return err;
-
-err_filter:
-	replace_filter_string(filter, filter_string);
-	append_filter_err(ps, system->filter);
-	goto out;
 }
 }
 
 
 #ifdef CONFIG_PERF_EVENTS
 #ifdef CONFIG_PERF_EVENTS
@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 {
 {
 	int err;
 	int err;
 	struct event_filter *filter;
 	struct event_filter *filter;
-	struct filter_parse_state *ps;
 	struct ftrace_event_call *call;
 	struct ftrace_event_call *call;
 
 
 	mutex_lock(&event_mutex);
 	mutex_lock(&event_mutex);
@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 	if (event->filter)
 	if (event->filter)
 		goto out_unlock;
 		goto out_unlock;
 
 
-	filter = __alloc_filter();
-	if (!filter) {
-		err = PTR_ERR(filter);
-		goto out_unlock;
-	}
-
-	err = -ENOMEM;
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto free_filter;
-
-	parse_init(ps, filter_ops, filter_str);
-	err = filter_parse(ps);
-	if (err)
-		goto free_ps;
-
-	err = replace_preds(call, filter, ps, filter_str, false);
+	err = create_filter(call, filter_str, false, &filter);
 	if (!err)
 	if (!err)
 		event->filter = filter;
 		event->filter = filter;
-
-free_ps:
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
-
-free_filter:
-	if (err)
+	else
 		__free_filter(filter);
 		__free_filter(filter);
 
 
 out_unlock:
 out_unlock:
@@ -1954,43 +1991,6 @@ out_unlock:
 #define CREATE_TRACE_POINTS
 #define CREATE_TRACE_POINTS
 #include "trace_events_filter_test.h"
 #include "trace_events_filter_test.h"
 
 
-static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
-			   struct event_filter **pfilter)
-{
-	struct event_filter *filter;
-	struct filter_parse_state *ps;
-	int err = -ENOMEM;
-
-	filter = __alloc_filter();
-	if (!filter)
-		goto out;
-
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
-	if (!ps)
-		goto free_filter;
-
-	parse_init(ps, filter_ops, filter_str);
-	err = filter_parse(ps);
-	if (err)
-		goto free_ps;
-
-	err = replace_preds(call, filter, ps, filter_str, false);
-	if (!err)
-		*pfilter = filter;
-
- free_ps:
-	filter_opstack_clear(ps);
-	postfix_clear(ps);
-	kfree(ps);
-
- free_filter:
-	if (err)
-		__free_filter(filter);
-
- out:
-	return err;
-}
-
 #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
 #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
 { \
 { \
 	.filter = FILTER, \
 	.filter = FILTER, \
@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
 		struct test_filter_data_t *d = &test_filter_data[i];
 		struct test_filter_data_t *d = &test_filter_data[i];
 		int err;
 		int err;
 
 
-		err = test_get_filter(d->filter, &event_ftrace_test_filter,
-				      &filter);
+		err = create_filter(&event_ftrace_test_filter, d->filter,
+				    false, &filter);
 		if (err) {
 		if (err) {
 			printk(KERN_INFO
 			printk(KERN_INFO
 			       "Failed to get filter for '%s', err %d\n",
 			       "Failed to get filter for '%s', err %d\n",
 			       d->filter, err);
 			       d->filter, err);
+			__free_filter(filter);
 			break;
 			break;
 		}
 		}
 
 

+ 29 - 1
kernel/trace/trace_stack.c

@@ -13,6 +13,9 @@
 #include <linux/sysctl.h>
 #include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/fs.h>
+
+#include <asm/setup.h>
+
 #include "trace.h"
 #include "trace.h"
 
 
 #define STACK_TRACE_ENTRIES 500
 #define STACK_TRACE_ENTRIES 500
@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
 static struct ftrace_ops trace_ops __read_mostly =
 static struct ftrace_ops trace_ops __read_mostly =
 {
 {
 	.func = stack_trace_call,
 	.func = stack_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL,
 };
 };
 
 
 static ssize_t
 static ssize_t
@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
 	.release	= seq_release,
 	.release	= seq_release,
 };
 };
 
 
+static int
+stack_trace_filter_open(struct inode *inode, struct file *file)
+{
+	return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+				 inode, file);
+}
+
+static const struct file_operations stack_trace_filter_fops = {
+	.open = stack_trace_filter_open,
+	.read = seq_read,
+	.write = ftrace_filter_write,
+	.llseek = ftrace_regex_lseek,
+	.release = ftrace_regex_release,
+};
+
 int
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
 stack_trace_sysctl(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp,
 		   void __user *buffer, size_t *lenp,
@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 	return ret;
 	return ret;
 }
 }
 
 
+static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
+
 static __init int enable_stacktrace(char *str)
 static __init int enable_stacktrace(char *str)
 {
 {
+	if (strncmp(str, "_filter=", 8) == 0)
+		strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
+
 	stack_tracer_enabled = 1;
 	stack_tracer_enabled = 1;
 	last_stack_tracer_enabled = 1;
 	last_stack_tracer_enabled = 1;
 	return 1;
 	return 1;
@@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
 	trace_create_file("stack_trace", 0444, d_tracer,
 	trace_create_file("stack_trace", 0444, d_tracer,
 			NULL, &stack_trace_fops);
 			NULL, &stack_trace_fops);
 
 
+	trace_create_file("stack_trace_filter", 0444, d_tracer,
+			NULL, &stack_trace_filter_fops);
+
+	if (stack_trace_filter_buf[0])
+		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
+
 	if (stack_tracer_enabled)
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);
 		register_ftrace_function(&trace_ops);
 
 

+ 1 - 1
scripts/recordmcount.h

@@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr,  /* is SHT_REL or SHT_RELA */
 		succeed_file();
 		succeed_file();
 	}
 	}
 	if (w(txthdr->sh_type) != SHT_PROGBITS ||
 	if (w(txthdr->sh_type) != SHT_PROGBITS ||
-	    !(w(txthdr->sh_flags) & SHF_EXECINSTR))
+	    !(_w(txthdr->sh_flags) & SHF_EXECINSTR))
 		return NULL;
 		return NULL;
 	return txtname;
 	return txtname;
 }
 }

+ 2 - 0
tools/perf/Documentation/perf-list.txt

@@ -21,6 +21,8 @@ EVENT MODIFIERS
 Events can optionally have a modifer by appending a colon and one or
 Events can optionally have a modifer by appending a colon and one or
 more modifiers.  Modifiers allow the user to restrict when events are
 more modifiers.  Modifiers allow the user to restrict when events are
 counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
 counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
+Additional modifiers are 'G' for guest counting (in KVM guests) and 'H'
+for host counting (not in KVM guests).
 
 
 The 'p' modifier can be used for specifying how precise the instruction
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier is currently only implemented for
 address should be. The 'p' modifier is currently only implemented for

+ 1 - 0
tools/perf/MANIFEST

@@ -1,4 +1,5 @@
 tools/perf
 tools/perf
+include/linux/const.h
 include/linux/perf_event.h
 include/linux/perf_event.h
 include/linux/rbtree.h
 include/linux/rbtree.h
 include/linux/list.h
 include/linux/list.h

+ 1 - 6
tools/perf/builtin-annotate.c

@@ -235,7 +235,7 @@ out_delete:
 }
 }
 
 
 static const char * const annotate_usage[] = {
 static const char * const annotate_usage[] = {
-	"perf annotate [<options>] <command>",
+	"perf annotate [<options>]",
 	NULL
 	NULL
 };
 };
 
 
@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 		annotate.sym_hist_filter = argv[0];
 		annotate.sym_hist_filter = argv[0];
 	}
 	}
 
 
-	if (field_sep && *field_sep == '.') {
-		pr_err("'.' is the only non valid --field-separator argument\n");
-		return -1;
-	}
-
 	return __cmd_annotate(&annotate);
 	return __cmd_annotate(&annotate);
 }
 }

+ 3 - 0
tools/perf/builtin-kmem.c

@@ -108,7 +108,9 @@ static void setup_cpunode_map(void)
 				continue;
 				continue;
 			cpunode_map[cpu] = mem;
 			cpunode_map[cpu] = mem;
 		}
 		}
+		closedir(dir2);
 	}
 	}
+	closedir(dir1);
 }
 }
 
 
 static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
 static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
 			break;
 			break;
 		if (sort_dimension__add(tok, sort_list) < 0) {
 		if (sort_dimension__add(tok, sort_list) < 0) {
 			error("Unknown --sort key: '%s'", tok);
 			error("Unknown --sort key: '%s'", tok);
+			free(str);
 			return -1;
 			return -1;
 		}
 		}
 	}
 	}

+ 2 - 4
tools/perf/builtin-kvm.c

@@ -22,9 +22,6 @@
 static const char		*file_name;
 static const char		*file_name;
 static char			name_buffer[256];
 static char			name_buffer[256];
 
 
-bool				perf_host = 1;
-bool				perf_guest;
-
 static const char * const kvm_usage[] = {
 static const char * const kvm_usage[] = {
 	"perf kvm [<options>] {top|record|report|diff|buildid-list}",
 	"perf kvm [<options>] {top|record|report|diff|buildid-list}",
 	NULL
 	NULL
@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv)
 
 
 int cmd_kvm(int argc, const char **argv, const char *prefix __used)
 int cmd_kvm(int argc, const char **argv, const char *prefix __used)
 {
 {
-	perf_host = perf_guest = 0;
+	perf_host  = 0;
+	perf_guest = 1;
 
 
 	argc = parse_options(argc, argv, kvm_options, kvm_usage,
 	argc = parse_options(argc, argv, kvm_options, kvm_usage,
 			PARSE_OPT_STOP_AT_NON_OPTION);
 			PARSE_OPT_STOP_AT_NON_OPTION);

+ 4 - 0
tools/perf/builtin-script.c

@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix)
 			__script_root = get_script_root(&script_dirent, suffix);
 			__script_root = get_script_root(&script_dirent, suffix);
 			if (__script_root && !strcmp(script_root, __script_root)) {
 			if (__script_root && !strcmp(script_root, __script_root)) {
 				free(__script_root);
 				free(__script_root);
+				closedir(lang_dir);
+				closedir(scripts_dir);
 				snprintf(script_path, MAXPATHLEN, "%s/%s",
 				snprintf(script_path, MAXPATHLEN, "%s/%s",
 					 lang_path, script_dirent.d_name);
 					 lang_path, script_dirent.d_name);
 				return strdup(script_path);
 				return strdup(script_path);
 			}
 			}
 			free(__script_root);
 			free(__script_root);
 		}
 		}
+		closedir(lang_dir);
 	}
 	}
+	closedir(scripts_dir);
 
 
 	return NULL;
 	return NULL;
 }
 }

+ 1 - 1
tools/perf/builtin-test.c

@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
 	NULL,
 	NULL,
 	};
 	};
 	const struct option test_options[] = {
 	const struct option test_options[] = {
-	OPT_INTEGER('v', "verbose", &verbose,
+	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
 		    "be more verbose (show symbol address, etc)"),
 	OPT_END()
 	OPT_END()
 	};
 	};

+ 4 - 1
tools/perf/builtin-top.c

@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 	if (he == NULL)
 	if (he == NULL)
 		return NULL;
 		return NULL;
 
 
-	evsel->hists.stats.total_period += sample->period;
 	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
 	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
 	return he;
 	return he;
 }
 }
@@ -889,6 +888,10 @@ try_again:
 				ui__warning("The %s event is not supported.\n",
 				ui__warning("The %s event is not supported.\n",
 					    event_name(counter));
 					    event_name(counter));
 				goto out_err;
 				goto out_err;
+			} else if (err == EMFILE) {
+				ui__warning("Too many events are opened.\n"
+					    "Try again after reducing the number of events\n");
+				goto out_err;
 			}
 			}
 
 
 			ui__warning("The sys_perf_event_open() syscall "
 			ui__warning("The sys_perf_event_open() syscall "

+ 4 - 1
tools/perf/util/evlist.c

@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
 		.type = PERF_TYPE_HARDWARE,
 		.type = PERF_TYPE_HARDWARE,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 	};
 	};
-	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
+	struct perf_evsel *evsel;
+
+	event_attr_init(&attr);
 
 
+	evsel = perf_evsel__new(&attr, 0);
 	if (evsel == NULL)
 	if (evsel == NULL)
 		goto error;
 		goto error;
 
 

+ 68 - 63
tools/perf/util/hist.c

@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	}
 	}
 }
 }
 
 
-static void hist_entry__add_cpumode_period(struct hist_entry *self,
+static void hist_entry__add_cpumode_period(struct hist_entry *he,
 					   unsigned int cpumode, u64 period)
 					   unsigned int cpumode, u64 period)
 {
 {
 	switch (cpumode) {
 	switch (cpumode) {
 	case PERF_RECORD_MISC_KERNEL:
 	case PERF_RECORD_MISC_KERNEL:
-		self->period_sys += period;
+		he->period_sys += period;
 		break;
 		break;
 	case PERF_RECORD_MISC_USER:
 	case PERF_RECORD_MISC_USER:
-		self->period_us += period;
+		he->period_us += period;
 		break;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
 	case PERF_RECORD_MISC_GUEST_KERNEL:
-		self->period_guest_sys += period;
+		he->period_guest_sys += period;
 		break;
 		break;
 	case PERF_RECORD_MISC_GUEST_USER:
 	case PERF_RECORD_MISC_GUEST_USER:
-		self->period_guest_us += period;
+		he->period_guest_us += period;
 		break;
 		break;
 	default:
 	default:
 		break;
 		break;
@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists,
 static struct hist_entry *hist_entry__new(struct hist_entry *template)
 static struct hist_entry *hist_entry__new(struct hist_entry *template)
 {
 {
 	size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
 	size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
-	struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
+	struct hist_entry *he = malloc(sizeof(*he) + callchain_size);
 
 
-	if (self != NULL) {
-		*self = *template;
-		self->nr_events = 1;
-		if (self->ms.map)
-			self->ms.map->referenced = true;
+	if (he != NULL) {
+		*he = *template;
+		he->nr_events = 1;
+		if (he->ms.map)
+			he->ms.map->referenced = true;
 		if (symbol_conf.use_callchain)
 		if (symbol_conf.use_callchain)
-			callchain_init(self->callchain);
+			callchain_init(he->callchain);
 	}
 	}
 
 
-	return self;
+	return he;
 }
 }
 
 
 static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
 static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
 	return ret;
 	return ret;
 }
 }
 
 
-static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
-					    u64 total_samples, int left_margin)
+static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
+					    u64 total_samples, int left_margin,
+					    FILE *fp)
 {
 {
 	struct rb_node *rb_node;
 	struct rb_node *rb_node;
 	struct callchain_node *chain;
 	struct callchain_node *chain;
 	size_t ret = 0;
 	size_t ret = 0;
 	u32 entries_printed = 0;
 	u32 entries_printed = 0;
 
 
-	rb_node = rb_first(&self->sorted_chain);
+	rb_node = rb_first(&he->sorted_chain);
 	while (rb_node) {
 	while (rb_node) {
 		double percent;
 		double percent;
 
 
@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
 	}
 	}
 }
 }
 
 
-static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
+static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
 				     size_t size, struct hists *pair_hists,
 				     size_t size, struct hists *pair_hists,
 				     bool show_displacement, long displacement,
 				     bool show_displacement, long displacement,
-				     bool color, u64 session_total)
+				     bool color, u64 total_period)
 {
 {
 	u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
 	u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
 	u64 nr_events;
 	u64 nr_events;
 	const char *sep = symbol_conf.field_sep;
 	const char *sep = symbol_conf.field_sep;
 	int ret;
 	int ret;
 
 
-	if (symbol_conf.exclude_other && !self->parent)
+	if (symbol_conf.exclude_other && !he->parent)
 		return 0;
 		return 0;
 
 
 	if (pair_hists) {
 	if (pair_hists) {
-		period = self->pair ? self->pair->period : 0;
-		nr_events = self->pair ? self->pair->nr_events : 0;
+		period = he->pair ? he->pair->period : 0;
+		nr_events = he->pair ? he->pair->nr_events : 0;
 		total = pair_hists->stats.total_period;
 		total = pair_hists->stats.total_period;
-		period_sys = self->pair ? self->pair->period_sys : 0;
-		period_us = self->pair ? self->pair->period_us : 0;
-		period_guest_sys = self->pair ? self->pair->period_guest_sys : 0;
-		period_guest_us = self->pair ? self->pair->period_guest_us : 0;
+		period_sys = he->pair ? he->pair->period_sys : 0;
+		period_us = he->pair ? he->pair->period_us : 0;
+		period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
+		period_guest_us = he->pair ? he->pair->period_guest_us : 0;
 	} else {
 	} else {
-		period = self->period;
-		nr_events = self->nr_events;
-		total = session_total;
-		period_sys = self->period_sys;
-		period_us = self->period_us;
-		period_guest_sys = self->period_guest_sys;
-		period_guest_us = self->period_guest_us;
+		period = he->period;
+		nr_events = he->nr_events;
+		total = total_period;
+		period_sys = he->period_sys;
+		period_us = he->period_us;
+		period_guest_sys = he->period_guest_sys;
+		period_guest_us = he->period_guest_us;
 	}
 	}
 
 
 	if (total) {
 	if (total) {
@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
 
 
 		if (total > 0)
 		if (total > 0)
 			old_percent = (period * 100.0) / total;
 			old_percent = (period * 100.0) / total;
-		if (session_total > 0)
-			new_percent = (self->period * 100.0) / session_total;
+		if (total_period > 0)
+			new_percent = (he->period * 100.0) / total_period;
 
 
 		diff = new_percent - old_percent;
 		diff = new_percent - old_percent;
 
 
@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
 	return ret;
 	return ret;
 }
 }
 
 
-int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
-			struct hists *pair_hists, bool show_displacement,
-			long displacement, FILE *fp, u64 session_total)
+static int hist_entry__fprintf(struct hist_entry *he, size_t size,
+			       struct hists *hists, struct hists *pair_hists,
+			       bool show_displacement, long displacement,
+			       u64 total_period, FILE *fp)
 {
 {
 	char bf[512];
 	char bf[512];
 	int ret;
 	int ret;
@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
 
 
 	ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
 	ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
 					show_displacement, displacement,
 					show_displacement, displacement,
-					true, session_total);
+					true, total_period);
 	hist_entry__snprintf(he, bf + ret, size - ret, hists);
 	hist_entry__snprintf(he, bf + ret, size - ret, hists);
 	return fprintf(fp, "%s\n", bf);
 	return fprintf(fp, "%s\n", bf);
 }
 }
 
 
-static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
-					    struct hists *hists, FILE *fp,
-					    u64 session_total)
+static size_t hist_entry__fprintf_callchain(struct hist_entry *he,
+					    struct hists *hists,
+					    u64 total_period, FILE *fp)
 {
 {
 	int left_margin = 0;
 	int left_margin = 0;
 
 
@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
 		struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
 		struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
 							 typeof(*se), list);
 							 typeof(*se), list);
 		left_margin = hists__col_len(hists, se->se_width_idx);
 		left_margin = hists__col_len(hists, se->se_width_idx);
-		left_margin -= thread__comm_len(self->thread);
+		left_margin -= thread__comm_len(he->thread);
 	}
 	}
 
 
-	return hist_entry_callchain__fprintf(fp, self, session_total,
-					     left_margin);
+	return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
 }
 }
 
 
 size_t hists__fprintf(struct hists *hists, struct hists *pair,
 size_t hists__fprintf(struct hists *hists, struct hists *pair,
@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 	struct sort_entry *se;
 	struct sort_entry *se;
 	struct rb_node *nd;
 	struct rb_node *nd;
 	size_t ret = 0;
 	size_t ret = 0;
+	u64 total_period;
 	unsigned long position = 1;
 	unsigned long position = 1;
 	long displacement = 0;
 	long displacement = 0;
 	unsigned int width;
 	unsigned int width;
@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 
 
 	fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
 	fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
 
 
-	if (symbol_conf.show_nr_samples) {
-		if (sep)
-			fprintf(fp, "%cSamples", *sep);
-		else
-			fputs("  Samples  ", fp);
-	}
-
-	if (symbol_conf.show_total_period) {
-		if (sep)
-			ret += fprintf(fp, "%cPeriod", *sep);
-		else
-			ret += fprintf(fp, "   Period    ");
-	}
-
 	if (symbol_conf.show_cpu_utilization) {
 	if (symbol_conf.show_cpu_utilization) {
 		if (sep) {
 		if (sep) {
 			ret += fprintf(fp, "%csys", *sep);
 			ret += fprintf(fp, "%csys", *sep);
@@ -940,8 +928,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 				ret += fprintf(fp, "%cguest us", *sep);
 				ret += fprintf(fp, "%cguest us", *sep);
 			}
 			}
 		} else {
 		} else {
-			ret += fprintf(fp, "  sys  ");
-			ret += fprintf(fp, "  us  ");
+			ret += fprintf(fp, "     sys  ");
+			ret += fprintf(fp, "      us  ");
 			if (perf_guest) {
 			if (perf_guest) {
 				ret += fprintf(fp, "  guest sys  ");
 				ret += fprintf(fp, "  guest sys  ");
 				ret += fprintf(fp, "  guest us  ");
 				ret += fprintf(fp, "  guest us  ");
@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 		}
 		}
 	}
 	}
 
 
+	if (symbol_conf.show_nr_samples) {
+		if (sep)
+			fprintf(fp, "%cSamples", *sep);
+		else
+			fputs("  Samples  ", fp);
+	}
+
+	if (symbol_conf.show_total_period) {
+		if (sep)
+			ret += fprintf(fp, "%cPeriod", *sep);
+		else
+			ret += fprintf(fp, "   Period    ");
+	}
+
 	if (pair) {
 	if (pair) {
 		if (sep)
 		if (sep)
 			ret += fprintf(fp, "%cDelta", *sep);
 			ret += fprintf(fp, "%cDelta", *sep);
@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 		goto print_entries;
 		goto print_entries;
 
 
 	fprintf(fp, "# ........");
 	fprintf(fp, "# ........");
+	if (symbol_conf.show_cpu_utilization)
+		fprintf(fp, "   .......   .......");
 	if (symbol_conf.show_nr_samples)
 	if (symbol_conf.show_nr_samples)
 		fprintf(fp, " ..........");
 		fprintf(fp, " ..........");
 	if (symbol_conf.show_total_period)
 	if (symbol_conf.show_total_period)
@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
 		goto out;
 		goto out;
 
 
 print_entries:
 print_entries:
+	total_period = hists->stats.total_period;
+
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
 
@@ -1040,11 +1046,10 @@ print_entries:
 			++position;
 			++position;
 		}
 		}
 		ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
 		ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
-					   displacement, fp, hists->stats.total_period);
+					   displacement, total_period, fp);
 
 
 		if (symbol_conf.use_callchain)
 		if (symbol_conf.use_callchain)
-			ret += hist_entry__fprintf_callchain(h, hists, fp,
-							     hists->stats.total_period);
+			ret += hist_entry__fprintf_callchain(h, hists, total_period, fp);
 		if (max_rows && ++nr_rows >= max_rows)
 		if (max_rows && ++nr_rows >= max_rows)
 			goto out;
 			goto out;
 
 

+ 2 - 5
tools/perf/util/hist.h

@@ -66,11 +66,8 @@ struct hists {
 struct hist_entry *__hists__add_entry(struct hists *self,
 struct hist_entry *__hists__add_entry(struct hists *self,
 				      struct addr_location *al,
 				      struct addr_location *al,
 				      struct symbol *parent, u64 period);
 				      struct symbol *parent, u64 period);
-extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
-extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
-int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
-			struct hists *pair_hists, bool show_displacement,
-			long displacement, FILE *fp, u64 session_total);
+int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
 int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
 			 struct hists *hists);
 			 struct hists *hists);
 void hist_entry__free(struct hist_entry *);
 void hist_entry__free(struct hist_entry *);

+ 13 - 2
tools/perf/util/parse-events.c

@@ -735,8 +735,8 @@ static int
 parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 {
 {
 	const char *str = *strp;
 	const char *str = *strp;
-	int exclude = 0;
-	int eu = 0, ek = 0, eh = 0, precise = 0;
+	int exclude = 0, exclude_GH = 0;
+	int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
 
 
 	if (!*str)
 	if (!*str)
 		return 0;
 		return 0;
@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 			if (!exclude)
 			if (!exclude)
 				exclude = eu = ek = eh = 1;
 				exclude = eu = ek = eh = 1;
 			eh = 0;
 			eh = 0;
+		} else if (*str == 'G') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eG = 0;
+		} else if (*str == 'H') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eH = 0;
 		} else if (*str == 'p') {
 		} else if (*str == 'p') {
 			precise++;
 			precise++;
 		} else
 		} else
@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 	attr->exclude_kernel = ek;
 	attr->exclude_kernel = ek;
 	attr->exclude_hv     = eh;
 	attr->exclude_hv     = eh;
 	attr->precise_ip     = precise;
 	attr->precise_ip     = precise;
+	attr->exclude_host   = eH;
+	attr->exclude_guest  = eG;
 
 
 	return 0;
 	return 0;
 }
 }
@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
 	for (;;) {
 	for (;;) {
 		ostr = str;
 		ostr = str;
 		memset(&attr, 0, sizeof(attr));
 		memset(&attr, 0, sizeof(attr));
+		event_attr_init(&attr);
 		ret = parse_event_symbols(evlist, &str, &attr);
 		ret = parse_event_symbols(evlist, &str, &attr);
 		if (ret == EVT_FAILED)
 		if (ret == EVT_FAILED)
 			return -1;
 			return -1;

+ 0 - 1
tools/perf/util/trace-event-info.c

@@ -18,7 +18,6 @@
  *
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
  */
-#include <ctype.h>
 #include "util.h"
 #include "util.h"
 #include <dirent.h>
 #include <dirent.h>
 #include <mntent.h>
 #include <mntent.h>

+ 15 - 0
tools/perf/util/util.c

@@ -1,6 +1,21 @@
+#include "../perf.h"
 #include "util.h"
 #include "util.h"
 #include <sys/mman.h>
 #include <sys/mman.h>
 
 
+/*
+ * XXX We need to find a better place for these things...
+ */
+bool perf_host  = true;
+bool perf_guest = true;
+
+void event_attr_init(struct perf_event_attr *attr)
+{
+	if (!perf_host)
+		attr->exclude_host  = 1;
+	if (!perf_guest)
+		attr->exclude_guest = 1;
+}
+
 int mkdir_p(char *path, mode_t mode)
 int mkdir_p(char *path, mode_t mode)
 {
 {
 	struct stat st;
 	struct stat st;

+ 4 - 0
tools/perf/util/util.h

@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2);
 unsigned long convert_unit(unsigned long value, char *unit);
 unsigned long convert_unit(unsigned long value, char *unit);
 int readn(int fd, void *buf, size_t size);
 int readn(int fd, void *buf, size_t size);
 
 
+struct perf_event_attr;
+
+void event_attr_init(struct perf_event_attr *attr);
+
 #define _STR(x) #x
 #define _STR(x) #x
 #define STR(x) _STR(x)
 #define STR(x) _STR(x)
 
 

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است