Browse Source

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (162 commits)
  tracing/kprobes: unregister_trace_probe needs to be called under mutex
  perf: expose event__process function
  perf events: Fix mmap offset determination
  perf, powerpc: fsl_emb: Restore setting perf_sample_data.period
  perf, powerpc: Convert the FSL driver to use local64_t
  perf tools: Don't keep unreferenced maps when unmaps are detected
  perf session: Invalidate last_match when removing threads from rb_tree
  perf session: Free the ref_reloc_sym memory at the right place
  x86,mmiotrace: Add support for tracing STOS instruction
  perf, sched migration: Librarize task states and event headers helpers
  perf, sched migration: Librarize the GUI class
  perf, sched migration: Make the GUI class client agnostic
  perf, sched migration: Make it vertically scrollable
  perf, sched migration: Parameterize cpu height and spacing
  perf, sched migration: Fix key bindings
  perf, sched migration: Ignore unhandled task states
  perf, sched migration: Handle ignored migrate out events
  perf: New migration tool overview
  tracing: Drop cpparg() macro
  perf: Use tracepoint_synchronize_unregister() to flush any pending tracepoint call
  ...

Fix up trivial conflicts in Makefile and drivers/cpufreq/cpufreq.c
Linus Torvalds 15 years ago
parent
commit
4aed2fd8e3
100 changed files with 1380 additions and 2491 deletions
  1. 0 71
      Documentation/ABI/testing/debugfs-kmemtrace
  2. 2 0
      Documentation/kernel-parameters.txt
  3. 148 5
      Documentation/trace/ftrace-design.txt
  4. 0 126
      Documentation/trace/kmemtrace.txt
  5. 1 1
      Documentation/trace/kprobetrace.txt
  6. 1 8
      MAINTAINERS
  7. 3 1
      Makefile
  8. 7 0
      arch/Kconfig
  9. 1 0
      arch/alpha/include/asm/local64.h
  10. 1 0
      arch/arm/include/asm/local64.h
  11. 9 9
      arch/arm/kernel/perf_event.c
  12. 1 0
      arch/avr32/include/asm/local64.h
  13. 1 0
      arch/blackfin/include/asm/local64.h
  14. 1 0
      arch/cris/include/asm/local64.h
  15. 1 0
      arch/frv/include/asm/local64.h
  16. 1 0
      arch/frv/kernel/local64.h
  17. 1 0
      arch/h8300/include/asm/local64.h
  18. 1 0
      arch/ia64/include/asm/local64.h
  19. 1 0
      arch/m32r/include/asm/local64.h
  20. 1 0
      arch/m68k/include/asm/local64.h
  21. 1 0
      arch/microblaze/include/asm/local64.h
  22. 1 0
      arch/mips/include/asm/local64.h
  23. 1 0
      arch/mn10300/include/asm/local64.h
  24. 1 0
      arch/parisc/include/asm/local64.h
  25. 1 0
      arch/powerpc/include/asm/local64.h
  26. 12 0
      arch/powerpc/include/asm/perf_event.h
  27. 0 26
      arch/powerpc/kernel/misc.S
  28. 21 20
      arch/powerpc/kernel/perf_event.c
  29. 15 14
      arch/powerpc/kernel/perf_event_fsl_emb.c
  30. 1 0
      arch/s390/include/asm/local64.h
  31. 1 0
      arch/score/include/asm/local64.h
  32. 1 0
      arch/sh/include/asm/local64.h
  33. 3 3
      arch/sh/kernel/perf_event.c
  34. 1 0
      arch/sparc/include/asm/local64.h
  35. 8 0
      arch/sparc/include/asm/perf_event.h
  36. 3 3
      arch/sparc/kernel/helpers.S
  37. 13 12
      arch/sparc/kernel/perf_event.c
  38. 1 0
      arch/x86/Kconfig
  39. 1 1
      arch/x86/include/asm/hw_breakpoint.h
  40. 1 0
      arch/x86/include/asm/local64.h
  41. 2 0
      arch/x86/include/asm/nmi.h
  42. 16 2
      arch/x86/include/asm/perf_event.h
  43. 52 47
      arch/x86/include/asm/perf_event_p4.h
  44. 49 0
      arch/x86/include/asm/stacktrace.h
  45. 6 1
      arch/x86/kernel/apic/Makefile
  46. 107 0
      arch/x86/kernel/apic/hw_nmi.c
  47. 0 7
      arch/x86/kernel/apic/nmi.c
  48. 25 37
      arch/x86/kernel/cpu/perf_event.c
  49. 120 36
      arch/x86/kernel/cpu/perf_event_p4.c
  50. 0 1
      arch/x86/kernel/dumpstack.c
  51. 0 56
      arch/x86/kernel/dumpstack.h
  52. 0 2
      arch/x86/kernel/dumpstack_32.c
  53. 0 1
      arch/x86/kernel/dumpstack_64.c
  54. 36 15
      arch/x86/kernel/hw_breakpoint.c
  55. 17 16
      arch/x86/kernel/kprobes.c
  56. 4 0
      arch/x86/kernel/process_32.c
  57. 5 0
      arch/x86/kernel/process_64.c
  58. 16 15
      arch/x86/kernel/stacktrace.c
  59. 7 0
      arch/x86/kernel/traps.c
  60. 17 13
      arch/x86/mm/pf_in.c
  61. 14 2
      arch/x86/oprofile/nmi_int.c
  62. 1 0
      arch/xtensa/include/asm/local64.h
  63. 2 1
      drivers/oprofile/event_buffer.c
  64. 1 0
      fs/exec.c
  65. 96 0
      include/asm-generic/local64.h
  66. 0 4
      include/asm-generic/vmlinux.lds.h
  67. 5 0
      include/linux/ftrace.h
  68. 12 6
      include/linux/ftrace_event.h
  69. 0 5
      include/linux/kernel.h
  70. 0 25
      include/linux/kmemtrace.h
  71. 13 0
      include/linux/nmi.h
  72. 47 48
      include/linux/perf_event.h
  73. 4 20
      include/linux/sched.h
  74. 2 1
      include/linux/slab_def.h
  75. 2 1
      include/linux/slub_def.h
  76. 0 2
      include/linux/syscalls.h
  77. 0 60
      include/trace/boot.h
  78. 7 25
      include/trace/events/sched.h
  79. 32 48
      include/trace/events/timer.h
  80. 8 15
      include/trace/ftrace.h
  81. 0 1
      include/trace/syscall.h
  82. 10 19
      init/main.c
  83. 1 1
      kernel/Makefile
  84. 41 37
      kernel/hw_breakpoint.c
  85. 218 208
      kernel/perf_event.c
  86. 3 3
      kernel/sched.c
  87. 0 293
      kernel/softlockup.c
  88. 33 22
      kernel/sysctl.c
  89. 0 1
      kernel/timer.c
  90. 0 68
      kernel/trace/Kconfig
  91. 0 4
      kernel/trace/Makefile
  92. 2 3
      kernel/trace/ftrace.c
  93. 0 529
      kernel/trace/kmemtrace.c
  94. 9 31
      kernel/trace/ring_buffer.c
  95. 55 72
      kernel/trace/trace.c
  96. 6 84
      kernel/trace/trace.h
  97. 0 185
      kernel/trace/trace_boot.c
  98. 2 3
      kernel/trace/trace_clock.c
  99. 0 94
      kernel/trace/trace_entries.h
  100. 6 21
      kernel/trace/trace_event_perf.c

+ 0 - 71
Documentation/ABI/testing/debugfs-kmemtrace

@@ -1,71 +0,0 @@
-What:		/sys/kernel/debug/kmemtrace/
-Date:		July 2008
-Contact:	Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-Description:
-
-In kmemtrace-enabled kernels, the following files are created:
-
-/sys/kernel/debug/kmemtrace/
-	cpu<n>		(0400)	Per-CPU tracing data, see below. (binary)
-	total_overruns	(0400)	Total number of bytes which were dropped from
-				cpu<n> files because of full buffer condition,
-				non-binary. (text)
-	abi_version	(0400)	Kernel's kmemtrace ABI version. (text)
-
-Each per-CPU file should be read according to the relay interface. That is,
-the reader should set affinity to that specific CPU and, as currently done by
-the userspace application (though there are other methods), use poll() with
-an infinite timeout before every read(). Otherwise, erroneous data may be
-read. The binary data has the following _core_ format:
-
-	Event ID	(1 byte)	Unsigned integer, one of:
-		0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
-		1 - represents a freeing of previously allocated memory
-		    (KMEMTRACE_EVENT_FREE)
-	Type ID		(1 byte)	Unsigned integer, one of:
-		0 - this is a kmalloc() / kfree()
-		1 - this is a kmem_cache_alloc() / kmem_cache_free()
-		2 - this is a __get_free_pages() et al.
-	Event size	(2 bytes)	Unsigned integer representing the
-					size of this event. Used to extend
-					kmemtrace. Discard the bytes you
-					don't know about.
-	Sequence number	(4 bytes)	Signed integer used to reorder data
-					logged on SMP machines. Wraparound
-					must be taken into account, although
-					it is unlikely.
-	Caller address	(8 bytes)	Return address to the caller.
-	Pointer to mem	(8 bytes)	Pointer to target memory area. Can be
-					NULL, but not all such calls might be
-					recorded.
-
-In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
-
-	Requested bytes	(8 bytes)	Total number of requested bytes,
-					unsigned, must not be zero.
-	Allocated bytes (8 bytes)	Total number of actually allocated
-					bytes, unsigned, must not be lower
-					than requested bytes.
-	Requested flags	(4 bytes)	GFP flags supplied by the caller.
-	Target CPU	(4 bytes)	Signed integer, valid for event id 1.
-					If equal to -1, target CPU is the same
-					as origin CPU, but the reverse might
-					not be true.
-
-The data is made available in the same endianness the machine has.
-
-Other event ids and type ids may be defined and added. Other fields may be
-added by increasing event size, but see below for details.
-Every modification to the ABI, including new id definitions, are followed
-by bumping the ABI version by one.
-
-Adding new data to the packet (features) is done at the end of the mandatory
-data:
-	Feature size	(2 byte)
-	Feature ID	(1 byte)
-	Feature data	(Feature size - 3 bytes)
-
-
-Users:
-	kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
-
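
For readers skimming the removed ABI description above, a rough C view of the
core packet layout it documents (native endianness, no padding assumed; the
struct and field names here are illustrative, not taken from the kernel or
from kmemtrace-user):

	#include <stdint.h>

	struct kmemtrace_core_event {	/* common 24-byte header */
		uint8_t  event_id;	/* 0 = ALLOC, 1 = FREE */
		uint8_t  type_id;	/* 0 = kmalloc, 1 = kmem_cache, 2 = pages */
		uint16_t event_size;	/* total size; lets readers skip unknown data */
		int32_t  seq;		/* sequence number for reordering SMP data */
		uint64_t call_site;	/* return address to the caller */
		uint64_t ptr;		/* pointer to the affected memory, may be 0 */
	} __attribute__((packed));

	struct kmemtrace_alloc_body {	/* follows the header for ALLOC events */
		uint64_t bytes_req;	/* requested bytes, never zero */
		uint64_t bytes_alloc;	/* allocated bytes, >= bytes_req */
		uint32_t gfp_flags;	/* GFP flags supplied by the caller */
		int32_t  target_cpu;	/* -1 means same as origin CPU */
	} __attribute__((packed));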

+ 2 - 0
Documentation/kernel-parameters.txt

@@ -1816,6 +1816,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nousb		[USB] Disable the USB subsystem
 
+	nowatchdog	[KNL] Disable the lockup detector.
+
 	nowb		[ARM]
 
 	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.

+ 148 - 5
Documentation/trace/ftrace-design.txt

@@ -13,6 +13,9 @@ Note that this focuses on architecture implementation details only.  If you
 want more explanation of a feature in terms of common code, review the common
 ftrace.txt file.
 
+Ideally, everyone who wishes to retain performance while supporting tracing in
+their kernel should make it all the way to dynamic ftrace support.
+
 
 Prerequisites
 -------------
@@ -215,7 +218,7 @@ An arch may pass in a unique value (frame pointer) to both the entering and
 exiting of a function.  On exit, the value is compared and if it does not
 match, then it will panic the kernel.  This is largely a sanity check for bad
 code generation with gcc.  If gcc for your port sanely updates the frame
-pointer under different opitmization levels, then ignore this option.
+pointer under different optimization levels, then ignore this option.
 
 However, adding support for it isn't terribly difficult.  In your assembly code
 that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
@@ -234,7 +237,7 @@ If you can't trace NMI functions, then skip this option.
 
 
 HAVE_SYSCALL_TRACEPOINTS
----------------------
+------------------------
 
 You need very few things to get the syscalls tracing in an arch.
 
@@ -250,12 +253,152 @@ You need very few things to get the syscalls tracing in an arch.
 HAVE_FTRACE_MCOUNT_RECORD
 -------------------------
 
-See scripts/recordmcount.pl for more info.
+See scripts/recordmcount.pl for more info.  Just fill in the arch-specific
+details for how to locate the addresses of mcount call sites via objdump.
+This option doesn't make much sense without also implementing dynamic ftrace.
 
+
+HAVE_DYNAMIC_FTRACE
+-------------------
+
+You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so
+scroll your reader back up if you got over eager.
+
+Once those are out of the way, you will need to implement:
+	- asm/ftrace.h:
+		- MCOUNT_ADDR
+		- ftrace_call_adjust()
+		- struct dyn_arch_ftrace{}
+	- asm code:
+		- mcount() (new stub)
+		- ftrace_caller()
+		- ftrace_call()
+		- ftrace_stub()
+	- C code:
+		- ftrace_dyn_arch_init()
+		- ftrace_make_nop()
+		- ftrace_make_call()
+		- ftrace_update_ftrace_func()
+
+First you will need to fill out some arch details in your asm/ftrace.h.
+
+Define MCOUNT_ADDR as the address of your mcount symbol similar to:
+	#define MCOUNT_ADDR ((unsigned long)mcount)
+Since no one else will have a decl for that function, you will need to:
+	extern void mcount(void);
+
+You will also need the helper function ftrace_call_adjust().  Most people
+will be able to stub it out like so:
+	static inline unsigned long ftrace_call_adjust(unsigned long addr)
+	{
+		return addr;
+	}
 <details to be filled>
 
+Lastly you will need the custom dyn_arch_ftrace structure.  If you need
+some extra state when runtime patching arbitrary call sites, this is the
+place.  For now though, create an empty struct:
+	struct dyn_arch_ftrace {
+		/* No extra data needed */
+	};
+
+With the header out of the way, we can fill out the assembly code.  While we
+did already create a mcount() function earlier, dynamic ftrace only wants a
+stub function.  This is because the mcount() will only be used during boot
+and then all references to it will be patched out never to return.  Instead,
+the guts of the old mcount() will be used to create a new ftrace_caller()
+function.  Because the two are hard to merge, it will most likely be a lot
+easier to have two separate definitions split up by #ifdefs.  Same goes for
+the ftrace_stub() as that will now be inlined in ftrace_caller().
+
+Before we get confused anymore, let's check out some pseudo code so you can
+implement your own stuff in assembly:
 
-HAVE_DYNAMIC_FTRACE
----------------------
+void mcount(void)
+{
+	return;
+}
+
+void ftrace_caller(void)
+{
+	/* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */
+
+	/* save all state needed by the ABI (see paragraph above) */
+
+	unsigned long frompc = ...;
+	unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+
+ftrace_call:
+	ftrace_stub(frompc, selfpc);
+
+	/* restore all state needed by the ABI */
+
+ftrace_stub:
+	return;
+}
+
+This might look a little odd at first, but keep in mind that we will be runtime
+patching multiple things.  First, only functions that we actually want to trace
+will be patched to call ftrace_caller().  Second, since we only have one tracer
+active at a time, we will patch the ftrace_caller() function itself to call the
+specific tracer in question.  That is the point of the ftrace_call label.
+
+With that in mind, let's move on to the C code that will actually be doing the
+runtime patching.  You'll need a little knowledge of your arch's opcodes in
+order to make it through the next section.
+
+Every arch has an init callback function.  If you need to do something early on
+to initialize some state, this is the time to do that.  Otherwise, this simple
+function below should be sufficient for most people:
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+	/* return value is done indirectly via data */
+	*(unsigned long *)data = 0;
+
+	return 0;
+}
+
+There are two functions that are used to do runtime patching of arbitrary
+functions.  The first is used to turn the mcount call site into a nop (which
+is what helps us retain runtime performance when not tracing).  The second is
+used to turn the mcount call site into a call to an arbitrary location (but
+typically that is ftracer_caller()).  See the general function definition in
+linux/ftrace.h for the functions:
+	ftrace_make_nop()
+	ftrace_make_call()
+The rec->ip value is the address of the mcount call site that was collected
+by the scripts/recordmcount.pl during build time.
+
+The last function is used to do runtime patching of the active tracer.  This
+will be modifying the assembly code at the location of the ftrace_call symbol
+inside of the ftrace_caller() function.  So you should have sufficient padding
+at that location to support the new function calls you'll be inserting.  Some
+people will be using a "call" type instruction while others will be using a
+"branch" type instruction.  Specifically, the function is:
+	ftrace_update_ftrace_func()
+
+
+HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER
+------------------------------------------------
+
+The function grapher needs a few tweaks in order to work with dynamic ftrace.
+Basically, you will need to:
+	- update:
+		- ftrace_caller()
+		- ftrace_graph_call()
+		- ftrace_graph_caller()
+	- implement:
+		- ftrace_enable_ftrace_graph_caller()
+		- ftrace_disable_ftrace_graph_caller()
 
 <details to be filled>
+Quick notes:
+	- add a nop stub after the ftrace_call location named ftrace_graph_call;
+	  stub needs to be large enough to support a call to ftrace_graph_caller()
+	- update ftrace_graph_caller() to work with being called by the new
+	  ftrace_caller() since some semantics may have changed
+	- ftrace_enable_ftrace_graph_caller() will runtime patch the
+	  ftrace_graph_call location with a call to ftrace_graph_caller()
+	- ftrace_disable_ftrace_graph_caller() will runtime patch the
+	  ftrace_graph_call location with nops
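
The two patching callbacks named in the new text above have roughly the
following shape. This is a sketch only, not taken from any real architecture:
ftrace_nop_replace(), ftrace_call_replace() and ftrace_modify_code() stand in
for whatever opcode helpers a port provides.

	int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
			    unsigned long addr)
	{
		/* turn the call to `addr` at rec->ip back into a nop */
		return ftrace_modify_code(rec->ip,
					  ftrace_call_replace(rec->ip, addr),
					  ftrace_nop_replace());
	}

	int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
	{
		/* turn the nop at rec->ip into a call to `addr` */
		return ftrace_modify_code(rec->ip,
					  ftrace_nop_replace(),
					  ftrace_call_replace(rec->ip, addr));
	}

ftrace_update_ftrace_func() then does the same kind of patching, but at the
fixed ftrace_call site inside ftrace_caller(), swapping in the currently
active tracer.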

+ 0 - 126
Documentation/trace/kmemtrace.txt

@@ -1,126 +0,0 @@
-			kmemtrace - Kernel Memory Tracer
-
-			  by Eduard - Gabriel Munteanu
-			     <eduard.munteanu@linux360.ro>
-
-I. Introduction
-===============
-
-kmemtrace helps kernel developers figure out two things:
-1) how different allocators (SLAB, SLUB etc.) perform
-2) how kernel code allocates memory and how much
-
-To do this, we trace every allocation and export information to the userspace
-through the relay interface. We export things such as the number of requested
-bytes, the number of bytes actually allocated (i.e. including internal
-fragmentation), whether this is a slab allocation or a plain kmalloc() and so
-on.
-
-The actual analysis is performed by a userspace tool (see section III for
-details on where to get it from). It logs the data exported by the kernel,
-processes it and (as of writing this) can provide the following information:
-- the total amount of memory allocated and fragmentation per call-site
-- the amount of memory allocated and fragmentation per allocation
-- total memory allocated and fragmentation in the collected dataset
-- number of cross-CPU allocation and frees (makes sense in NUMA environments)
-
-Moreover, it can potentially find inconsistent and erroneous behavior in
-kernel code, such as using slab free functions on kmalloc'ed memory or
-allocating less memory than requested (but not truly failed allocations).
-
-kmemtrace also makes provisions for tracing on some arch and analysing the
-data on another.
-
-II. Design and goals
-====================
-
-kmemtrace was designed to handle rather large amounts of data. Thus, it uses
-the relay interface to export whatever is logged to userspace, which then
-stores it. Analysis and reporting is done asynchronously, that is, after the
-data is collected and stored. By design, it allows one to log and analyse
-on different machines and different arches.
-
-As of writing this, the ABI is not considered stable, though it might not
-change much. However, no guarantees are made about compatibility yet. When
-deemed stable, the ABI should still allow easy extension while maintaining
-backward compatibility. This is described further in Documentation/ABI.
-
-Summary of design goals:
-	- allow logging and analysis to be done across different machines
-	- be fast and anticipate usage in high-load environments (*)
-	- be reasonably extensible
-	- make it possible for GNU/Linux distributions to have kmemtrace
-	included in their repositories
-
-(*) - one of the reasons Pekka Enberg's original userspace data analysis
-    tool's code was rewritten from Perl to C (although this is more than a
-    simple conversion)
-
-
-III. Quick usage guide
-======================
-
-1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
-CONFIG_KMEMTRACE).
-
-2) Get the userspace tool and build it:
-$ git clone git://repo.or.cz/kmemtrace-user.git		# current repository
-$ cd kmemtrace-user/
-$ ./autogen.sh
-$ ./configure
-$ make
-
-3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
-'single' runlevel (so that relay buffers don't fill up easily), and run
-kmemtrace:
-# '$' does not mean user, but root here.
-$ mount -t debugfs none /sys/kernel/debug
-$ mount -t proc none /proc
-$ cd path/to/kmemtrace-user/
-$ ./kmemtraced
-Wait a bit, then stop it with CTRL+C.
-$ cat /sys/kernel/debug/kmemtrace/total_overruns	# Check if we didn't
-							# overrun, should
-							# be zero.
-$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
-		check its correctness]
-$ ./kmemtrace-report
-
-Now you should have a nice and short summary of how the allocator performs.
-
-IV. FAQ and known issues
-========================
-
-Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
-this? Should I worry?
-A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
-large the number is. You can fix it by supplying a higher
-'kmemtrace.subbufs=N' kernel parameter.
----
-
-Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
-A: This is a bug and should be reported. It can occur for a variety of
-reasons:
-	- possible bugs in relay code
-	- possible misuse of relay by kmemtrace
-	- timestamps being collected unorderly
-Or you may fix it yourself and send us a patch.
----
-
-Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
-A: This is a known issue and I'm working on it. These might be true errors
-in kernel code, which may have inconsistent behavior (e.g. allocating memory
-with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
-out this behavior may work with SLAB, but may fail with other allocators.
-
-It may also be due to lack of tracing in some unusual allocator functions.
-
-We don't want bug reports regarding this issue yet.
----
-
-V. See also
-===========
-
-Documentation/kernel-parameters.txt
-Documentation/ABI/testing/debugfs-kmemtrace
-

+ 1 - 1
Documentation/trace/kprobetrace.txt

@@ -42,7 +42,7 @@ Synopsis of kprobe_events
   +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
-		  (u8/u16/u32/u64/s8/s16/s32/s64) are supported.
+		  (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
 
   (*) only for return probe.
   (**) this is useful for fetching a field of data structures.

+ 1 - 8
MAINTAINERS

@@ -3403,13 +3403,6 @@ F:	include/linux/kmemleak.h
 F:	mm/kmemleak.c
 F:	mm/kmemleak-test.c
 
-KMEMTRACE
-M:	Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-S:	Maintained
-F:	Documentation/trace/kmemtrace.txt
-F:	include/linux/kmemtrace.h
-F:	kernel/trace/kmemtrace.c
-
 KPROBES
 M:	Ananth N Mavinakayanahalli <ananth@in.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
@@ -5685,7 +5678,7 @@ TRACING
 M:	Steven Rostedt <rostedt@goodmis.org>
 M:	Frederic Weisbecker <fweisbec@gmail.com>
 M:	Ingo Molnar <mingo@redhat.com>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf/core
 S:	Maintained
 F:	Documentation/trace/ftrace.txt
 F:	arch/*/*/*/ftrace.h

+ 3 - 1
Makefile

@@ -420,7 +420,7 @@ endif
 no-dot-config-targets := clean mrproper distclean \
 			 cscope TAGS tags help %docs check% coccicheck \
 			 include/linux/version.h headers_% \
-			 kernelversion
+			 kernelversion %src-pkg
 
 config-targets := 0
 mixed-targets  := 0
@@ -1168,6 +1168,8 @@ distclean: mrproper
 # rpm target kept for backward compatibility
 package-dir	:= $(srctree)/scripts/package
 
+%src-pkg: FORCE
+	$(Q)$(MAKE) $(build)=$(package-dir) $@
 %pkg: include/config/kernel.release FORCE
 	$(Q)$(MAKE) $(build)=$(package-dir) $@
 rpm: include/config/kernel.release FORCE

+ 7 - 0
arch/Kconfig

@@ -151,4 +151,11 @@ config HAVE_MIXED_BREAKPOINTS_REGS
 config HAVE_USER_RETURN_NOTIFIER
 	bool
 
+config HAVE_PERF_EVENTS_NMI
+	bool
+	help
+	  System hardware can generate an NMI using the perf event
+	  subsystem.  Also has support for calculating CPU cycle events
+	  to determine how many clock cycles in a given period.
+
 source "kernel/gcov/Kconfig"

+ 1 - 0
arch/alpha/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/arm/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 9 - 9
arch/arm/kernel/perf_event.c

@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
 {
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
 	if (left > (s64)armpmu->max_period)
 		left = armpmu->max_period;
 
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
 
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
 	u64 delta;
 
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = armpmu->read_counter(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period  = armpmu->max_period;
 		hwc->last_period    = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
 	err = 0;

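The atomic64_t -> local64_t substitution made here repeats in the powerpc, sh
and sparc PMU code further down; local64_t is the cheaper choice because each
hardware counter is only ever updated from its own CPU. Pulled out of context,
the update idiom all of these drivers preserve looks roughly like this
(read_counter() stands in for the arch-specific counter read):

	u64 prev, now;

	do {
		prev = local64_read(&hwc->prev_count);
		now  = read_counter(idx);		/* illustrative helper */
	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

	local64_add(now - prev, &event->count);		/* accumulate the delta */
	local64_sub(now - prev, &hwc->period_left);	/* burn down the sample period */
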
+ 1 - 0
arch/avr32/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/blackfin/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/cris/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/frv/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/frv/kernel/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/h8300/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/ia64/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/m32r/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/m68k/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/microblaze/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/mips/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/mn10300/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/parisc/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/powerpc/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 12 - 0
arch/powerpc/include/asm/perf_event.h

@@ -21,3 +21,15 @@
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
 #include <asm/perf_event_fsl_emb.h>
 #endif
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip)			\
+	do {							\
+		(regs)->nip = __ip;				\
+		(regs)->gpr[1] = *(unsigned long *)__get_SP();	\
+		asm volatile("mfmsr %0" : "=r" ((regs)->msr));	\
+	} while (0)
+#endif

+ 0 - 26
arch/powerpc/kernel/misc.S

@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
 _GLOBAL(__restore_cpu_power7)
 	/* place holder */
 	blr
-
-/*
- * Get a minimal set of registers for our caller's nth caller.
- * r3 = regs pointer, r5 = n.
- *
- * We only get R1 (stack pointer), NIP (next instruction pointer)
- * and LR (link register).  These are all we can get in the
- * general case without doing complicated stack unwinding, but
- * fortunately they are enough to do a stack backtrace, which
- * is all we need them for.
- */
-_GLOBAL(perf_arch_fetch_caller_regs)
-	mr	r6,r1
-	cmpwi	r5,0
-	mflr	r4
-	ble	2f
-	mtctr	r5
-1:	PPC_LL	r6,0(r6)
-	bdnz	1b
-	PPC_LL	r4,PPC_LR_STKOFF(r6)
-2:	PPC_LL	r7,0(r6)
-	PPC_LL	r7,PPC_LR_STKOFF(r7)
-	PPC_STL	r6,GPR1-STACK_FRAME_OVERHEAD(r3)
-	PPC_STL	r4,_NIP-STACK_FRAME_OVERHEAD(r3)
-	PPC_STL	r7,_LINK-STACK_FRAME_OVERHEAD(r3)
-	blr

+ 21 - 20
arch/powerpc/kernel/perf_event.c

@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
 	 * Therefore we treat them like NMIs.
 	 */
 	do {
-		prev = atomic64_read(&event->hw.prev_count);
+		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
-	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
 	/* The counters are only 32 bits wide */
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &event->hw.period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &event->hw.period_left);
 }
 
 /*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
 		if (!event->hw.idx)
 			continue;
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		prev = atomic64_read(&event->hw.prev_count);
+		prev = local64_read(&event->hw.prev_count);
 		event->hw.idx = 0;
 		delta = (val - prev) & 0xfffffffful;
-		atomic64_add(delta, &event->count);
+		local64_add(delta, &event->count);
 	}
 }
 
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
 		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
 		}
 		val = 0;
 		if (event->hw.sample_period) {
-			left = atomic64_read(&event->hw.period_left);
+			left = local64_read(&event->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
 		}
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		event->hw.idx = idx;
 		write_pmc(idx, val);
 		perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuhw->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 	perf_enable();
 	local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
 
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
 	for (i = cpuhw->n_txn_start; i < n; ++i)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	return 0;
 }
 
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	event->hw.config = events[n];
 	event->hw.event_base = cflags[n];
 	event->hw.last_period = event->hw.sample_period;
-	atomic64_set(&event->hw.period_left, event->hw.last_period);
+	local64_set(&event->hw.period_left, event->hw.last_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	int record = 0;
 
 	/* we don't have to worry about interrupts here */
-	prev = atomic64_read(&event->hw.prev_count);
+	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 
 	/*
 	 * See if the total period for this event has expired,
 	 * and update for the next period.
 	 */
 	val = 0;
-	left = atomic64_read(&event->hw.period_left) - delta;
+	left = local64_read(&event->hw.period_left) - delta;
 	if (period) {
 		if (left <= 0) {
 			left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	}
 
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 }
 

+ 15 - 14
arch/powerpc/kernel/perf_event_fsl_emb.c

@@ -162,15 +162,15 @@ static void fsl_emb_pmu_read(struct perf_event *event)
 	 * Therefore we treat them like NMIs.
 	 */
 	do {
-		prev = atomic64_read(&event->hw.prev_count);
+		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
-	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
 	/* The counters are only 32 bits wide */
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &event->hw.period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &event->hw.period_left);
 }
 
 /*
@@ -296,11 +296,11 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 
 	val = 0;
 	if (event->hw.sample_period) {
-		s64 left = atomic64_read(&event->hw.period_left);
+		s64 left = local64_read(&event->hw.period_left);
 		if (left < 0x80000000L)
 			val = 0x80000000L - left;
 	}
-	atomic64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.prev_count, val);
 	write_pmc(i, val);
 	perf_event_update_userpage(event);
 
@@ -371,8 +371,8 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 	perf_enable();
 	local_irq_restore(flags);
@@ -500,7 +500,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 		return ERR_PTR(-ENOTSUPP);
 
 	event->hw.last_period = event->hw.sample_period;
-	atomic64_set(&event->hw.period_left, event->hw.last_period);
+	local64_set(&event->hw.period_left, event->hw.last_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -541,16 +541,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	int record = 0;
 
 	/* we don't have to worry about interrupts here */
-	prev = atomic64_read(&event->hw.prev_count);
+	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 
 	/*
 	 * See if the total period for this event has expired,
 	 * and update for the next period.
 	 */
 	val = 0;
-	left = atomic64_read(&event->hw.period_left) - delta;
+	left = local64_read(&event->hw.period_left) - delta;
 	if (period) {
 		if (left <= 0) {
 			left += period;
@@ -569,6 +569,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		struct perf_sample_data data;
 
 		perf_sample_data_init(&data, 0);
+		data.period = event->hw.last_period;
 
 		if (perf_event_overflow(event, nmi, &data, regs)) {
 			/*
@@ -584,8 +585,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	}
 
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 }
 

+ 1 - 0
arch/s390/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/score/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 1 - 0
arch/sh/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 3 - 3
arch/sh/kernel/perf_event.c

@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
 	 * this is the simplest approach for maintaining consistency.
 	 */
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = sh_pmu->read(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
@@ -203,7 +203,7 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 }
 
 static void sh_pmu_disable(struct perf_event *event)

+ 1 - 0
arch/sparc/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 8 - 0
arch/sparc/include/asm/perf_event.h

@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
 #define	PERF_EVENT_INDEX_OFFSET	0
 
 #ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+
 extern void init_hw_perf_events(void);
+
+extern void
+__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+
+#define perf_arch_fetch_caller_regs(pt_regs, ip)	\
+	__perf_arch_fetch_caller_regs(pt_regs, ip, 1);
 #else
 static inline void init_hw_perf_events(void)	{ }
 #endif

+ 3 - 3
arch/sparc/kernel/helpers.S

@@ -47,9 +47,9 @@ stack_trace_flush:
 	.size		stack_trace_flush,.-stack_trace_flush
 
 #ifdef CONFIG_PERF_EVENTS
-	.globl		perf_arch_fetch_caller_regs
-	.type		perf_arch_fetch_caller_regs,#function
-perf_arch_fetch_caller_regs:
+	.globl		__perf_arch_fetch_caller_regs
+	.type		__perf_arch_fetch_caller_regs,#function
+__perf_arch_fetch_caller_regs:
 	/* We always read the %pstate into %o5 since we will use
 	 * that to construct a fake %tstate to store into the regs.
 	 */

+ 13 - 12
arch/sparc/kernel/perf_event.c

@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
 	s64 delta;
 
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = read_pmc(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -591,27 +591,27 @@ again:
 static int sparc_perf_event_set_period(struct perf_event *event,
 				       struct hw_perf_event *hwc, int idx)
 {
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 	if (left > MAX_PERIOD)
 		left = MAX_PERIOD;
 
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 	write_pmc(idx, (u64)(-left) & 0xffffffff);
 
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
 	if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period = MAX_PERIOD;
 		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
 	return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 
 /*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
 	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
 		return -EAGAIN;
 
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	return 0;
 }
 

+ 1 - 0
arch/x86/Kconfig

@@ -55,6 +55,7 @@ config X86
 	select HAVE_HW_BREAKPOINT
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
+	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER

+ 1 - 1
arch/x86/include/asm/hw_breakpoint.h

@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
 #include <linux/list.h>
 
 /* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_X		0x00
 #define X86_BREAKPOINT_LEN_1		0x40
 #define X86_BREAKPOINT_LEN_2		0x44
 #define X86_BREAKPOINT_LEN_4		0x4c
-#define X86_BREAKPOINT_LEN_EXECUTE	0x40
 
 #ifdef CONFIG_X86_64
 #define X86_BREAKPOINT_LEN_8		0x48

+ 1 - 0
arch/x86/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 2 - 0
arch/x86/include/asm/nmi.h

@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
+#if !defined(CONFIG_LOCKUP_DETECTOR)
 extern int nmi_watchdog_enabled;
+#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);

+ 16 - 2
arch/x86/include/asm/perf_event.h

@@ -68,8 +68,9 @@ union cpuid10_eax {
 
 union cpuid10_edx {
 	struct {
-		unsigned int num_counters_fixed:4;
-		unsigned int reserved:28;
+		unsigned int num_counters_fixed:5;
+		unsigned int bit_width_fixed:8;
+		unsigned int reserved:19;
 	} split;
 	unsigned int full;
 };
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
 
+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)		{	\
+	(regs)->ip = (__ip);					\
+	(regs)->bp = caller_frame_pointer();			\
+	(regs)->cs = __KERNEL_CS;				\
+	regs->flags = 0;					\
+}
+
 #else
 static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }

+ 52 - 47
arch/x86/include/asm/perf_event_p4.h

@@ -19,7 +19,6 @@
 #define ARCH_P4_RESERVED_ESCR	(2) /* IQ_ESCR(0,1) not always present */
 #define ARCH_P4_MAX_ESCR	(ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
 #define ARCH_P4_MAX_CCCR	(18)
-#define ARCH_P4_MAX_COUNTER	(ARCH_P4_MAX_CCCR / 2)
 
 #define P4_ESCR_EVENT_MASK	0x7e000000U
 #define P4_ESCR_EVENT_SHIFT	25
@@ -71,10 +70,6 @@
 #define P4_CCCR_THRESHOLD(v)		((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)			((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
-/* Custom bits in reerved CCCR area */
-#define P4_CCCR_CACHE_OPS_MASK		0x0000003fU
-
-
 /* Non HT mask */
 #define P4_CCCR_MASK				\
 	(P4_CCCR_OVF			|	\
@@ -106,8 +101,7 @@
  * ESCR and CCCR but rather an only packed value should
  * be unpacked and written to a proper addresses
  *
- * the base idea is to pack as much info as
- * possible
+ * the base idea is to pack as much info as possible
  */
 #define p4_config_pack_escr(v)		(((u64)(v)) << 32)
 #define p4_config_pack_cccr(v)		(((u64)(v)) & 0xffffffffULL)
@@ -130,8 +124,6 @@
 		t;					\
 	})
 
-#define p4_config_unpack_cache_event(v)	(((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
-
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
 
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
 	return escr;
 }
 
+/*
+ * This are the events which should be used in "Event Select"
+ * field of ESCR register, they are like unique keys which allow
+ * the kernel to determinate which CCCR and COUNTER should be
+ * used to track an event
+ */
 enum P4_EVENTS {
 enum P4_EVENTS {
 	P4_EVENT_TC_DELIVER_MODE,
 	P4_EVENT_BPU_FETCH_REQUEST,
  * a caller should use P4_ESCR_EMASK_NAME helper to
  * a caller should use P4_ESCR_EMASK_NAME helper to
  * pick the EventMask needed, for example
  *
+ *	P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
  */
  */
 enum P4_ESCR_EMASKS {
 	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
 	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
 	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
 };
 
-#define P4_PEBS_METRIC_MASK	0x00001fffU
-#define P4_PEBS_UOB_TAG		0x01000000U
-#define P4_PEBS_ENABLE		0x02000000U
-
-/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
-#define P4_PEBS__1stl_cache_load_miss_retired	0x3000001
-#define P4_PEBS__2ndl_cache_load_miss_retired	0x3000002
-#define P4_PEBS__dtlb_load_miss_retired		0x3000004
-#define P4_PEBS__dtlb_store_miss_retired	0x3000004
-#define P4_PEBS__dtlb_all_miss_retired		0x3000004
-#define P4_PEBS__tagged_mispred_branch		0x3018000
-#define P4_PEBS__mob_load_replay_retired	0x3000200
-#define P4_PEBS__split_load_retired		0x3000400
-#define P4_PEBS__split_store_retired		0x3000400
-
-#define P4_VERT__1stl_cache_load_miss_retired	0x0000001
-#define P4_VERT__2ndl_cache_load_miss_retired	0x0000001
-#define P4_VERT__dtlb_load_miss_retired		0x0000001
-#define P4_VERT__dtlb_store_miss_retired	0x0000002
-#define P4_VERT__dtlb_all_miss_retired		0x0000003
-#define P4_VERT__tagged_mispred_branch		0x0000010
-#define P4_VERT__mob_load_replay_retired	0x0000001
-#define P4_VERT__split_load_retired		0x0000001
-#define P4_VERT__split_store_retired		0x0000002
-
-enum P4_CACHE_EVENTS {
-	P4_CACHE__NONE,
-
-	P4_CACHE__1stl_cache_load_miss_retired,
-	P4_CACHE__2ndl_cache_load_miss_retired,
-	P4_CACHE__dtlb_load_miss_retired,
-	P4_CACHE__dtlb_store_miss_retired,
-	P4_CACHE__itlb_reference_hit,
-	P4_CACHE__itlb_reference_miss,
-
-	P4_CACHE__MAX
+/*
+ * P4 PEBS specifics (Replay Event only)
+ *
+ * Format (bits):
+ *   0-6: metric from P4_PEBS_METRIC enum
+ *    7 : reserved
+ *    8 : reserved
+ * 9-11 : reserved
+ *
+ * Note we have UOP and PEBS bits reserved for now
+ * just in case if we will need them once
+ */
+#define P4_PEBS_CONFIG_ENABLE		(1 << 7)
+#define P4_PEBS_CONFIG_UOP_TAG		(1 << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK	0x3f
+#define P4_PEBS_CONFIG_MASK		0xff
+
+/*
+ * mem: Only counters MSR_IQ_COUNTER4 (16) and
+ * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
+ */
+#define P4_PEBS_ENABLE			0x02000000U
+#define P4_PEBS_ENABLE_UOP_TAG		0x01000000U
+
+#define p4_config_unpack_metric(v)	(((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
+#define p4_config_unpack_pebs(v)	(((u64)(v)) & P4_PEBS_CONFIG_MASK)
+
+#define p4_config_pebs_has(v, mask)	(p4_config_unpack_pebs(v) & (mask))
+
+enum P4_PEBS_METRIC {
+	P4_PEBS_METRIC__none,
+
+	P4_PEBS_METRIC__1stl_cache_load_miss_retired,
+	P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
+	P4_PEBS_METRIC__dtlb_load_miss_retired,
+	P4_PEBS_METRIC__dtlb_store_miss_retired,
+	P4_PEBS_METRIC__dtlb_all_miss_retired,
+	P4_PEBS_METRIC__tagged_mispred_branch,
+	P4_PEBS_METRIC__mob_load_replay_retired,
+	P4_PEBS_METRIC__split_load_retired,
+	P4_PEBS_METRIC__split_store_retired,
+
+	P4_PEBS_METRIC__max
 };

 #endif /* PERF_EVENT_P4_H */
+
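The packed 64-bit config layout used above is easier to follow with concrete numbers. Below is a minimal userspace sketch of the same packing/unpacking; the constants are copied from this hunk and re-declared locally so the snippet compiles on its own, and the ESCR/CCCR values fed in are purely illustrative, not taken from the Pentium 4 manuals.

#include <stdint.h>
#include <stdio.h>

#define P4_PEBS_CONFIG_METRIC_MASK	0x3fULL
#define p4_config_pack_escr(v)		(((uint64_t)(v)) << 32)
#define p4_config_pack_cccr(v)		(((uint64_t)(v)) & 0xffffffffULL)
#define p4_config_unpack_metric(v)	(((uint64_t)(v)) & P4_PEBS_CONFIG_METRIC_MASK)

int main(void)
{
	/* illustrative raw register values, not real hardware encodings */
	uint32_t escr = 0x0000ff00;
	uint32_t cccr = 0x00030000 | 4;	/* low 6 bits carry the PEBS metric index */
	uint64_t config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);

	printf("config=%#llx metric=%llu\n",
	       (unsigned long long)config,
	       (unsigned long long)p4_config_unpack_metric(config));
	return 0;
}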

+ 49 - 0
arch/x86/include/asm/stacktrace.h

@@ -1,6 +1,13 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
 #ifndef _ASM_X86_STACKTRACE_H
 #ifndef _ASM_X86_STACKTRACE_H
 #define _ASM_X86_STACKTRACE_H
 #define _ASM_X86_STACKTRACE_H
 
 
+#include <linux/uaccess.h>
+
 extern int kstack_depth_to_print;
 extern int kstack_depth_to_print;
 
 
 struct thread_info;
 struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data);
 		const struct stacktrace_ops *ops, void *data);
 
 
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+	struct stack_frame *next_frame;
+	unsigned long return_address;
+};
+
+struct stack_frame_ia32 {
+    u32 next_frame;
+    u32 return_address;
+};
+
+static inline unsigned long caller_frame_pointer(void)
+{
+	struct stack_frame *frame;
+
+	get_bp(frame);
+
+#ifdef CONFIG_FRAME_POINTER
+	frame = frame->next_frame;
+#endif
+
+	return (unsigned long)frame;
+}
+
 #endif /* _ASM_X86_STACKTRACE_H */
 #endif /* _ASM_X86_STACKTRACE_H */
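The struct stack_frame moved into this header mirrors the x86 frame-pointer layout: the saved frame pointer followed by the return address. A small userspace sketch of the same idea, not the kernel helper itself; it assumes x86 with frame pointers kept (for example gcc -O0 or -fno-omit-frame-pointer) and uses the gcc/clang __builtin_frame_address builtin.

#include <stdio.h>

/* same two-word layout as the struct stack_frame declared above */
struct stack_frame {
	struct stack_frame *next_frame;
	unsigned long return_address;
};

int main(void)
{
	/* only meaningful when frame pointers are kept by the compiler */
	struct stack_frame *frame = __builtin_frame_address(0);

	printf("this frame at %p returns to %#lx\n",
	       (void *)frame, frame->return_address);
	if (frame->next_frame)
		printf("caller's frame is at %p\n", (void *)frame->next_frame);
	return 0;
}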

+ 6 - 1
arch/x86/kernel/apic/Makefile

@@ -2,7 +2,12 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 #
 
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
+ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
+obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
+endif
+obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 

+ 107 - 0
arch/x86/kernel/apic/hw_nmi.c

@@ -0,0 +1,107 @@
+/*
+ *  HW NMI watchdog support
+ *
+ *  started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ *  Arch specific calls to support NMI watchdog
+ *
+ *  Bits copied from original nmi.c file
+ *
+ */
+#include <asm/apic.h>
+
+#include <linux/cpumask.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
+u64 hw_nmi_get_sample_period(void)
+{
+	return (u64)(cpu_khz) * 1000 * 60;
+}
+
+#ifdef ARCH_HAS_NMI_WATCHDOG
+void arch_trigger_all_cpu_backtrace(void)
+{
+	int i;
+
+	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+
+	printk(KERN_INFO "sending NMI to all CPUs:\n");
+	apic->send_IPI_all(NMI_VECTOR);
+
+	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
+	for (i = 0; i < 10 * 1000; i++) {
+		if (cpumask_empty(to_cpumask(backtrace_mask)))
+			break;
+		mdelay(1);
+	}
+}
+
+static int __kprobes
+arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
+			 unsigned long cmd, void *__args)
+{
+	struct die_args *args = __args;
+	struct pt_regs *regs;
+	int cpu = smp_processor_id();
+
+	switch (cmd) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	regs = args->regs;
+
+	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
+		static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+		arch_spin_lock(&lock);
+		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+		show_regs(regs);
+		dump_stack();
+		arch_spin_unlock(&lock);
+		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+		return NOTIFY_STOP;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static __read_mostly struct notifier_block backtrace_notifier = {
+	.notifier_call          = arch_trigger_all_cpu_backtrace_handler,
+	.next                   = NULL,
+	.priority               = 1
+};
+
+static int __init register_trigger_all_cpu_backtrace(void)
+{
+	register_die_notifier(&backtrace_notifier);
+	return 0;
+}
+early_initcall(register_trigger_all_cpu_backtrace);
+#endif
+
+/* STUB calls to mimic old nmi_watchdog behaviour */
+#if defined(CONFIG_X86_LOCAL_APIC)
+unsigned int nmi_watchdog = NMI_NONE;
+EXPORT_SYMBOL(nmi_watchdog);
+void acpi_nmi_enable(void) { return; }
+void acpi_nmi_disable(void) { return; }
+#endif
+atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
+EXPORT_SYMBOL(nmi_active);
+int unknown_nmi_panic;
+void cpu_nmi_set_wd_enabled(void) { return; }
+void stop_apic_nmi_watchdog(void *unused) { return; }
+void setup_apic_nmi_watchdog(void *unused) { return; }
+int __init check_nmi_watchdog(void) { return 0; }

+ 0 - 7
arch/x86/kernel/apic/nmi.c

@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 	int cpu = smp_processor_id();
 	int cpu = smp_processor_id();
 	int rc = 0;
 	int rc = 0;
 
 
-	/* check for other users first */
-	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-			== NOTIFY_STOP) {
-		rc = 1;
-		touched = 1;
-	}
-
 	sum = get_timer_irqs(cpu);
 	sum = get_timer_irqs(cpu);
 
 
 	if (__get_cpu_var(nmi_touch)) {
 	if (__get_cpu_var(nmi_touch)) {

+ 25 - 37
arch/x86/kernel/cpu/perf_event.c

@@ -220,6 +220,7 @@ struct x86_pmu {
 						 struct perf_event *event);
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
 	struct event_constraint *event_constraints;
 	void		(*quirks)(void);
 	void		(*quirks)(void);
+	int		perfctr_second_write;
 
 
 	int		(*cpu_prepare)(int cpu);
 	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
 	 * count to the generic event atomically:
 	 * count to the generic event atomically:
 	 */
 	 */
 again:
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	rdmsrl(hwc->event_base + idx, new_raw_count);
 	rdmsrl(hwc->event_base + idx, new_raw_count);
 
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
 					new_raw_count) != prev_raw_count)
 		goto again;
 		goto again;
 
 
@@ -313,8 +314,8 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 	delta >>= shift;
 
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 
 	return new_raw_count;
 	return new_raw_count;
 }
 }
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	if (!hwc->sample_period) {
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
 		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	} else {
 	} else {
 		/*
 		/*
 		 * If we have a PMU initialized but no APIC
 		 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
 x86_perf_event_set_period(struct perf_event *event)
 x86_perf_event_set_period(struct perf_event *event)
 {
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	s64 period = hwc->sample_period;
 	int ret = 0, idx = hwc->idx;
 	int ret = 0, idx = hwc->idx;
 
 
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
 	 */
 	 */
 	if (unlikely(left <= -period)) {
 	if (unlikely(left <= -period)) {
 		left = period;
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		hwc->last_period = period;
 		ret = 1;
 		ret = 1;
 	}
 	}
 
 
 	if (unlikely(left <= 0)) {
 	if (unlikely(left <= 0)) {
 		left += period;
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		hwc->last_period = period;
 		ret = 1;
 		ret = 1;
 	}
 	}
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
 	 * The hw event starts counting from this event offset,
 	 * The hw event starts counting from this event offset,
 	 * mark it to be able to extra future deltas:
 	 * mark it to be able to extra future deltas:
 	 */
 	 */
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 
-	wrmsrl(hwc->event_base + idx,
+	wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+	/*
+	 * Due to erratum on certan cpu we need
+	 * a second write to be sure the register
+	 * is updated properly
+	 */
+	if (x86_pmu.perfctr_second_write) {
+		wrmsrl(hwc->event_base + idx,
 			(u64)(-left) & x86_pmu.cntval_mask);
 			(u64)(-left) & x86_pmu.cntval_mask);
+	}
 
 
 	perf_event_update_userpage(event);
 	perf_event_update_userpage(event);
 
 
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 * at commit time(->commit_txn) as a whole
 	 */
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		goto out;
 		goto out;
 
 
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
 	 * the event_list.
 	 */
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		return;
 		return;
 
 
 	x86_pmu_stop(event);
 	x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 
-	cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuc->group_flag |= PERF_EVENT_TXN;
 	cpuc->n_txn = 0;
 	cpuc->n_txn = 0;
 }
 }
 
 
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 {
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 
-	cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	/*
 	/*
 	 * Truncate the collected events.
 	 * Truncate the collected events.
 	 */
 	 */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
 	 */
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
 
-	/*
-	 * Clear out the txn count so that ->cancel_txn() which gets
-	 * run after ->commit_txn() doesn't undo things.
-	 */
-	cpuc->n_txn = 0;
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
 	.walk_stack		= print_context_stack_bp,
 	.walk_stack		= print_context_stack_bp,
 };
 };
 
 
-#include "../dumpstack.h"
-
 static void
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 {
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 	return entry;
 }
 }
 
 
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-	regs->ip = ip;
-	/*
-	 * perf_arch_fetch_caller_regs adds another call, we need to increment
-	 * the skip level
-	 */
-	regs->bp = rewind_frame_pointer(skip + 1);
-	regs->cs = __KERNEL_CS;
-	/*
-	 * We abuse bit 3 to pass exact information, see perf_misc_flags
-	 * and the comment with PERF_EFLAGS_EXACT.
-	 */
-	regs->flags = 0;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
 {
 	unsigned long ip;
 	unsigned long ip;
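The atomic64_t to local64_t conversion above keeps the update shape of x86_perf_event_update(): read the previous raw count, cmpxchg it forward, then fold the delta into the event totals. A userspace analogue of that shape using C11 atomics, since local64_t is kernel-only; the counter-width sign-extension done with the shift trick in the real code is deliberately omitted.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t prev_count;
static _Atomic uint64_t total;

/* fold the raw counter delta into the running total without losing
 * concurrent updates, mirroring the read/cmpxchg/delta pattern above */
static void fold_delta(uint64_t new_raw)
{
	uint64_t prev;

	do {
		prev = atomic_load(&prev_count);
	} while (!atomic_compare_exchange_weak(&prev_count, &prev, new_raw));

	atomic_fetch_add(&total, new_raw - prev);
}

int main(void)
{
	fold_delta(100);
	fold_delta(250);
	printf("total=%llu\n", (unsigned long long)atomic_load(&total));
	return 0;
}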

+ 120 - 36
arch/x86/kernel/cpu/perf_event_p4.c

@@ -21,22 +21,36 @@ struct p4_event_bind {
 	char cntr[2][P4_CNTR_LIMIT];		/* counter index (offset), -1 on abscence */
 	char cntr[2][P4_CNTR_LIMIT];		/* counter index (offset), -1 on abscence */
 };
 };
 
 
-struct p4_cache_event_bind {
+struct p4_pebs_bind {
 	unsigned int metric_pebs;
 	unsigned int metric_pebs;
 	unsigned int metric_vert;
 	unsigned int metric_vert;
 };
 };
 
 
-#define P4_GEN_CACHE_EVENT_BIND(name)		\
-	[P4_CACHE__##name] = {			\
-		.metric_pebs = P4_PEBS__##name,	\
-		.metric_vert = P4_VERT__##name,	\
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
+	[P4_PEBS_METRIC__##name] = {				\
+		.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,	\
+		.metric_vert = vert,				\
 	}
 	}
 
 
-static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
-	P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
-	P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
-	P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
-	P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
+/*
+ * note we have P4_PEBS_ENABLE_UOP_TAG always set here
+ *
+ * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
+ * event configuration to find out which values are to be
+ * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * resgisters
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+	P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,	0x0000001, 0x0000001),
+	P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,	0x0000002, 0x0000001),
+	P4_GEN_PEBS_BIND(dtlb_load_miss_retired,	0x0000004, 0x0000001),
+	P4_GEN_PEBS_BIND(dtlb_store_miss_retired,	0x0000004, 0x0000002),
+	P4_GEN_PEBS_BIND(dtlb_all_miss_retired,		0x0000004, 0x0000003),
+	P4_GEN_PEBS_BIND(tagged_mispred_branch,		0x0018000, 0x0000010),
+	P4_GEN_PEBS_BIND(mob_load_replay_retired,	0x0000200, 0x0000001),
+	P4_GEN_PEBS_BIND(split_load_retired,		0x0000400, 0x0000001),
+	P4_GEN_PEBS_BIND(split_store_retired,		0x0000400, 0x0000002),
 };
 };
 
 
 /*
 /*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
 	},
 	},
 };
 };
 
 
-#define P4_GEN_CACHE_EVENT(event, bit, cache_event)			  \
+#define P4_GEN_CACHE_EVENT(event, bit, metric)				  \
 	p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
 	p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
 			    P4_ESCR_EMASK_BIT(event, bit))		| \
 			    P4_ESCR_EMASK_BIT(event, bit))		| \
-	p4_config_pack_cccr(cache_event					| \
+	p4_config_pack_cccr(metric					| \
 			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
 			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
 
 
 static __initconst const u64 p4_hw_cache_event_ids
 static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
 	[ C(OP_READ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-						P4_CACHE__1stl_cache_load_miss_retired),
+						P4_PEBS_METRIC__1stl_cache_load_miss_retired),
 	},
 	},
  },
  },
  [ C(LL  ) ] = {
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-						P4_CACHE__2ndl_cache_load_miss_retired),
+						P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
 	},
 	},
 },
 },
  [ C(DTLB) ] = {
  [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-						P4_CACHE__dtlb_load_miss_retired),
+						P4_PEBS_METRIC__dtlb_load_miss_retired),
 	},
 	},
 	[ C(OP_WRITE) ] = {
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_ACCESS) ] = 0x0,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-						P4_CACHE__dtlb_store_miss_retired),
+						P4_PEBS_METRIC__dtlb_store_miss_retired),
 	},
 	},
  },
  },
  [ C(ITLB) ] = {
  [ C(ITLB) ] = {
 	[ C(OP_READ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
 		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
-						P4_CACHE__itlb_reference_hit),
+						P4_PEBS_METRIC__none),
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
 		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
-						P4_CACHE__itlb_reference_miss),
+						P4_PEBS_METRIC__none),
 	},
 	},
 	[ C(OP_WRITE) ] = {
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,
 		[ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
 	return config;
 	return config;
 }
 }
 
 
+static int p4_validate_raw_event(struct perf_event *event)
+{
+	unsigned int v;
+
+	/* user data may have out-of-bound event index */
+	v = p4_config_unpack_event(event->attr.config);
+	if (v >= ARRAY_SIZE(p4_event_bind_map)) {
+		pr_warning("P4 PMU: Unknown event code: %d\n", v);
+		return -EINVAL;
+	}
+
+	/*
+	 * it may have some screwed PEBS bits
+	 */
+	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
+		pr_warning("P4 PMU: PEBS are not supported yet\n");
+		return -EINVAL;
+	}
+	v = p4_config_unpack_metric(event->attr.config);
+	if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
+		pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int p4_hw_config(struct perf_event *event)
 static int p4_hw_config(struct perf_event *event)
 {
 {
 	int cpu = get_cpu();
 	int cpu = get_cpu();
 	int rc = 0;
 	int rc = 0;
-	unsigned int evnt;
 	u32 escr, cccr;
 	u32 escr, cccr;
 
 
 	/*
 	/*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
 
 
 	if (event->attr.type == PERF_TYPE_RAW) {
 	if (event->attr.type == PERF_TYPE_RAW) {
 
 
-		/* user data may have out-of-bound event index */
-		evnt = p4_config_unpack_event(event->attr.config);
-		if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
-			rc = -EINVAL;
+		rc = p4_validate_raw_event(event);
+		if (rc)
 			goto out;
 			goto out;
-		}
 
 
 		/*
 		/*
 		 * We don't control raw events so it's up to the caller
 		 * We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
 		 * on HT machine but allow HT-compatible specifics to be
 		 * on HT machine but allow HT-compatible specifics to be
 		 * passed on)
 		 * passed on)
 		 *
 		 *
+		 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
+		 * bits since we keep additional info here (for cache events and etc)
+		 *
 		 * XXX: HT wide things should check perf_paranoid_cpu() &&
 		 * XXX: HT wide things should check perf_paranoid_cpu() &&
 		 *      CAP_SYS_ADMIN
 		 *      CAP_SYS_ADMIN
 		 */
 		 */
 		event->hw.config |= event->attr.config &
 		event->hw.config |= event->attr.config &
 			(p4_config_pack_escr(P4_ESCR_MASK_HT) |
 			(p4_config_pack_escr(P4_ESCR_MASK_HT) |
-			 p4_config_pack_cccr(P4_CCCR_MASK_HT));
+			 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
 	}
 	}
 
 
 	rc = x86_setup_perfctr(event);
 	rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 	return overflow;
 	return overflow;
 }
 }
 
 
+static void p4_pmu_disable_pebs(void)
+{
+	/*
+	 * FIXME
+	 *
+	 * It's still allowed that two threads setup same cache
+	 * events so we can't simply clear metrics until we knew
+	 * noone is depending on us, so we need kind of counter
+	 * for "ReplayEvent" users.
+	 *
+	 * What is more complex -- RAW events, if user (for some
+	 * reason) will pass some cache event metric with improper
+	 * event opcode -- it's fine from hardware point of view
+	 * but completely nonsence from "meaning" of such action.
+	 *
+	 * So at moment let leave metrics turned on forever -- it's
+	 * ok for now but need to be revisited!
+	 *
+	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
+	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+	 */
+}
+
 static inline void p4_pmu_disable_event(struct perf_event *event)
 static inline void p4_pmu_disable_event(struct perf_event *event)
 {
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
 			continue;
 			continue;
 		p4_pmu_disable_event(event);
 		p4_pmu_disable_event(event);
 	}
 	}
+
+	p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+	struct p4_pebs_bind *bind;
+	unsigned int idx;
+
+	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+	idx = p4_config_unpack_metric(config);
+	if (idx == P4_PEBS_METRIC__none)
+		return;
+
+	bind = &p4_pebs_bind_map[idx];
+
+	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs);
+	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert);
 }
 }
 
 
 static void p4_pmu_enable_event(struct perf_event *event)
 static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
 	int thread = p4_ht_config_thread(hwc->config);
 	int thread = p4_ht_config_thread(hwc->config);
 	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
 	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
 	unsigned int idx = p4_config_unpack_event(hwc->config);
 	unsigned int idx = p4_config_unpack_event(hwc->config);
-	unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
 	struct p4_event_bind *bind;
 	struct p4_event_bind *bind;
-	struct p4_cache_event_bind *bind_cache;
 	u64 escr_addr, cccr;
 	u64 escr_addr, cccr;
 
 
 	bind = &p4_event_bind_map[idx];
 	bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
 	cccr = p4_config_unpack_cccr(hwc->config);
 	cccr = p4_config_unpack_cccr(hwc->config);
 
 
 	/*
 	/*
-	 * it could be Cache event so that we need to
-	 * set metrics into additional MSRs
+	 * it could be Cache event so we need to write metrics
+	 * into additional MSRs
 	 */
 	 */
-	BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
-	if (idx_cache > P4_CACHE__NONE &&
-		idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
-		bind_cache = &p4_cache_event_bind_map[idx_cache];
-		(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
-		(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
-	}
+	p4_pmu_enable_pebs(hwc->config);
 
 
 	(void)checking_wrmsrl(escr_addr, escr_conf);
 	(void)checking_wrmsrl(escr_addr, escr_conf);
 	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
 	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
 	.max_period		= (1ULL << 39) - 1,
 	.max_period		= (1ULL << 39) - 1,
 	.hw_config		= p4_hw_config,
 	.hw_config		= p4_hw_config,
 	.schedule_events	= p4_pmu_schedule_events,
 	.schedule_events	= p4_pmu_schedule_events,
+	/*
+	 * This handles erratum N15 in intel doc 249199-029,
+	 * the counter may not be updated correctly on write
+	 * so we need a second write operation to do the trick
+	 * (the official workaround didn't work)
+	 *
+	 * the former idea is taken from OProfile code
+	 */
+	.perfctr_second_write	= 1,
 };
 };
 
 
 static __init int p4_pmu_init(void)
 static __init int p4_pmu_init(void)
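p4_validate_raw_event() introduced in this file boils down to three bound/flag checks on the user-supplied raw config: a known event index, no PEBS enable bit, and a known replay metric. A standalone sketch of the same checks; the table sizes are invented, the real limits being ARRAY_SIZE(p4_event_bind_map) and ARRAY_SIZE(p4_pebs_bind_map).

#include <stdint.h>
#include <stdio.h>

#define P4_PEBS_CONFIG_ENABLE		(1 << 7)
#define P4_PEBS_CONFIG_METRIC_MASK	0x3f

#define NR_EVENTS	46	/* illustrative table sizes */
#define NR_METRICS	10

static int validate_raw(uint64_t config, unsigned int event_idx)
{
	if (event_idx >= NR_EVENTS)
		return -1;	/* unknown event code */
	if (config & P4_PEBS_CONFIG_ENABLE)
		return -1;	/* PEBS sampling not supported yet */
	if ((config & P4_PEBS_CONFIG_METRIC_MASK) >= NR_METRICS)
		return -1;	/* unknown replay metric */
	return 0;
}

int main(void)
{
	printf("%d\n", validate_raw(0x4, 3));		/* accepted */
	printf("%d\n", validate_raw(1 << 7, 3));	/* rejected: PEBS bit set */
	return 0;
}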

+ 0 - 1
arch/x86/kernel/dumpstack.c

@@ -18,7 +18,6 @@
 
 
 #include <asm/stacktrace.h>

-#include "dumpstack.h"

 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;

+ 0 - 56
arch/x86/kernel/dumpstack.h

@@ -1,56 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- */
-
-#ifndef DUMPSTACK_H
-#define DUMPSTACK_H
-
-#ifdef CONFIG_X86_32
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-#else
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-#endif
-
-#include <linux/uaccess.h>
-
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl);
-
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *sp, unsigned long bp, char *log_lvl);
-
-extern unsigned int code_bytes;
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
-	struct stack_frame *next_frame;
-	unsigned long return_address;
-};
-
-struct stack_frame_ia32 {
-    u32 next_frame;
-    u32 return_address;
-};
-
-static inline unsigned long rewind_frame_pointer(int n)
-{
-	struct stack_frame *frame;
-
-	get_bp(frame);
-
-#ifdef CONFIG_FRAME_POINTER
-	while (n--) {
-		if (probe_kernel_address(&frame->next_frame, frame))
-			break;
-	}
-#endif
-
-	return (unsigned long)frame;
-}
-
-#endif /* DUMPSTACK_H */

+ 0 - 2
arch/x86/kernel/dumpstack_32.c

@@ -16,8 +16,6 @@
 
 
 #include <asm/stacktrace.h>

-#include "dumpstack.h"
-

 void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,

+ 0 - 1
arch/x86/kernel/dumpstack_64.c

@@ -16,7 +16,6 @@
 
 
 #include <asm/stacktrace.h>

-#include "dumpstack.h"

 #define N_EXCEPTION_STACKS_END \
 		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)

+ 36 - 15
arch/x86/kernel/hw_breakpoint.c

@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
 {
 {
 	/* Len */
 	/* Len */
 	switch (x86_len) {
 	switch (x86_len) {
+	case X86_BREAKPOINT_LEN_X:
+		*gen_len = sizeof(long);
+		break;
 	case X86_BREAKPOINT_LEN_1:
 	case X86_BREAKPOINT_LEN_1:
 		*gen_len = HW_BREAKPOINT_LEN_1;
 		*gen_len = HW_BREAKPOINT_LEN_1;
 		break;
 		break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
 
 
 	info->address = bp->attr.bp_addr;
 	info->address = bp->attr.bp_addr;
 
 
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_W:
+		info->type = X86_BREAKPOINT_WRITE;
+		break;
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		info->type = X86_BREAKPOINT_RW;
+		break;
+	case HW_BREAKPOINT_X:
+		info->type = X86_BREAKPOINT_EXECUTE;
+		/*
+		 * x86 inst breakpoints need to have a specific undefined len.
+		 * But we still need to check userspace is not trying to setup
+		 * an unsupported length, to get a range breakpoint for example.
+		 */
+		if (bp->attr.bp_len == sizeof(long)) {
+			info->len = X86_BREAKPOINT_LEN_X;
+			return 0;
+		}
+	default:
+		return -EINVAL;
+	}
+
 	/* Len */
 	/* Len */
 	switch (bp->attr.bp_len) {
 	switch (bp->attr.bp_len) {
 	case HW_BREAKPOINT_LEN_1:
 	case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	/* Type */
-	switch (bp->attr.bp_type) {
-	case HW_BREAKPOINT_W:
-		info->type = X86_BREAKPOINT_WRITE;
-		break;
-	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
-		info->type = X86_BREAKPOINT_RW;
-		break;
-	case HW_BREAKPOINT_X:
-		info->type = X86_BREAKPOINT_EXECUTE;
-		break;
-	default:
-		return -EINVAL;
-	}
-
 	return 0;
 	return 0;
 }
 }
 /*
 /*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	ret = -EINVAL;
 	ret = -EINVAL;
 
 
 	switch (info->len) {
 	switch (info->len) {
+	case X86_BREAKPOINT_LEN_X:
+		align = sizeof(long) -1;
+		break;
 	case X86_BREAKPOINT_LEN_1:
 	case X86_BREAKPOINT_LEN_1:
 		align = 0;
 		align = 0;
 		break;
 		break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 
 
 		perf_bp_event(bp, args->regs);
 		perf_bp_event(bp, args->regs);
 
 
+		/*
+		 * Set up resume flag to avoid breakpoint recursion when
+		 * returning back to origin.
+		 */
+		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+			args->regs->flags |= X86_EFLAGS_RF;
+
 		rcu_read_unlock();
 		rcu_read_unlock();
 	}
 	}
 	/*
 	/*
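Moving the type switch in front of the length switch matters because execute breakpoints carry the pseudo-length sizeof(long) and must return before the data-length validation runs. A compressed sketch of that control flow; the type constants are illustrative, not the real X86_BREAKPOINT_* values.

#include <stdio.h>

enum { BP_W = 1, BP_RW = 3, BP_X = 4 };	/* illustrative type values */

/* mirrors the reordered checks in arch_build_bp_info(): execute
 * breakpoints only accept the pseudo-length sizeof(long) and return
 * before the data-length switch is reached */
static int build_bp(int type, unsigned long len)
{
	switch (type) {
	case BP_W:
	case BP_RW:
		break;
	case BP_X:
		return len == sizeof(long) ? 0 : -1;
	default:
		return -1;
	}

	switch (len) {
	case 1: case 2: case 4: case 8:
		return 0;
	default:
		return -1;
	}
}

int main(void)
{
	printf("exec, len == sizeof(long): %d\n", build_bp(BP_X, sizeof(long)));
	printf("exec, len == 2 (range):    %d\n", build_bp(BP_X, 2));
	printf("write, len == 4:           %d\n", build_bp(BP_W, 4));
	return 0;
}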

+ 17 - 16
arch/x86/kernel/kprobes.c

@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
 }
 }
 
 
 /*
 /*
- * Check for the REX prefix which can only exist on X86_64
- * X86_32 always returns 0
+ * Skip the prefixes of the instruction.
  */
  */
-static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
+static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
 {
 {
+	insn_attr_t attr;
+
+	attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+	while (inat_is_legacy_prefix(attr)) {
+		insn++;
+		attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+	}
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
-	if ((*insn & 0xf0) == 0x40)
-		return 1;
+	if (inat_is_rex_prefix(attr))
+		insn++;
 #endif
 #endif
-	return 0;
+	return insn;
 }
 }
 
 
 /*
 /*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
  */
  */
 static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 {
 {
+	/* Skip prefixes */
+	insn = skip_prefixes(insn);
+
 	switch (*insn) {
 	switch (*insn) {
 	case 0xfa:		/* cli */
 	case 0xfa:		/* cli */
 	case 0xfb:		/* sti */
 	case 0xfb:		/* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 		return 1;
 		return 1;
 	}
 	}
 
 
-	/*
-	 * on X86_64, 0x40-0x4f are REX prefixes so we need to look
-	 * at the next byte instead.. but of course not recurse infinitely
-	 */
-	if (is_REX_prefix(insn))
-		return is_IF_modifier(++insn);
-
 	return 0;
 	return 0;
 }
 }
 
 
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
 	unsigned long orig_ip = (unsigned long)p->addr;
 	unsigned long orig_ip = (unsigned long)p->addr;
 	kprobe_opcode_t *insn = p->ainsn.insn;
 	kprobe_opcode_t *insn = p->ainsn.insn;
 
 
-	/*skip the REX prefix*/
-	if (is_REX_prefix(insn))
-		insn++;
+	/* Skip prefixes */
+	insn = skip_prefixes(insn);
 
 
 	regs->flags &= ~X86_EFLAGS_TF;
 	regs->flags &= ~X86_EFLAGS_TF;
 	switch (*insn) {
 	switch (*insn) {
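skip_prefixes() replaces the old REX-only check with a loop driven by the instruction attribute tables, so every legacy prefix is stepped over before the opcode byte is inspected. A rough standalone equivalent; it hardcodes a prefix list instead of the kernel's inat tables, which is only an approximation.

#include <stdio.h>

static int is_legacy_prefix(unsigned char b)
{
	switch (b) {
	case 0x26: case 0x2e: case 0x36: case 0x3e:	/* segment overrides */
	case 0x64: case 0x65:
	case 0x66: case 0x67:				/* operand/address size */
	case 0xf0: case 0xf2: case 0xf3:		/* lock/rep */
		return 1;
	}
	return 0;
}

/* simplified stand-in for skip_prefixes(); the kernel drives this from
 * the inat opcode tables rather than a hardcoded list */
static const unsigned char *skip_prefixes(const unsigned char *insn)
{
	while (is_legacy_prefix(*insn))
		insn++;
	if ((*insn & 0xf0) == 0x40)	/* REX range, a prefix only in 64-bit mode */
		insn++;
	return insn;
}

int main(void)
{
	const unsigned char bytes[] = { 0xf0, 0x48, 0xfa };	/* prefix bytes then cli */

	printf("opcode after prefixes: %#x\n", *skip_prefixes(bytes));
	return 0;
}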

+ 4 - 0
arch/x86/kernel/process_32.c

@@ -57,6 +57,8 @@
 #include <asm/syscalls.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/debugreg.h>
 
 
+#include <trace/events/power.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 
 /*
 /*
@@ -111,6 +113,8 @@ void cpu_idle(void)
 			stop_critical_timings();
 			stop_critical_timings();
 			pm_idle();
 			pm_idle();
 			start_critical_timings();
 			start_critical_timings();
+
+			trace_power_end(smp_processor_id());
 		}
 		}
 		tick_nohz_restart_sched_tick();
 		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		preempt_enable_no_resched();

+ 5 - 0
arch/x86/kernel/process_64.c

@@ -51,6 +51,8 @@
 #include <asm/syscalls.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/debugreg.h>
 
 
+#include <trace/events/power.h>
+
 asmlinkage extern void ret_from_fork(void);
 asmlinkage extern void ret_from_fork(void);
 
 
 DEFINE_PER_CPU(unsigned long, old_rsp);
 DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
 			stop_critical_timings();
 			stop_critical_timings();
 			pm_idle();
 			pm_idle();
 			start_critical_timings();
 			start_critical_timings();
+
+			trace_power_end(smp_processor_id());
+
 			/* In many cases the interrupt that ended idle
 			/* In many cases the interrupt that ended idle
 			   has already called exit_idle. But some idle
 			   has already called exit_idle. But some idle
 			   loops can be woken up without interrupt. */
 			   loops can be woken up without interrupt. */

+ 16 - 15
arch/x86/kernel/stacktrace.c

@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
 	return 0;
 	return 0;
 }
 }
 
 
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static void
+__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
 {
 {
 	struct stack_trace *trace = data;
 	struct stack_trace *trace = data;
+#ifdef CONFIG_FRAME_POINTER
 	if (!reliable)
 	if (!reliable)
 		return;
 		return;
+#endif
+	if (nosched && in_sched_functions(addr))
+		return;
 	if (trace->skip > 0) {
 	if (trace->skip > 0) {
 		trace->skip--;
 		trace->skip--;
 		return;
 		return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
 		trace->entries[trace->nr_entries++] = addr;
 		trace->entries[trace->nr_entries++] = addr;
 }
 }
 
 
+static void save_stack_address(void *data, unsigned long addr, int reliable)
+{
+	return __save_stack_address(data, addr, reliable, false);
+}
+
 static void
 static void
 save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 {
 {
-	struct stack_trace *trace = (struct stack_trace *)data;
-	if (!reliable)
-		return;
-	if (in_sched_functions(addr))
-		return;
-	if (trace->skip > 0) {
-		trace->skip--;
-		return;
-	}
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = addr;
+	return __save_stack_address(data, addr, reliable, true);
 }
 }
 
 
 static const struct stacktrace_ops save_stack_ops = {
 static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 
 /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
 /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
 
 
-struct stack_frame {
+struct stack_frame_user {
 	const void __user	*next_fp;
 	const void __user	*next_fp;
 	unsigned long		ret_addr;
 	unsigned long		ret_addr;
 };
 };
 
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+static int
+copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
 {
 {
 	int ret;
 	int ret;
 
 
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
 		trace->entries[trace->nr_entries++] = regs->ip;
 		trace->entries[trace->nr_entries++] = regs->ip;
 
 
 	while (trace->nr_entries < trace->max_entries) {
 	while (trace->nr_entries < trace->max_entries) {
-		struct stack_frame frame;
+		struct stack_frame_user frame;
 
 
 		frame.next_fp = NULL;
 		frame.next_fp = NULL;
 		frame.ret_addr = 0;
 		frame.ret_addr = 0;
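The refactor above folds two nearly identical callbacks into a single __save_stack_address() helper selected by a nosched flag. The shape of that helper, reduced to a toy collector; in_sched_functions() is stubbed out with a marker address here.

#include <stdio.h>

#define SCHED_MARKER	0xdeadbeefUL	/* stand-in for in_sched_functions() */

struct trace {
	unsigned long entries[8];
	unsigned int nr, max, skip;
};

/* one helper, the "nosched" behaviour selected by a flag instead of a
 * second near-identical copy */
static void save_address(struct trace *t, unsigned long addr, int nosched)
{
	if (nosched && addr == SCHED_MARKER)
		return;
	if (t->skip) {
		t->skip--;
		return;
	}
	if (t->nr < t->max)
		t->entries[t->nr++] = addr;
}

int main(void)
{
	struct trace t = { .max = 8, .skip = 1 };

	save_address(&t, 0x1000, 0);		/* eaten by skip */
	save_address(&t, 0x2000, 0);		/* recorded */
	save_address(&t, SCHED_MARKER, 1);	/* filtered by nosched */
	printf("captured %u entries, first %#lx\n", t.nr, t.entries[0]);
	return 0;
}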

+ 7 - 0
arch/x86/kernel/traps.c

@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 								== NOTIFY_STOP)
 								== NOTIFY_STOP)
 			return;
 			return;
+
 #ifdef CONFIG_X86_LOCAL_APIC
 #ifdef CONFIG_X86_LOCAL_APIC
+		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+							== NOTIFY_STOP)
+			return;
+
+#ifndef CONFIG_LOCKUP_DETECTOR
 		/*
 		/*
 		 * Ok, so this is none of the documented NMI sources,
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
 		 * so it must be the NMI watchdog.
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		if (nmi_watchdog_tick(regs, reason))
 		if (nmi_watchdog_tick(regs, reason))
 			return;
 			return;
 		if (!do_nmi_callback(regs, cpu))
 		if (!do_nmi_callback(regs, cpu))
+#endif /* !CONFIG_LOCKUP_DETECTOR */
 			unknown_nmi_error(reason, regs);
 			unknown_nmi_error(reason, regs);
 #else
 #else
 		unknown_nmi_error(reason, regs);
 		unknown_nmi_error(reason, regs);

+ 17 - 13
arch/x86/mm/pf_in.c

@@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = {
 static unsigned int reg_rop[] = {
 static unsigned int reg_rop[] = {
 	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 };
 };
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
 /* IA32 Manual 3, 3-432*/
 /* IA32 Manual 3, 3-432*/
-static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA };
 static unsigned int rw32[] = {
 static unsigned int rw32[] = {
-	0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+	0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
 };
 };
-static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA };
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
-static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB };
 static unsigned int mw64[] = {};
 static unsigned int mw64[] = {};
 #else /* not __i386__ */
 #else /* not __i386__ */
 static unsigned char prefix_codes[] = {
 static unsigned char prefix_codes[] = {
@@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = {
 static unsigned int reg_rop[] = {
 static unsigned int reg_rop[] = {
 	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 };
 };
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
-static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA };
 static unsigned int rw32[] = {
 static unsigned int rw32[] = {
-	0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+	0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
 };
 };
 /* 8 bit only */
 /* 8 bit only */
-static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA };
 /* 16 bit only */
 /* 16 bit only */
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
 /* 16 or 32 bit */
 /* 16 or 32 bit */
 static unsigned int mw32[] = { 0xC7 };
 static unsigned int mw32[] = { 0xC7 };
 /* 16, 32 or 64 bit */
 /* 16, 32 or 64 bit */
-static unsigned int mw64[] = { 0x89, 0x8B };
+static unsigned int mw64[] = { 0x89, 0x8B, 0xAB };
 #endif /* not __i386__ */
 #endif /* not __i386__ */
 
 
 struct prefix_bits {
 struct prefix_bits {
@@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
 unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 {
 {
 	unsigned int opcode;
 	unsigned int opcode;
-	unsigned char mod_rm;
 	int reg;
 	int reg;
 	unsigned char *p;
 	unsigned char *p;
 	struct prefix_bits prf;
 	struct prefix_bits prf;
@@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 	goto err;
 	goto err;
 
 
 do_work:
 do_work:
-	mod_rm = *p;
-	reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+	/* for STOS, source register is fixed */
+	if (opcode == 0xAA || opcode == 0xAB) {
+		reg = arg_AX;
+	} else {
+		unsigned char mod_rm = *p;
+		reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+	}
 	switch (get_ins_reg_width(ins_addr)) {
 	switch (get_ins_reg_width(ins_addr)) {
 	case 1:
 	case 1:
 		return *get_reg_w8(reg, prf.rex, regs);
 		return *get_reg_w8(reg, prf.rex, regs);
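The STOS special case exists because opcodes 0xAA/0xAB have no ModRM byte; their source operand is always the accumulator, so the register index cannot be decoded the usual way. A tiny sketch of that register pick; arg_AX is represented here by an arbitrary index 0.

#include <stdio.h>

#define ARG_AX	0	/* index of the accumulator in a register array */

/* simplified version of the register pick in get_ins_reg_val(): STOS
 * (0xAA/0xAB) has a fixed source, everything else decodes ModRM + REX.R */
static int source_reg(unsigned int opcode, unsigned char modrm, int rexr)
{
	if (opcode == 0xAA || opcode == 0xAB)
		return ARG_AX;
	return ((modrm >> 3) & 0x7) | (rexr << 3);
}

int main(void)
{
	printf("mov r,m : reg %d\n", source_reg(0x89, 0x18, 0));	/* from ModRM */
	printf("stosd   : reg %d\n", source_reg(0xAB, 0x00, 0));	/* fixed to AX */
	return 0;
}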

+ 14 - 2
arch/x86/oprofile/nmi_int.c

@@ -634,6 +634,18 @@ static int __init ppro_init(char **cpu_type)
 	if (force_arch_perfmon && cpu_has_arch_perfmon)
 	if (force_arch_perfmon && cpu_has_arch_perfmon)
 		return 0;
 		return 0;
 
 
+	/*
+	 * Documentation on identifying Intel processors by CPU family
+	 * and model can be found in the Intel Software Developer's
+	 * Manuals (SDM):
+	 *
+	 *  http://www.intel.com/products/processor/manuals/
+	 *
+	 * As of May 2010 the documentation for this was in the:
+	 * "Intel 64 and IA-32 Architectures Software Developer's
+	 * Manual Volume 3B: System Programming Guide", "Table B-1
+	 * CPUID Signature Values of DisplayFamily_DisplayModel".
+	 */
 	switch (cpu_model) {
 	switch (cpu_model) {
 	case 0 ... 2:
 	case 0 ... 2:
 		*cpu_type = "i386/ppro";
 		*cpu_type = "i386/ppro";
@@ -655,12 +667,12 @@ static int __init ppro_init(char **cpu_type)
 	case 15: case 23:
 	case 15: case 23:
 		*cpu_type = "i386/core_2";
 		*cpu_type = "i386/core_2";
 		break;
 		break;
+	case 0x1a:
 	case 0x2e:
 	case 0x2e:
-	case 26:
 		spec = &op_arch_perfmon_spec;
 		spec = &op_arch_perfmon_spec;
 		*cpu_type = "i386/core_i7";
 		*cpu_type = "i386/core_i7";
 		break;
 		break;
-	case 28:
+	case 0x1c:
 		*cpu_type = "i386/atom";
 		*cpu_type = "i386/atom";
 		break;
 		break;
 	default:
 	default:

+ 1 - 0
arch/xtensa/include/asm/local64.h

@@ -0,0 +1 @@
+#include <asm-generic/local64.h>

+ 2 - 1
drivers/oprofile/event_buffer.c

@@ -135,7 +135,7 @@ static int event_buffer_open(struct inode *inode, struct file *file)
 	 * echo 1 >/dev/oprofile/enable
 	 * echo 1 >/dev/oprofile/enable
 	 */
 	 */
 
 
-	return 0;
+	return nonseekable_open(inode, file);
 
 
 fail:
 fail:
 	dcookie_unregister(file->private_data);
 	dcookie_unregister(file->private_data);
@@ -205,4 +205,5 @@ const struct file_operations event_buffer_fops = {
 	.open		= event_buffer_open,
 	.open		= event_buffer_open,
 	.release	= event_buffer_release,
 	.release	= event_buffer_release,
 	.read		= event_buffer_read,
 	.read		= event_buffer_read,
+	.llseek		= no_llseek,
 };
 };

+ 1 - 0
fs/exec.c

@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	else
 		stack_base = vma->vm_start - stack_expand;
 #endif
+	current->mm->start_stack = bprm->p;
 	ret = expand_stack(vma, stack_base);
 	if (ret)
 		ret = -EFAULT;

+ 96 - 0
include/asm-generic/local64.h

@@ -0,0 +1,96 @@
+#ifndef _ASM_GENERIC_LOCAL64_H
+#define _ASM_GENERIC_LOCAL64_H
+
+#include <linux/percpu.h>
+#include <asm/types.h>
+
+/*
+ * A signed long type for operations which are atomic for a single CPU.
+ * Usually used in combination with per-cpu variables.
+ *
+ * This is the default implementation, which uses atomic64_t.  Which is
+ * rather pointless.  The whole point behind local64_t is that some processors
+ * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs
+ * running on this CPU.  local64_t allows exploitation of such capabilities.
+ */
+
+/* Implement in terms of atomics. */
+
+#if BITS_PER_LONG == 64
+
+#include <asm/local.h>
+
+typedef struct {
+	local_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)	{ LOCAL_INIT(i) }
+
+#define local64_read(l)		local_read(&(l)->a)
+#define local64_set(l,i)	local_set((&(l)->a),(i))
+#define local64_inc(l)		local_inc(&(l)->a)
+#define local64_dec(l)		local_dec(&(l)->a)
+#define local64_add(i,l)	local_add((i),(&(l)->a))
+#define local64_sub(i,l)	local_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)	local_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)	local_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)	local_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)	local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)	local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)	local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)	local64_set((l), local64_read(l) - (i))
+
+#else /* BITS_PER_LONG != 64 */
+
+#include <asm/atomic.h>
+
+/* Don't use typedef: don't want them to be mixed with atomic_t's. */
+typedef struct {
+	atomic64_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)	{ ATOMIC_LONG_INIT(i) }
+
+#define local64_read(l)		atomic64_read(&(l)->a)
+#define local64_set(l,i)	atomic64_set((&(l)->a),(i))
+#define local64_inc(l)		atomic64_inc(&(l)->a)
+#define local64_dec(l)		atomic64_dec(&(l)->a)
+#define local64_add(i,l)	atomic64_add((i),(&(l)->a))
+#define local64_sub(i,l)	atomic64_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)	atomic64_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)	atomic64_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)	atomic64_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)	local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)	local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)	local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)	local64_set((l), local64_read(l) - (i))
+
+#endif /* BITS_PER_LONG != 64 */
+
+#endif /* _ASM_GENERIC_LOCAL64_H */
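local64_t is meant to be updated only from its own CPU, which is why the 64-bit build can implement it with local_t while 32-bit kernels fall back to atomic64_t. The userspace stand-in below only mimics the fallback flavour with C11 atomics, just to show how the wrapper type and accessor macros fit together; it is not the kernel implementation.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* wrap the atomic value in a dedicated type so it cannot be mixed up
 * with ordinary atomics, as the comment above insists */
typedef struct {
	_Atomic int64_t a;
} local64_t;

#define local64_read(l)		atomic_load(&(l)->a)
#define local64_set(l, i)	atomic_store(&(l)->a, (i))
#define local64_add(i, l)	atomic_fetch_add(&(l)->a, (i))

int main(void)
{
	static local64_t count;		/* zero-initialized */

	local64_set(&count, 5);
	local64_add(3, &count);
	printf("count = %lld\n", (long long)local64_read(&count));
	return 0;
}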

+ 0 - 4
include/asm-generic/vmlinux.lds.h

@@ -156,10 +156,6 @@
 	CPU_KEEP(exit.data)						\
 	CPU_KEEP(exit.data)						\
 	MEM_KEEP(init.data)						\
 	MEM_KEEP(init.data)						\
 	MEM_KEEP(exit.data)						\
 	MEM_KEEP(exit.data)						\
-	. = ALIGN(8);							\
-	VMLINUX_SYMBOL(__start___markers) = .;				\
-	*(__markers)							\
-	VMLINUX_SYMBOL(__stop___markers) = .;				\
 	. = ALIGN(32);							\
 	. = ALIGN(32);							\
 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
 	*(__tracepoints)						\
 	*(__tracepoints)						\

+ 5 - 0
include/linux/ftrace.h

@@ -1,3 +1,8 @@
+/*
+ * Ftrace header.  For implementation details beyond the random comments
+ * scattered below, see: Documentation/trace/ftrace-design.txt
+ */
+
 #ifndef _LINUX_FTRACE_H
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
 

+ 12 - 6
include/linux/ftrace_event.h

@@ -11,8 +11,6 @@ struct trace_array;
 struct tracer;
 struct tracer;
 struct dentry;
 struct dentry;
 
 
-DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
-
 struct trace_print_flags {
 struct trace_print_flags {
 	unsigned long		mask;
 	unsigned long		mask;
 	const char		*name;
 	const char		*name;
@@ -58,6 +56,9 @@ struct trace_iterator {
 	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 	unsigned long		iter_flags;
 	unsigned long		iter_flags;
 
 
+	/* trace_seq for __print_flags() and __print_symbolic() etc. */
+	struct trace_seq	tmp_seq;
+
 	/* The below is zeroed out in pipe_read */
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
 	struct trace_entry	*ent;
@@ -146,14 +147,19 @@ struct ftrace_event_class {
 	int			(*raw_init)(struct ftrace_event_call *);
 	int			(*raw_init)(struct ftrace_event_call *);
 };
 };
 
 
+extern int ftrace_event_reg(struct ftrace_event_call *event,
+			    enum trace_reg type);
+
 enum {
 enum {
 	TRACE_EVENT_FL_ENABLED_BIT,
 	TRACE_EVENT_FL_ENABLED_BIT,
 	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_FILTERED_BIT,
+	TRACE_EVENT_FL_RECORDED_CMD_BIT,
 };
 };
 
 
 enum {
 enum {
-	TRACE_EVENT_FL_ENABLED	= (1 << TRACE_EVENT_FL_ENABLED_BIT),
-	TRACE_EVENT_FL_FILTERED	= (1 << TRACE_EVENT_FL_FILTERED_BIT),
+	TRACE_EVENT_FL_ENABLED		= (1 << TRACE_EVENT_FL_ENABLED_BIT),
+	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
+	TRACE_EVENT_FL_RECORDED_CMD	= (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
 };
 };
 
 
 struct ftrace_event_call {
 struct ftrace_event_call {
@@ -171,6 +177,7 @@ struct ftrace_event_call {
 	 * 32 bit flags:
 	 * 32 bit flags:
 	 *   bit 1:		enabled
 	 *   bit 1:		enabled
 	 *   bit 2:		filter_active
 	 *   bit 2:		filter_active
+	 *   bit 3:		enabled cmd record
 	 *
 	 *
 	 * Changes to flags must hold the event_mutex.
 	 * Changes to flags must hold the event_mutex.
 	 *
 	 *
@@ -257,8 +264,7 @@ static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
 		       u64 count, struct pt_regs *regs, void *head)
 		       u64 count, struct pt_regs *regs, void *head)
 {
 {
-	perf_tp_event(addr, count, raw_data, size, regs, head);
-	perf_swevent_put_recursion_context(rctx);
+	perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
 }
 }
 #endif
 #endif
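The new TRACE_EVENT_FL_RECORDED_CMD flag follows the existing idiom of pairing a _BIT enum with a mask enum derived from it, so the bit numbers and masks can never drift apart. A small sketch of that idiom outside the tracing code; the names are generic, not the ftrace ones.

#include <stdio.h>

enum {
	FL_ENABLED_BIT,
	FL_FILTERED_BIT,
	FL_RECORDED_CMD_BIT,
};

enum {
	FL_ENABLED	= (1 << FL_ENABLED_BIT),
	FL_FILTERED	= (1 << FL_FILTERED_BIT),
	FL_RECORDED_CMD	= (1 << FL_RECORDED_CMD_BIT),
};

int main(void)
{
	unsigned int flags = FL_ENABLED | FL_RECORDED_CMD;

	printf("enabled=%d filtered=%d recorded_cmd=%d\n",
	       !!(flags & FL_ENABLED), !!(flags & FL_FILTERED),
	       !!(flags & FL_RECORDED_CMD));
	return 0;
}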
 
 

+ 0 - 5
include/linux/kernel.h

@@ -513,9 +513,6 @@ extern void tracing_start(void);
 extern void tracing_stop(void);
 extern void tracing_stop(void);
 extern void ftrace_off_permanent(void);
 extern void ftrace_off_permanent(void);
 
 
-extern void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-
 static inline void __attribute__ ((format (printf, 1, 2)))
 static inline void __attribute__ ((format (printf, 1, 2)))
 ____trace_printk_check_format(const char *fmt, ...)
 ____trace_printk_check_format(const char *fmt, ...)
 {
 {
@@ -591,8 +588,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
 
 
 extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
 #else
-static inline void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
 static inline int
 trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 
 

+ 0 - 25
include/linux/kmemtrace.h

@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <trace/events/kmem.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-

+ 13 - 0
include/linux/nmi.h

@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
 extern void acpi_nmi_disable(void);
 extern void acpi_nmi_disable(void);
 extern void acpi_nmi_enable(void);
 extern void acpi_nmi_enable(void);
 #else
 #else
+#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 static inline void touch_nmi_watchdog(void)
 {
 {
 	touch_softlockup_watchdog();
 	touch_softlockup_watchdog();
 }
 }
+#else
+extern void touch_nmi_watchdog(void);
+#endif
 static inline void acpi_nmi_disable(void) { }
 static inline void acpi_nmi_disable(void) { }
 static inline void acpi_nmi_enable(void) { }
 static inline void acpi_nmi_enable(void) { }
 #endif
 #endif
@@ -47,4 +51,13 @@ static inline bool trigger_all_cpu_backtrace(void)
 }
 }
 #endif
 #endif
 
 
+#ifdef CONFIG_LOCKUP_DETECTOR
+int hw_nmi_is_cpu_stuck(struct pt_regs *);
+u64 hw_nmi_get_sample_period(void);
+extern int watchdog_enabled;
+struct ctl_table;
+extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
+			void __user *, size_t *, loff_t *);
+#endif
+
 #endif
 #endif

+ 47 - 48
include/linux/perf_event.h

@@ -214,8 +214,9 @@ struct perf_event_attr {
 				 *  See also PERF_RECORD_MISC_EXACT_IP
 				 *  See also PERF_RECORD_MISC_EXACT_IP
 				 */
 				 */
 				precise_ip     :  2, /* skid constraint       */
 				precise_ip     :  2, /* skid constraint       */
+				mmap_data      :  1, /* non-exec mmap data    */
 
 
-				__reserved_1   : 47;
+				__reserved_1   : 46;
 
 
 	union {
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -461,6 +462,7 @@ enum perf_callchain_context {
 
 
 #ifdef CONFIG_PERF_EVENTS
 #ifdef CONFIG_PERF_EVENTS
 # include <asm/perf_event.h>
 # include <asm/perf_event.h>
+# include <asm/local64.h>
 #endif
 #endif
 
 
 struct perf_guest_info_callbacks {
 struct perf_guest_info_callbacks {
@@ -531,14 +533,16 @@ struct hw_perf_event {
 			struct hrtimer	hrtimer;
 			struct hrtimer	hrtimer;
 		};
 		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-		/* breakpoint */
-		struct arch_hw_breakpoint	info;
+		struct { /* breakpoint */
+			struct arch_hw_breakpoint	info;
+			struct list_head		bp_list;
+		};
 #endif
 #endif
 	};
 	};
-	atomic64_t			prev_count;
+	local64_t			prev_count;
 	u64				sample_period;
 	u64				sample_period;
 	u64				last_period;
 	u64				last_period;
-	atomic64_t			period_left;
+	local64_t			period_left;
 	u64				interrupts;
 	u64				interrupts;
 
 
 	u64				freq_time_stamp;
 	u64				freq_time_stamp;
@@ -548,7 +552,10 @@ struct hw_perf_event {
 
 
 struct perf_event;
 struct perf_event;
 
 
-#define PERF_EVENT_TXN_STARTED 1
+/*
+ * Common implementation detail of pmu::{start,commit,cancel}_txn
+ */
+#define PERF_EVENT_TXN 0x1
 
 
 /**
 /**
  * struct pmu - generic performance monitoring unit
  * struct pmu - generic performance monitoring unit
@@ -562,14 +569,28 @@ struct pmu {
 	void (*unthrottle)		(struct perf_event *event);
 	void (*unthrottle)		(struct perf_event *event);
 
 
 	/*
 	/*
-	 * group events scheduling is treated as a transaction,
-	 * add group events as a whole and perform one schedulability test.
-	 * If test fails, roll back the whole group
+	 * Group events scheduling is treated as a transaction, add group
+	 * events as a whole and perform one schedulability test. If the test
+	 * fails, roll back the whole group
 	 */
 	 */
 
 
+	/*
+	 * Start the transaction, after this ->enable() doesn't need
+	 * to do schedulability tests.
+	 */
 	void (*start_txn)	(const struct pmu *pmu);
 	void (*start_txn)	(const struct pmu *pmu);
-	void (*cancel_txn)	(const struct pmu *pmu);
+	/*
+	 * If ->start_txn() disabled the ->enable() schedulability test
+	 * then ->commit_txn() is required to perform one. On success
+	 * the transaction is closed. On error the transaction is kept
+	 * open until ->cancel_txn() is called.
+	 */
 	int  (*commit_txn)	(const struct pmu *pmu);
 	int  (*commit_txn)	(const struct pmu *pmu);
+	/*
+	 * Will cancel the transaction, assumes ->disable() is called for
+	 * each successfull ->enable() during the transaction.
+	 */
+	void (*cancel_txn)	(const struct pmu *pmu);
 };
 };
 
 
 /**
 /**
@@ -584,7 +605,9 @@ enum perf_event_active_state {
 
 
 struct file;
 struct file;
 
 
-struct perf_mmap_data {
+#define PERF_BUFFER_WRITABLE		0x01
+
+struct perf_buffer {
 	atomic_t			refcount;
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
 	struct rcu_head			rcu_head;
 #ifdef CONFIG_PERF_USE_VMALLOC
 #ifdef CONFIG_PERF_USE_VMALLOC
@@ -650,7 +673,8 @@ struct perf_event {
 
 
 	enum perf_event_active_state	state;
 	enum perf_event_active_state	state;
 	unsigned int			attach_state;
 	unsigned int			attach_state;
-	atomic64_t			count;
+	local64_t			count;
+	atomic64_t			child_count;
 
 
 	/*
 	/*
 	 * These are the total time in nanoseconds that the event
 	 * These are the total time in nanoseconds that the event
@@ -709,7 +733,7 @@ struct perf_event {
 	atomic_t			mmap_count;
 	atomic_t			mmap_count;
 	int				mmap_locked;
 	int				mmap_locked;
 	struct user_struct		*mmap_user;
 	struct user_struct		*mmap_user;
-	struct perf_mmap_data		*data;
+	struct perf_buffer		*buffer;
 
 
 	/* poll related */
 	/* poll related */
 	wait_queue_head_t		waitq;
 	wait_queue_head_t		waitq;
@@ -807,7 +831,7 @@ struct perf_cpu_context {
 
 
 struct perf_output_handle {
 struct perf_output_handle {
 	struct perf_event		*event;
 	struct perf_event		*event;
-	struct perf_mmap_data		*data;
+	struct perf_buffer		*buffer;
 	unsigned long			wakeup;
 	unsigned long			wakeup;
 	unsigned long			size;
 	unsigned long			size;
 	void				*addr;
 	void				*addr;
@@ -910,8 +934,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 
 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 
 
-extern void
-perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+#ifndef perf_arch_fetch_caller_regs
+static inline void
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
+#endif
 
 
 /*
 /*
  * Take a snapshot of the regs. Skip ip and frame pointer to
  * Take a snapshot of the regs. Skip ip and frame pointer to
@@ -921,31 +947,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
  * - bp for callchains
  * - bp for callchains
  * - eflags, for future purposes, just in case
  * - eflags, for future purposes, just in case
  */
  */
-static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
+static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 {
 {
-	unsigned long ip;
-
 	memset(regs, 0, sizeof(*regs));
 	memset(regs, 0, sizeof(*regs));
 
 
-	switch (skip) {
-	case 1 :
-		ip = CALLER_ADDR0;
-		break;
-	case 2 :
-		ip = CALLER_ADDR1;
-		break;
-	case 3 :
-		ip = CALLER_ADDR2;
-		break;
-	case 4:
-		ip = CALLER_ADDR3;
-		break;
-	/* No need to support further for now */
-	default:
-		ip = 0;
-	}
-
-	return perf_arch_fetch_caller_regs(regs, ip, skip);
+	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
 }
 }
 
 
 static inline void
 static inline void
@@ -955,21 +961,14 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 		struct pt_regs hot_regs;
 		struct pt_regs hot_regs;
 
 
 		if (!regs) {
 		if (!regs) {
-			perf_fetch_caller_regs(&hot_regs, 1);
+			perf_fetch_caller_regs(&hot_regs);
 			regs = &hot_regs;
 			regs = &hot_regs;
 		}
 		}
 		__perf_sw_event(event_id, nr, nmi, regs, addr);
 		__perf_sw_event(event_id, nr, nmi, regs, addr);
 	}
 	}
 }
 }
 
 
-extern void __perf_event_mmap(struct vm_area_struct *vma);
-
-static inline void perf_event_mmap(struct vm_area_struct *vma)
-{
-	if (vma->vm_flags & VM_EXEC)
-		__perf_event_mmap(vma);
-}
-
+extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1001,7 +1000,7 @@ static inline bool perf_paranoid_kernel(void)
 extern void perf_event_init(void);
 extern void perf_event_init(void);
 extern void perf_tp_event(u64 addr, u64 count, void *record,
 extern void perf_tp_event(u64 addr, u64 count, void *record,
 			  int entry_size, struct pt_regs *regs,
 			  int entry_size, struct pt_regs *regs,
-			  struct hlist_head *head);
+			  struct hlist_head *head, int rctx);
 extern void perf_bp_event(struct perf_event *event, void *data);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 
 #ifndef perf_misc_flags
 #ifndef perf_misc_flags
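
Editorial aside: the new mmap_data attribute bit above is consumed from userspace through perf_event_open(). A hedged, minimal sketch of how a profiler might opt in to non-exec (data) mmap records; the event type and sampling parameters are illustrative only:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	/* no glibc wrapper exists; the raw syscall is the usual idiom */
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size	   = sizeof(attr);
	attr.type	   = PERF_TYPE_SOFTWARE;
	attr.config	   = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_period = 100000;
	attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
	attr.mmap	   = 1;	/* existing: PERF_RECORD_MMAP for exec mappings */
	attr.mmap_data	   = 1;	/* new bit: also report non-exec mappings */

	fd = perf_event_open(&attr, 0 /* self */, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}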

+ 4 - 20
include/linux/sched.h

@@ -316,20 +316,16 @@ extern void scheduler_tick(void);
 
 
 extern void sched_show_task(struct task_struct *p);
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-extern void softlockup_tick(void);
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-				    void __user *buffer,
-				    size_t *lenp, loff_t *ppos);
+extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+				  void __user *buffer,
+				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
 #else
-static inline void softlockup_tick(void)
-{
-}
 static inline void touch_softlockup_watchdog(void)
 {
 }
@@ -2435,18 +2431,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_TRACING
-extern void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3);
-#else
-static inline void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-}
-#endif
-
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 

+ 2 - 1
include/linux/slab_def.h

@@ -14,7 +14,8 @@
 #include <asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <linux/kmemtrace.h>
+
+#include <trace/events/kmem.h>
 
 #ifndef ARCH_KMALLOC_MINALIGN
 /*

+ 2 - 1
include/linux/slub_def.h

@@ -10,9 +10,10 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <linux/kmemtrace.h>
 #include <linux/kmemleak.h>
 
+#include <trace/events/kmem.h>
+
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
 	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */

+ 0 - 2
include/linux/syscalls.h

@@ -167,7 +167,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.enter_event	= &event_enter_##sname,		\
 		.exit_event	= &event_exit_##sname,		\
 		.enter_fields	= LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
-		.exit_fields	= LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \
 	};
 
 #define SYSCALL_DEFINE0(sname)					\
@@ -182,7 +181,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.enter_event	= &event_enter__##sname,	\
 		.exit_event	= &event_exit__##sname,		\
 		.enter_fields	= LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
-		.exit_fields	= LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \
 	};							\
 	asmlinkage long sys_##sname(void)
 #else

+ 0 - 60
include/trace/boot.h

@@ -1,60 +0,0 @@
-#ifndef _LINUX_TRACE_BOOT_H
-#define _LINUX_TRACE_BOOT_H
-
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/init.h>
-
-/*
- * Structure which defines the trace of an initcall
- * while it is called.
- * You don't have to fill the func field since it is
- * only used internally by the tracer.
- */
-struct boot_trace_call {
-	pid_t			caller;
-	char			func[KSYM_SYMBOL_LEN];
-};
-
-/*
- * Structure which defines the trace of an initcall
- * while it returns.
- */
-struct boot_trace_ret {
-	char			func[KSYM_SYMBOL_LEN];
-	int				result;
-	unsigned long long	duration;		/* nsecs */
-};
-
-#ifdef CONFIG_BOOT_TRACER
-/* Append the traces on the ring-buffer */
-extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
-extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
-
-/* Tells the tracer that smp_pre_initcall is finished.
- * So we can start the tracing
- */
-extern void start_boot_trace(void);
-
-/* Resume the tracing of other necessary events
- * such as sched switches
- */
-extern void enable_boot_trace(void);
-
-/* Suspend this tracing. Actually, only sched_switches tracing have
- * to be suspended. Initcalls doesn't need it.)
- */
-extern void disable_boot_trace(void);
-#else
-static inline
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
-
-static inline
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
-
-static inline void start_boot_trace(void) { }
-static inline void enable_boot_trace(void) { }
-static inline void disable_boot_trace(void) { }
-#endif /* CONFIG_BOOT_TRACER */
-
-#endif /* __LINUX_TRACE_BOOT_H */

+ 7 - 25
include/trace/events/sched.h

@@ -49,31 +49,6 @@ TRACE_EVENT(sched_kthread_stop_ret,
 	TP_printk("ret=%d", __entry->ret)
 	TP_printk("ret=%d", __entry->ret)
 );
 );
 
 
-/*
- * Tracepoint for waiting on task to unschedule:
- */
-TRACE_EVENT(sched_wait_task,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->prio	= p->prio;
-	),
-
-	TP_printk("comm=%s pid=%d prio=%d",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
 /*
  * Tracepoint for waking up a task:
  */
@@ -239,6 +214,13 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
 	     TP_PROTO(struct task_struct *p),
 	     TP_ARGS(p));
 
+/*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+	TP_PROTO(struct task_struct *p),
+	TP_ARGS(p));
+
 /*
  * Tracepoint for a waiting task:
  */

+ 32 - 48
include/trace/events/timer.h

@@ -8,11 +8,7 @@
 #include <linux/hrtimer.h>
 #include <linux/hrtimer.h>
 #include <linux/timer.h>
 #include <linux/timer.h>
 
 
-/**
- * timer_init - called when the timer is initialized
- * @timer:	pointer to struct timer_list
- */
-TRACE_EVENT(timer_init,
+DECLARE_EVENT_CLASS(timer_class,
 
 
 	TP_PROTO(struct timer_list *timer),
 	TP_PROTO(struct timer_list *timer),
 
 
@@ -29,6 +25,17 @@ TRACE_EVENT(timer_init,
 	TP_printk("timer=%p", __entry->timer)
 	TP_printk("timer=%p", __entry->timer)
 );
 );
 
 
+/**
+ * timer_init - called when the timer is initialized
+ * @timer:	pointer to struct timer_list
+ */
+DEFINE_EVENT(timer_class, timer_init,
+
+	TP_PROTO(struct timer_list *timer),
+
+	TP_ARGS(timer)
+);
+
 /**
 /**
  * timer_start - called when the timer is started
  * timer_start - called when the timer is started
  * @timer:	pointer to struct timer_list
  * @timer:	pointer to struct timer_list
@@ -94,42 +101,22 @@ TRACE_EVENT(timer_expire_entry,
  * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
  * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
  * be invalid. We solely track the pointer.
  * be invalid. We solely track the pointer.
  */
  */
-TRACE_EVENT(timer_expire_exit,
+DEFINE_EVENT(timer_class, timer_expire_exit,
 
 
 	TP_PROTO(struct timer_list *timer),
 	TP_PROTO(struct timer_list *timer),
 
 
-	TP_ARGS(timer),
-
-	TP_STRUCT__entry(
-		__field(void *,	timer	)
-	),
-
-	TP_fast_assign(
-		__entry->timer	= timer;
-	),
-
-	TP_printk("timer=%p", __entry->timer)
+	TP_ARGS(timer)
 );
 );
 
 
 /**
 /**
  * timer_cancel - called when the timer is canceled
  * timer_cancel - called when the timer is canceled
  * @timer:	pointer to struct timer_list
  * @timer:	pointer to struct timer_list
  */
  */
-TRACE_EVENT(timer_cancel,
+DEFINE_EVENT(timer_class, timer_cancel,
 
 
 	TP_PROTO(struct timer_list *timer),
 	TP_PROTO(struct timer_list *timer),
 
 
-	TP_ARGS(timer),
-
-	TP_STRUCT__entry(
-		__field( void *,	timer	)
-	),
-
-	TP_fast_assign(
-		__entry->timer	= timer;
-	),
-
-	TP_printk("timer=%p", __entry->timer)
+	TP_ARGS(timer)
 );
 );
 
 
 /**
 /**
@@ -224,14 +211,7 @@ TRACE_EVENT(hrtimer_expire_entry,
 		  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
 		  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
  );
 
 
-/**
- * hrtimer_expire_exit - called immediately after the hrtimer callback returns
- * @timer:	pointer to struct hrtimer
- *
- * When used in combination with the hrtimer_expire_entry tracepoint we can
- * determine the runtime of the callback function.
- */
-TRACE_EVENT(hrtimer_expire_exit,
+DECLARE_EVENT_CLASS(hrtimer_class,
 
 
 	TP_PROTO(struct hrtimer *hrtimer),
 	TP_PROTO(struct hrtimer *hrtimer),
 
 
@@ -249,24 +229,28 @@ TRACE_EVENT(hrtimer_expire_exit,
 );
 );
 
 
 /**
 /**
- * hrtimer_cancel - called when the hrtimer is canceled
- * @hrtimer:	pointer to struct hrtimer
+ * hrtimer_expire_exit - called immediately after the hrtimer callback returns
+ * @timer:	pointer to struct hrtimer
+ *
+ * When used in combination with the hrtimer_expire_entry tracepoint we can
+ * determine the runtime of the callback function.
  */
  */
-TRACE_EVENT(hrtimer_cancel,
+DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
 
 
 	TP_PROTO(struct hrtimer *hrtimer),
 	TP_PROTO(struct hrtimer *hrtimer),
 
 
-	TP_ARGS(hrtimer),
+	TP_ARGS(hrtimer)
+);
 
 
-	TP_STRUCT__entry(
-		__field( void *,	hrtimer	)
-	),
+/**
+ * hrtimer_cancel - called when the hrtimer is canceled
+ * @hrtimer:	pointer to struct hrtimer
+ */
+DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
 
 
-	TP_fast_assign(
-		__entry->hrtimer	= hrtimer;
-	),
+	TP_PROTO(struct hrtimer *hrtimer),
 
 
-	TP_printk("hrtimer=%p", __entry->hrtimer)
+	TP_ARGS(hrtimer)
 );
 );
 
 
 /**
 /**
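
Editorial aside: the pattern applied throughout this hunk is the DECLARE_EVENT_CLASS()/DEFINE_EVENT() split: events that share one record layout are declared once as a class and then instantiated per event name, which shrinks the generated code. A condensed sketch of the shape, mirroring the timer_class definition above:

DECLARE_EVENT_CLASS(timer_class,

	TP_PROTO(struct timer_list *timer),

	TP_ARGS(timer),

	TP_STRUCT__entry(
		__field( void *,	timer	)
	),

	TP_fast_assign(
		__entry->timer	= timer;
	),

	TP_printk("timer=%p", __entry->timer)
);

/* each concrete event then needs only the prototype, not the layout */
DEFINE_EVENT(timer_class, timer_init,
	TP_PROTO(struct timer_list *timer),
	TP_ARGS(timer)
);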

+ 8 - 15
include/trace/ftrace.h

@@ -75,15 +75,12 @@
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
 
-#undef __cpparg
-#define __cpparg(arg...) arg
-
 /* Callbacks are meaningless to ftrace. */
 /* Callbacks are meaningless to ftrace. */
 #undef TRACE_EVENT_FN
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,			\
 #define TRACE_EVENT_FN(name, proto, args, tstruct,			\
 		assign, print, reg, unreg)				\
 		assign, print, reg, unreg)				\
-	TRACE_EVENT(name, __cpparg(proto), __cpparg(args),		\
-		__cpparg(tstruct), __cpparg(assign), __cpparg(print))	\
+	TRACE_EVENT(name, PARAMS(proto), PARAMS(args),			\
+		PARAMS(tstruct), PARAMS(assign), PARAMS(print))		\
 
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
@@ -145,7 +142,7 @@
  *	struct trace_seq *s = &iter->seq;
  *	struct trace_seq *s = &iter->seq;
  *	struct ftrace_raw_<call> *field; <-- defined in stage 1
  *	struct ftrace_raw_<call> *field; <-- defined in stage 1
  *	struct trace_entry *entry;
  *	struct trace_entry *entry;
- *	struct trace_seq *p;
+ *	struct trace_seq *p = &iter->tmp_seq;
  *	int ret;
  *	int ret;
  *
  *
  *	entry = iter->ent;
  *	entry = iter->ent;
@@ -157,12 +154,10 @@
  *
  *
  *	field = (typeof(field))entry;
  *	field = (typeof(field))entry;
  *
  *
- *	p = &get_cpu_var(ftrace_event_seq);
  *	trace_seq_init(p);
  *	trace_seq_init(p);
  *	ret = trace_seq_printf(s, "%s: ", <call>);
  *	ret = trace_seq_printf(s, "%s: ", <call>);
  *	if (ret)
  *	if (ret)
  *		ret = trace_seq_printf(s, <TP_printk> "\n");
  *		ret = trace_seq_printf(s, <TP_printk> "\n");
- *	put_cpu();
  *	if (!ret)
  *	if (!ret)
  *		return TRACE_TYPE_PARTIAL_LINE;
  *		return TRACE_TYPE_PARTIAL_LINE;
  *
  *
@@ -216,7 +211,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 	struct trace_seq *s = &iter->seq;				\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##call *field;				\
 	struct ftrace_raw_##call *field;				\
 	struct trace_entry *entry;					\
 	struct trace_entry *entry;					\
-	struct trace_seq *p;						\
+	struct trace_seq *p = &iter->tmp_seq;				\
 	int ret;							\
 	int ret;							\
 									\
 									\
 	event = container_of(trace_event, struct ftrace_event_call,	\
 	event = container_of(trace_event, struct ftrace_event_call,	\
@@ -231,12 +226,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 									\
 									\
 	field = (typeof(field))entry;					\
 	field = (typeof(field))entry;					\
 									\
 									\
-	p = &get_cpu_var(ftrace_event_seq);				\
 	trace_seq_init(p);						\
 	trace_seq_init(p);						\
 	ret = trace_seq_printf(s, "%s: ", event->name);			\
 	ret = trace_seq_printf(s, "%s: ", event->name);			\
 	if (ret)							\
 	if (ret)							\
 		ret = trace_seq_printf(s, print);			\
 		ret = trace_seq_printf(s, print);			\
-	put_cpu();							\
 	if (!ret)							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
 									\
@@ -255,7 +248,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 	struct trace_seq *s = &iter->seq;				\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##template *field;				\
 	struct ftrace_raw_##template *field;				\
 	struct trace_entry *entry;					\
 	struct trace_entry *entry;					\
-	struct trace_seq *p;						\
+	struct trace_seq *p = &iter->tmp_seq;				\
 	int ret;							\
 	int ret;							\
 									\
 									\
 	entry = iter->ent;						\
 	entry = iter->ent;						\
@@ -267,12 +260,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,	\
 									\
 									\
 	field = (typeof(field))entry;					\
 	field = (typeof(field))entry;					\
 									\
 									\
-	p = &get_cpu_var(ftrace_event_seq);				\
 	trace_seq_init(p);						\
 	trace_seq_init(p);						\
 	ret = trace_seq_printf(s, "%s: ", #call);			\
 	ret = trace_seq_printf(s, "%s: ", #call);			\
 	if (ret)							\
 	if (ret)							\
 		ret = trace_seq_printf(s, print);			\
 		ret = trace_seq_printf(s, print);			\
-	put_cpu();							\
 	if (!ret)							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
 									\
@@ -439,6 +430,7 @@ static inline notrace int ftrace_get_offsets_##call(			\
  *	.fields			= LIST_HEAD_INIT(event_class_##call.fields),
  *	.fields			= LIST_HEAD_INIT(event_class_##call.fields),
  *	.raw_init		= trace_event_raw_init,
  *	.raw_init		= trace_event_raw_init,
  *	.probe			= ftrace_raw_event_##call,
  *	.probe			= ftrace_raw_event_##call,
+ *	.reg			= ftrace_event_reg,
  * };
  * };
  *
  *
  * static struct ftrace_event_call __used
  * static struct ftrace_event_call __used
@@ -567,6 +559,7 @@ static struct ftrace_event_class __used event_class_##call = {		\
 	.fields			= LIST_HEAD_INIT(event_class_##call.fields),\
 	.fields			= LIST_HEAD_INIT(event_class_##call.fields),\
 	.raw_init		= trace_event_raw_init,			\
 	.raw_init		= trace_event_raw_init,			\
 	.probe			= ftrace_raw_event_##call,		\
 	.probe			= ftrace_raw_event_##call,		\
+	.reg			= ftrace_event_reg,			\
 	_TRACE_PERF_INIT(call)						\
 	_TRACE_PERF_INIT(call)						\
 };
 };
 
 
@@ -705,7 +698,7 @@ perf_trace_##call(void *__data, proto)					\
 	int __data_size;						\
 	int __data_size;						\
 	int rctx;							\
 	int rctx;							\
 									\
 									\
-	perf_fetch_caller_regs(&__regs, 1);				\
+	perf_fetch_caller_regs(&__regs);				\
 									\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
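
Editorial aside: __cpparg() is dropped above in favour of the pre-existing PARAMS() helper from <linux/tracepoint.h>, which does the same comma-preserving pass-through, roughly:

#define PARAMS(args...) args	/* roughly as defined in <linux/tracepoint.h> */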

+ 0 - 1
include/trace/syscall.h

@@ -26,7 +26,6 @@ struct syscall_metadata {
 	const char	**types;
 	const char	**args;
 	struct list_head enter_fields;
-	struct list_head exit_fields;
 
 	struct ftrace_event_call *enter_event;
 	struct ftrace_event_call *exit_event;

+ 10 - 19
init/main.c

@@ -66,11 +66,9 @@
 #include <linux/ftrace.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
 #include <linux/async.h>
 #include <linux/kmemcheck.h>
 #include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
 #include <linux/sfi.h>
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
-#include <trace/boot.h>
 
 
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/bugs.h>
 #include <asm/bugs.h>
@@ -664,7 +662,6 @@ asmlinkage void __init start_kernel(void)
 #endif
 #endif
 	page_cgroup_init();
 	page_cgroup_init();
 	enable_debug_pagealloc();
 	enable_debug_pagealloc();
-	kmemtrace_init();
 	kmemleak_init();
 	kmemleak_init();
 	debug_objects_mem_init();
 	debug_objects_mem_init();
 	idr_init_cache();
 	idr_init_cache();
@@ -726,38 +723,33 @@ int initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
 
 static char msgbuf[64];
 static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
 
 
 int do_one_initcall(initcall_t fn)
 int do_one_initcall(initcall_t fn)
 {
 {
 	int count = preempt_count();
 	int count = preempt_count();
 	ktime_t calltime, delta, rettime;
 	ktime_t calltime, delta, rettime;
+	unsigned long long duration;
+	int ret;
 
 
 	if (initcall_debug) {
 	if (initcall_debug) {
-		call.caller = task_pid_nr(current);
-		printk("calling  %pF @ %i\n", fn, call.caller);
+		printk("calling  %pF @ %i\n", fn, task_pid_nr(current));
 		calltime = ktime_get();
 		calltime = ktime_get();
-		trace_boot_call(&call, fn);
-		enable_boot_trace();
 	}
 	}
 
 
-	ret.result = fn();
+	ret = fn();
 
 
 	if (initcall_debug) {
 	if (initcall_debug) {
-		disable_boot_trace();
 		rettime = ktime_get();
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
 		delta = ktime_sub(rettime, calltime);
-		ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-		trace_boot_ret(&ret, fn);
-		printk("initcall %pF returned %d after %Ld usecs\n", fn,
-			ret.result, ret.duration);
+		duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+		printk("initcall %pF returned %d after %lld usecs\n", fn,
+			ret, duration);
 	}
 	}
 
 
 	msgbuf[0] = 0;
 	msgbuf[0] = 0;
 
 
-	if (ret.result && ret.result != -ENODEV && initcall_debug)
-		sprintf(msgbuf, "error code %d ", ret.result);
+	if (ret && ret != -ENODEV && initcall_debug)
+		sprintf(msgbuf, "error code %d ", ret);
 
 
 	if (preempt_count() != count) {
 	if (preempt_count() != count) {
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -771,7 +763,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 	}
 
 
-	return ret.result;
+	return ret;
 }
 }
 
 
 
 
@@ -895,7 +887,6 @@ static int __init kernel_init(void * unused)
 	smp_prepare_cpus(setup_max_cpus);
 	smp_prepare_cpus(setup_max_cpus);
 
 
 	do_pre_smp_initcalls();
 	do_pre_smp_initcalls();
-	start_boot_trace();
 
 
 	smp_init();
 	smp_init();
 	sched_init_smp();
 	sched_init_smp();

+ 1 - 1
kernel/Makefile

@@ -76,8 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
-obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
+obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o

+ 41 - 37
kernel/hw_breakpoint.c

@@ -41,6 +41,7 @@
 #include <linux/sched.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
+#include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
 
 
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
 
 
 static int nr_slots[TYPE_MAX];
 static int nr_slots[TYPE_MAX];
 
 
+/* Keep track of the breakpoints attached to tasks */
+static LIST_HEAD(bp_task_head);
+
 static int constraints_initialized;
 static int constraints_initialized;
 
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 	return 0;
 	return 0;
 }
 }
 
 
-static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
+/*
+ * Count the number of breakpoints of the same type and same task.
+ * The given event must be not on the list.
+ */
+static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
 {
 {
-	struct perf_event_context *ctx = tsk->perf_event_ctxp;
-	struct list_head *list;
-	struct perf_event *bp;
-	unsigned long flags;
+	struct perf_event_context *ctx = bp->ctx;
+	struct perf_event *iter;
 	int count = 0;
 	int count = 0;
 
 
-	if (WARN_ONCE(!ctx, "No perf context for this task"))
-		return 0;
-
-	list = &ctx->event_list;
-
-	raw_spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * The current breakpoint counter is not included in the list
-	 * at the open() callback time
-	 */
-	list_for_each_entry(bp, list, event_entry) {
-		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			if (find_slot_idx(bp) == type)
-				count += hw_breakpoint_weight(bp);
+	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
+		if (iter->ctx == ctx && find_slot_idx(iter) == type)
+			count += hw_breakpoint_weight(iter);
 	}
 	}
 
 
-	raw_spin_unlock_irqrestore(&ctx->lock, flags);
-
 	return count;
 	return count;
 }
 }
 
 
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		if (!tsk)
 		if (!tsk)
 			slots->pinned += max_task_bp_pinned(cpu, type);
 			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
 		else
-			slots->pinned += task_bp_pinned(tsk, type);
+			slots->pinned += task_bp_pinned(bp, type);
 		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 
 		return;
 		return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		if (!tsk)
 		if (!tsk)
 			nr += max_task_bp_pinned(cpu, type);
 			nr += max_task_bp_pinned(cpu, type);
 		else
 		else
-			nr += task_bp_pinned(tsk, type);
+			nr += task_bp_pinned(bp, type);
 
 
 		if (nr > slots->pinned)
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 			slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
 /*
 /*
  * Add a pinned breakpoint for the given task in our constraint table
  * Add a pinned breakpoint for the given task in our constraint table
  */
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
 				enum bp_type_idx type, int weight)
 				enum bp_type_idx type, int weight)
 {
 {
 	unsigned int *tsk_pinned;
 	unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
 	int old_idx = 0;
 	int old_idx = 0;
 	int idx = 0;
 	int idx = 0;
 
 
-	old_count = task_bp_pinned(tsk, type);
+	old_count = task_bp_pinned(bp, type);
 	old_idx = old_count - 1;
 	old_idx = old_count - 1;
 	idx = old_idx + weight;
 	idx = old_idx + weight;
 
 
+	/* tsk_pinned[n] is the number of tasks having n breakpoints */
 	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
 	if (enable) {
 		tsk_pinned[idx]++;
 		tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 	int cpu = bp->cpu;
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 	struct task_struct *tsk = bp->ctx->task;
 
 
+	/* Pinned counter cpu profiling */
+	if (!tsk) {
+
+		if (enable)
+			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
+		else
+			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+		return;
+	}
+
 	/* Pinned counter task profiling */
 	/* Pinned counter task profiling */
-	if (tsk) {
-		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-			return;
-		}
 
 
+	if (!enable)
+		list_del(&bp->hw.bp_list);
+
+	if (cpu >= 0) {
+		toggle_bp_task_slot(bp, cpu, enable, type, weight);
+	} else {
 		for_each_online_cpu(cpu)
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-		return;
+			toggle_bp_task_slot(bp, cpu, enable, type, weight);
 	}
 	}
 
 
-	/* Pinned counter cpu profiling */
 	if (enable)
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
-	else
-		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+		list_add_tail(&bp->hw.bp_list, &bp_task_head);
 }
 }
 
 
 /*
 /*
@@ -312,6 +312,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
 	weight = hw_breakpoint_weight(bp);
 	weight = hw_breakpoint_weight(bp);
 
 
 	fetch_bp_busy_slots(&slots, bp, type);
 	fetch_bp_busy_slots(&slots, bp, type);
+	/*
+	 * Simulate the addition of this breakpoint to the constraints
+	 * and see the result.
+	 */
 	fetch_this_slot(&slots, weight);
 	fetch_this_slot(&slots, weight);
 
 
 	/* Flexible counters need to keep at least one slot */
 	/* Flexible counters need to keep at least one slot */
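
Editorial aside: the constraint accounting above is exercised whenever a breakpoint event is created, including from userspace. A hedged sketch of a task-bound data breakpoint via perf_event_open() (error handling trimmed; the watched variable is illustrative):

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

static int watched;	/* watch writes to this variable */

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size	   = sizeof(attr);
	attr.type	   = PERF_TYPE_BREAKPOINT;
	attr.bp_type	   = HW_BREAKPOINT_W;
	attr.bp_addr	   = (unsigned long)&watched;
	attr.bp_len	   = HW_BREAKPOINT_LEN_4;
	attr.sample_period = 1;

	/* pid=0, cpu=-1: a task-bound breakpoint, tracked on bp_task_head */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

	watched = 1;	/* would trigger the breakpoint if fd >= 0 */
	if (fd >= 0)
		close(fd);
	return 0;
}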

File diff not shown because the file is too large
+ 218 - 208
kernel/perf_event.c


+ 3 - 3
kernel/sched.c

@@ -3726,7 +3726,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
  * off of preempt_enable. Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
-asmlinkage void __sched preempt_schedule(void)
+asmlinkage void __sched notrace preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
 
@@ -3738,9 +3738,9 @@ asmlinkage void __sched preempt_schedule(void)
 		return;
 
 	do {
-		add_preempt_count(PREEMPT_ACTIVE);
+		add_preempt_count_notrace(PREEMPT_ACTIVE);
 		schedule();
-		sub_preempt_count(PREEMPT_ACTIVE);
+		sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
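
Editorial aside: notrace keeps preempt_schedule() itself out of the function tracer, so the tracer's own preemption handling cannot recurse through it. The annotation is roughly:

/* roughly as defined in <linux/compiler.h> */
#define notrace __attribute__((no_instrument_function))

/* usage, as in the hunk above */
asmlinkage void __sched notrace preempt_schedule(void);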

+ 0 - 293
kernel/softlockup.c

@@ -1,293 +0,0 @@
-/*
- * Detect Soft Lockups
- *
- * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
- *
- * this code detects soft lockups: incidents in where on a CPU
- * the kernel does not reschedule for 10 seconds or more.
- */
-#include <linux/mm.h>
-#include <linux/cpu.h>
-#include <linux/nmi.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/lockdep.h>
-#include <linux/notifier.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-
-#include <asm/irq_regs.h>
-
-static DEFINE_SPINLOCK(print_lock);
-
-static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
-static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
-static DEFINE_PER_CPU(bool, softlock_touch_sync);
-
-static int __read_mostly did_panic;
-int __read_mostly softlockup_thresh = 60;
-
-/*
- * Should we panic (and reboot, if panic_timeout= is set) when a
- * soft-lockup occurs:
- */
-unsigned int __read_mostly softlockup_panic =
-				CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
-
-static int __init softlockup_panic_setup(char *str)
-{
-	softlockup_panic = simple_strtoul(str, NULL, 0);
-
-	return 1;
-}
-__setup("softlockup_panic=", softlockup_panic_setup);
-
-static int
-softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	did_panic = 1;
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
-	.notifier_call = softlock_panic,
-};
-
-/*
- * Returns seconds, approximately.  We don't need nanosecond
- * resolution, and we don't need to waste time with a big divide when
- * 2^30ns == 1.074s.
- */
-static unsigned long get_timestamp(int this_cpu)
-{
-	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
-}
-
-static void __touch_softlockup_watchdog(void)
-{
-	int this_cpu = raw_smp_processor_id();
-
-	__raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
-}
-
-void touch_softlockup_watchdog(void)
-{
-	__raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-EXPORT_SYMBOL(touch_softlockup_watchdog);
-
-void touch_softlockup_watchdog_sync(void)
-{
-	__raw_get_cpu_var(softlock_touch_sync) = true;
-	__raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-
-void touch_all_softlockup_watchdogs(void)
-{
-	int cpu;
-
-	/* Cause each CPU to re-update its timestamp rather than complain */
-	for_each_online_cpu(cpu)
-		per_cpu(softlockup_touch_ts, cpu) = 0;
-}
-EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
-
-int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
-{
-	touch_all_softlockup_watchdogs();
-	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-
-/*
- * This callback runs from the timer interrupt, and checks
- * whether the watchdog thread has hung or not:
- */
-void softlockup_tick(void)
-{
-	int this_cpu = smp_processor_id();
-	unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
-	unsigned long print_ts;
-	struct pt_regs *regs = get_irq_regs();
-	unsigned long now;
-
-	/* Is detection switched off? */
-	if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
-		/* Be sure we don't false trigger if switched back on */
-		if (touch_ts)
-			per_cpu(softlockup_touch_ts, this_cpu) = 0;
-		return;
-	}
-
-	if (touch_ts == 0) {
-		if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
-			/*
-			 * If the time stamp was touched atomically
-			 * make sure the scheduler tick is up to date.
-			 */
-			per_cpu(softlock_touch_sync, this_cpu) = false;
-			sched_clock_tick();
-		}
-		__touch_softlockup_watchdog();
-		return;
-	}
-
-	print_ts = per_cpu(softlockup_print_ts, this_cpu);
-
-	/* report at most once a second */
-	if (print_ts == touch_ts || did_panic)
-		return;
-
-	/* do not print during early bootup: */
-	if (unlikely(system_state != SYSTEM_RUNNING)) {
-		__touch_softlockup_watchdog();
-		return;
-	}
-
-	now = get_timestamp(this_cpu);
-
-	/*
-	 * Wake up the high-prio watchdog task twice per
-	 * threshold timespan.
-	 */
-	if (time_after(now - softlockup_thresh/2, touch_ts))
-		wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
-
-	/* Warn about unreasonable delays: */
-	if (time_before_eq(now - softlockup_thresh, touch_ts))
-		return;
-
-	per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
-
-	spin_lock(&print_lock);
-	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
-			this_cpu, now - touch_ts,
-			current->comm, task_pid_nr(current));
-	print_modules();
-	print_irqtrace_events(current);
-	if (regs)
-		show_regs(regs);
-	else
-		dump_stack();
-	spin_unlock(&print_lock);
-
-	if (softlockup_panic)
-		panic("softlockup: hung tasks");
-}
-
-/*
- * The watchdog thread - runs every second and touches the timestamp.
- */
-static int watchdog(void *__bind_cpu)
-{
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
-	sched_setscheduler(current, SCHED_FIFO, &param);
-
-	/* initialize timestamp */
-	__touch_softlockup_watchdog();
-
-	set_current_state(TASK_INTERRUPTIBLE);
-	/*
-	 * Run briefly once per second to reset the softlockup timestamp.
-	 * If this gets delayed for more than 60 seconds then the
-	 * debug-printout triggers in softlockup_tick().
-	 */
-	while (!kthread_should_stop()) {
-		__touch_softlockup_watchdog();
-		schedule();
-
-		if (kthread_should_stop())
-			break;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	__set_current_state(TASK_RUNNING);
-
-	return 0;
-}
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int hotcpu = (unsigned long)hcpu;
-	struct task_struct *p;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
-		p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
-		if (IS_ERR(p)) {
-			printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
-			return NOTIFY_BAD;
-		}
-		per_cpu(softlockup_touch_ts, hotcpu) = 0;
-		per_cpu(softlockup_watchdog, hotcpu) = p;
-		kthread_bind(p, hotcpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		if (!per_cpu(softlockup_watchdog, hotcpu))
-			break;
-		/* Unbind so it can run.  Fall thru. */
-		kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
-			     cpumask_any(cpu_online_mask));
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		p = per_cpu(softlockup_watchdog, hotcpu);
-		per_cpu(softlockup_watchdog, hotcpu) = NULL;
-		kthread_stop(p);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-	.notifier_call = cpu_callback
-};
-
-static int __initdata nosoftlockup;
-
-static int __init nosoftlockup_setup(char *str)
-{
-	nosoftlockup = 1;
-	return 1;
-}
-__setup("nosoftlockup", nosoftlockup_setup);
-
-static int __init spawn_softlockup_task(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	if (nosoftlockup)
-		return 0;
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	if (err == NOTIFY_BAD) {
-		BUG();
-		return 1;
-	}
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
-
-	return 0;
-}
-early_initcall(spawn_softlockup_task);

+ 33 - 22
kernel/sysctl.c

@@ -76,6 +76,10 @@
 #include <scsi/sg.h>
 #include <scsi/sg.h>
 #endif
 #endif
 
 
+#ifdef CONFIG_LOCKUP_DETECTOR
+#include <linux/nmi.h>
+#endif
+
 
 
 #if defined(CONFIG_SYSCTL)
 #if defined(CONFIG_SYSCTL)
 
 
@@ -106,7 +110,7 @@ extern int blk_iopoll_enabled;
 #endif
 #endif
 
 
 /* Constants used for minimum and  maximum */
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
 static int sixty = 60;
 static int neg_one = -1;
 static int neg_one = -1;
 #endif
 #endif
@@ -710,7 +714,34 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 		.proc_handler	= proc_dointvec,
 	},
 	},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_LOCKUP_DETECTOR)
+	{
+		.procname       = "watchdog",
+		.data           = &watchdog_enabled,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = proc_dowatchdog_enabled,
+	},
+	{
+		.procname	= "watchdog_thresh",
+		.data		= &softlockup_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dowatchdog_thresh,
+		.extra1		= &neg_one,
+		.extra2		= &sixty,
+	},
+	{
+		.procname	= "softlockup_panic",
+		.data		= &softlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
 	{
 	{
 		.procname       = "unknown_nmi_panic",
 		.procname       = "unknown_nmi_panic",
 		.data           = &unknown_nmi_panic,
 		.data           = &unknown_nmi_panic,
@@ -813,26 +844,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 		.proc_handler	= proc_dointvec,
 	},
 	},
 #endif
 #endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	{
-		.procname	= "softlockup_panic",
-		.data		= &softlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
-	},
-	{
-		.procname	= "softlockup_thresh",
-		.data		= &softlockup_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dosoftlockup_thresh,
-		.extra1		= &neg_one,
-		.extra2		= &sixty,
-	},
-#endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 #ifdef CONFIG_DETECT_HUNG_TASK
 	{
 	{
 		.procname	= "hung_task_panic",
 		.procname	= "hung_task_panic",
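
Editorial aside: the entries added above surface under /proc/sys/kernel/ (the paths follow the .procname fields). A trivial userspace check of the new switch might look like:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/watchdog", "r");
	int enabled = -1;

	if (f && fscanf(f, "%d", &enabled) == 1)
		printf("lockup detector enabled: %d\n", enabled);
	if (f)
		fclose(f);
	return 0;
}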

+ 0 - 1
kernel/timer.c

@@ -1302,7 +1302,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*

+ 0 - 68
kernel/trace/Kconfig

@@ -194,15 +194,6 @@ config PREEMPT_TRACER
 	  enabled. This option and the irqs-off timing option can be
 	  used together or separately.)
 
-config SYSPROF_TRACER
-	bool "Sysprof Tracer"
-	depends on X86
-	select GENERIC_TRACER
-	select CONTEXT_SWITCH_TRACER
-	help
-	  This tracer provides the trace needed by the 'Sysprof' userspace
-	  tool.
-
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
 	select GENERIC_TRACER
@@ -229,23 +220,6 @@ config FTRACE_SYSCALLS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
-config BOOT_TRACER
-	bool "Trace boot initcalls"
-	select GENERIC_TRACER
-	select CONTEXT_SWITCH_TRACER
-	help
-	  This tracer helps developers to optimize boot times: it records
-	  the timings of the initcalls and traces key events and the identity
-	  of tasks that can cause boot delays, such as context-switches.
-
-	  Its aim is to be parsed by the scripts/bootgraph.pl tool to
-	  produce pretty graphics about boot inefficiencies, giving a visual
-	  representation of the delays during initcalls - but the raw
-	  /debug/tracing/trace text output is readable too.
-
-	  You must pass in initcall_debug and ftrace=initcall to the kernel
-	  command line to enable this on bootup.
-
 config TRACE_BRANCH_PROFILING
 	bool
 	select GENERIC_TRACER
@@ -325,28 +299,6 @@ config BRANCH_TRACER
 
 	  Say N if unsure.
 
-config KSYM_TRACER
-	bool "Trace read and write access on kernel memory locations"
-	depends on HAVE_HW_BREAKPOINT
-	select TRACING
-	help
-	  This tracer helps find read and write operations on any given kernel
-	  symbol i.e. /proc/kallsyms.
-
-config PROFILE_KSYM_TRACER
-	bool "Profile all kernel memory accesses on 'watched' variables"
-	depends on KSYM_TRACER
-	help
-	  This tracer profiles kernel accesses on variables watched through the
-	  ksym tracer ftrace plugin. Depending upon the hardware, all read
-	  and write operations on kernel variables can be monitored for
-	  accesses.
-
-	  The results will be displayed in:
-	  /debugfs/tracing/profile_ksym
-
-	  Say N if unsure.
-
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FUNCTION_TRACER
@@ -371,26 +323,6 @@ config STACK_TRACER
 
 	  Say N if unsure.
 
-config KMEMTRACE
-	bool "Trace SLAB allocations"
-	select GENERIC_TRACER
-	help
-	  kmemtrace provides tracing for slab allocator functions, such as
-	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
-	  data is then fed to the userspace application in order to analyse
-	  allocation hotspots, internal fragmentation and so on, making it
-	  possible to see how well an allocator performs, as well as debug
-	  and profile kernel code.
-
-	  This requires an userspace application to use. See
-	  Documentation/trace/kmemtrace.txt for more information.
-
-	  Saying Y will make the kernel somewhat larger and slower. However,
-	  if you disable kmemtrace at run-time or boot-time, the performance
-	  impact is minimal (depending on the arch the kernel is built for).
-
-	  If unsure, say N.
-
 config WORKQUEUE_TRACER
 config WORKQUEUE_TRACER
 	bool "Trace workqueues"
 	bool "Trace workqueues"
 	select GENERIC_TRACER
 	select GENERIC_TRACER

+ 0 - 4
kernel/trace/Makefile

@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
-obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
-obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
-obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
 ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o

+ 2 - 3
kernel/trace/ftrace.c

@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
 	struct hlist_head *hhd;
 	struct hlist_node *n;
 	unsigned long key;
-	int resched;
 
 	key = hash_long(ip, FTRACE_HASH_BITS);
 
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
 	 * period. This syncs the hash iteration and freeing of items
 	 * on the hash. rcu_read_lock is too dangerous here.
 	 */
-	resched = ftrace_preempt_disable();
+	preempt_disable_notrace();
 	hlist_for_each_entry_rcu(entry, n, hhd, node) {
 		if (entry->ip == ip)
 			entry->ops->func(ip, parent_ip, &entry->data);
 	}
-	ftrace_preempt_enable(resched);
+	preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_probe_ops __read_mostly =

+ 0 - 529
kernel/trace/kmemtrace.c

@@ -1,529 +0,0 @@
-/*
- * Memory allocator tracing
- *
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- */
-
-#include <linux/tracepoint.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/dcache.h>
-#include <linux/fs.h>
-
-#include <linux/kmemtrace.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-/* Select an alternative, minimalistic output than the original one */
-#define TRACE_KMEM_OPT_MINIMAL	0x1
-
-static struct tracer_opt kmem_opts[] = {
-	/* Default disable the minimalistic output */
-	{ TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
-	{ }
-};
-
-static struct tracer_flags kmem_tracer_flags = {
-	.val			= 0,
-	.opts			= kmem_opts
-};
-
-static struct trace_array *kmemtrace_array;
-
-/* Trace allocations */
-static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
-				   unsigned long call_site,
-				   const void *ptr,
-				   size_t bytes_req,
-				   size_t bytes_alloc,
-				   gfp_t gfp_flags,
-				   int node)
-{
-	struct ftrace_event_call *call = &event_kmem_alloc;
-	struct trace_array *tr = kmemtrace_array;
-	struct kmemtrace_alloc_entry *entry;
-	struct ring_buffer_event *event;
-
-	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-	if (!event)
-		return;
-
-	entry = ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0, 0);
-
-	entry->ent.type		= TRACE_KMEM_ALLOC;
-	entry->type_id		= type_id;
-	entry->call_site	= call_site;
-	entry->ptr		= ptr;
-	entry->bytes_req	= bytes_req;
-	entry->bytes_alloc	= bytes_alloc;
-	entry->gfp_flags	= gfp_flags;
-	entry->node		= node;
-
-	if (!filter_check_discard(call, entry, tr->buffer, event))
-		ring_buffer_unlock_commit(tr->buffer, event);
-
-	trace_wake_up();
-}
-
-static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
-				  unsigned long call_site,
-				  const void *ptr)
-{
-	struct ftrace_event_call *call = &event_kmem_free;
-	struct trace_array *tr = kmemtrace_array;
-	struct kmemtrace_free_entry *entry;
-	struct ring_buffer_event *event;
-
-	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-	if (!event)
-		return;
-	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0, 0);
-
-	entry->ent.type		= TRACE_KMEM_FREE;
-	entry->type_id		= type_id;
-	entry->call_site	= call_site;
-	entry->ptr		= ptr;
-
-	if (!filter_check_discard(call, entry, tr->buffer, event))
-		ring_buffer_unlock_commit(tr->buffer, event);
-
-	trace_wake_up();
-}
-
-static void kmemtrace_kmalloc(void *ignore,
-			      unsigned long call_site,
-			      const void *ptr,
-			      size_t bytes_req,
-			      size_t bytes_alloc,
-			      gfp_t gfp_flags)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmem_cache_alloc(void *ignore,
-				       unsigned long call_site,
-				       const void *ptr,
-				       size_t bytes_req,
-				       size_t bytes_alloc,
-				       gfp_t gfp_flags)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmalloc_node(void *ignore,
-				   unsigned long call_site,
-				   const void *ptr,
-				   size_t bytes_req,
-				   size_t bytes_alloc,
-				   gfp_t gfp_flags,
-				   int node)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void kmemtrace_kmem_cache_alloc_node(void *ignore,
-					    unsigned long call_site,
-					    const void *ptr,
-					    size_t bytes_req,
-					    size_t bytes_alloc,
-					    gfp_t gfp_flags,
-					    int node)
-{
-	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-			bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void
-kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
-{
-	kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
-}
-
-static void kmemtrace_kmem_cache_free(void *ignore,
-				      unsigned long call_site, const void *ptr)
-{
-	kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
-}
-
-static int kmemtrace_start_probes(void)
-{
-	int err;
-
-	err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-	if (err)
-		return err;
-	err = register_trace_kfree(kmemtrace_kfree, NULL);
-	if (err)
-		return err;
-	err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-
-	return err;
-}
-
-static void kmemtrace_stop_probes(void)
-{
-	unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
-	unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-	unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-	unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-	unregister_trace_kfree(kmemtrace_kfree, NULL);
-	unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-}
-
-static int kmem_trace_init(struct trace_array *tr)
-{
-	kmemtrace_array = tr;
-
-	tracing_reset_online_cpus(tr);
-
-	kmemtrace_start_probes();
-
-	return 0;
-}
-
-static void kmem_trace_reset(struct trace_array *tr)
-{
-	kmemtrace_stop_probes();
-}
-
-static void kmemtrace_headers(struct seq_file *s)
-{
-	/* Don't need headers for the original kmemtrace output */
-	if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-		return;
-
-	seq_printf(s, "#\n");
-	seq_printf(s, "# ALLOC  TYPE  REQ   GIVEN  FLAGS     "
-			"      POINTER         NODE    CALLER\n");
-	seq_printf(s, "# FREE   |      |     |       |       "
-			"       |   |            |        |\n");
-	seq_printf(s, "# |\n\n");
-}
-
-/*
- * The following functions give the original output from kmemtrace,
- * plus the origin CPU, since reordering occurs in-kernel now.
- */
-
-#define KMEMTRACE_USER_ALLOC	0
-#define KMEMTRACE_USER_FREE	1
-
-struct kmemtrace_user_event {
-	u8			event_id;
-	u8			type_id;
-	u16			event_size;
-	u32			cpu;
-	u64			timestamp;
-	unsigned long		call_site;
-	unsigned long		ptr;
-};
-
-struct kmemtrace_user_event_alloc {
-	size_t			bytes_req;
-	size_t			bytes_alloc;
-	unsigned		gfp_flags;
-	int			node;
-};
-
-static enum print_line_t
-kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
-		      struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_alloc_entry *entry;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
-	    "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
-	    entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
-	    (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
-	    (unsigned long)entry->gfp_flags, entry->node);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free(struct trace_iterator *iter, int flags,
-		     struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_free_entry *entry;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
-			       entry->type_id, (void *)entry->call_site,
-			       (unsigned long)entry->ptr);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
-			   struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_alloc_entry *entry;
-	struct kmemtrace_user_event *ev;
-	struct kmemtrace_user_event_alloc *ev_alloc;
-
-	trace_assign_type(entry, iter->ent);
-
-	ev = trace_seq_reserve(s, sizeof(*ev));
-	if (!ev)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ev->event_id		= KMEMTRACE_USER_ALLOC;
-	ev->type_id		= entry->type_id;
-	ev->event_size		= sizeof(*ev) + sizeof(*ev_alloc);
-	ev->cpu			= iter->cpu;
-	ev->timestamp		= iter->ts;
-	ev->call_site		= entry->call_site;
-	ev->ptr			= (unsigned long)entry->ptr;
-
-	ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
-	if (!ev_alloc)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ev_alloc->bytes_req	= entry->bytes_req;
-	ev_alloc->bytes_alloc	= entry->bytes_alloc;
-	ev_alloc->gfp_flags	= entry->gfp_flags;
-	ev_alloc->node		= entry->node;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
-			  struct trace_event *event)
-{
-	struct trace_seq *s = &iter->seq;
-	struct kmemtrace_free_entry *entry;
-	struct kmemtrace_user_event *ev;
-
-	trace_assign_type(entry, iter->ent);
-
-	ev = trace_seq_reserve(s, sizeof(*ev));
-	if (!ev)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ev->event_id		= KMEMTRACE_USER_FREE;
-	ev->type_id		= entry->type_id;
-	ev->event_size		= sizeof(*ev);
-	ev->cpu			= iter->cpu;
-	ev->timestamp		= iter->ts;
-	ev->call_site		= entry->call_site;
-	ev->ptr			= (unsigned long)entry->ptr;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-/* The two other following provide a more minimalistic output */
-static enum print_line_t
-kmemtrace_print_alloc_compress(struct trace_iterator *iter)
-{
-	struct kmemtrace_alloc_entry *entry;
-	struct trace_seq *s = &iter->seq;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	/* Alloc entry */
-	ret = trace_seq_printf(s, "  +      ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Type */
-	switch (entry->type_id) {
-	case KMEMTRACE_TYPE_KMALLOC:
-		ret = trace_seq_printf(s, "K   ");
-		break;
-	case KMEMTRACE_TYPE_CACHE:
-		ret = trace_seq_printf(s, "C   ");
-		break;
-	case KMEMTRACE_TYPE_PAGES:
-		ret = trace_seq_printf(s, "P   ");
-		break;
-	default:
-		ret = trace_seq_printf(s, "?   ");
-	}
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Requested */
-	ret = trace_seq_printf(s, "%4zu   ", entry->bytes_req);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Allocated */
-	ret = trace_seq_printf(s, "%4zu   ", entry->bytes_alloc);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Flags
-	 * TODO: would be better to see the name of the GFP flag names
-	 */
-	ret = trace_seq_printf(s, "%08x   ", entry->gfp_flags);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Pointer to allocated */
-	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Node and call site*/
-	ret = trace_seq_printf(s, "%4d   %pf\n", entry->node,
-						 (void *)entry->call_site);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_compress(struct trace_iterator *iter)
-{
-	struct kmemtrace_free_entry *entry;
-	struct trace_seq *s = &iter->seq;
-	int ret;
-
-	trace_assign_type(entry, iter->ent);
-
-	/* Free entry */
-	ret = trace_seq_printf(s, "  -      ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Type */
-	switch (entry->type_id) {
-	case KMEMTRACE_TYPE_KMALLOC:
-		ret = trace_seq_printf(s, "K     ");
-		break;
-	case KMEMTRACE_TYPE_CACHE:
-		ret = trace_seq_printf(s, "C     ");
-		break;
-	case KMEMTRACE_TYPE_PAGES:
-		ret = trace_seq_printf(s, "P     ");
-		break;
-	default:
-		ret = trace_seq_printf(s, "?     ");
-	}
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Skip requested/allocated/flags */
-	ret = trace_seq_printf(s, "                       ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Pointer to allocated */
-	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Skip node and print call site*/
-	ret = trace_seq_printf(s, "       %pf\n", (void *)entry->call_site);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-
-	if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-		return TRACE_TYPE_UNHANDLED;
-
-	switch (entry->type) {
-	case TRACE_KMEM_ALLOC:
-		return kmemtrace_print_alloc_compress(iter);
-	case TRACE_KMEM_FREE:
-		return kmemtrace_print_free_compress(iter);
-	default:
-		return TRACE_TYPE_UNHANDLED;
-	}
-}
-
-static struct trace_event_functions kmem_trace_alloc_funcs = {
-	.trace			= kmemtrace_print_alloc,
-	.binary			= kmemtrace_print_alloc_user,
-};
-
-static struct trace_event kmem_trace_alloc = {
-	.type			= TRACE_KMEM_ALLOC,
-	.funcs			= &kmem_trace_alloc_funcs,
-};
-
-static struct trace_event_functions kmem_trace_free_funcs = {
-	.trace			= kmemtrace_print_free,
-	.binary			= kmemtrace_print_free_user,
-};
-
-static struct trace_event kmem_trace_free = {
-	.type			= TRACE_KMEM_FREE,
-	.funcs			= &kmem_trace_free_funcs,
-};
-
-static struct tracer kmem_tracer __read_mostly = {
-	.name			= "kmemtrace",
-	.init			= kmem_trace_init,
-	.reset			= kmem_trace_reset,
-	.print_line		= kmemtrace_print_line,
-	.print_header		= kmemtrace_headers,
-	.flags			= &kmem_tracer_flags
-};
-
-void kmemtrace_init(void)
-{
-	/* earliest opportunity to start kmem tracing */
-}
-
-static int __init init_kmem_tracer(void)
-{
-	if (!register_ftrace_event(&kmem_trace_alloc)) {
-		pr_warning("Warning: could not register kmem events\n");
-		return 1;
-	}
-
-	if (!register_ftrace_event(&kmem_trace_free)) {
-		pr_warning("Warning: could not register kmem events\n");
-		return 1;
-	}
-
-	if (register_tracer(&kmem_tracer) != 0) {
-		pr_warning("Warning: could not register the kmem tracer\n");
-		return 1;
-	}
-
-	return 0;
-}
-device_initcall(init_kmem_tracer);

+ 9 - 31
kernel/trace/ring_buffer.c

@@ -443,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
  */
 struct ring_buffer_per_cpu {
 	int				cpu;
+	atomic_t			record_disabled;
 	struct ring_buffer		*buffer;
 	spinlock_t			reader_lock;	/* serialize readers */
 	arch_spinlock_t			lock;
@@ -462,7 +463,6 @@ struct ring_buffer_per_cpu {
 	unsigned long			read;
 	u64				write_stamp;
 	u64				read_stamp;
-	atomic_t			record_disabled;
 };
 
 struct ring_buffer {
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
 
 #endif
 
-static DEFINE_PER_CPU(int, rb_need_resched);
-
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	int cpu, resched;
+	int cpu;
 
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
 	/* If we are tracing schedule, we don't want to recurse */
-	resched = ftrace_preempt_disable();
+	preempt_disable_notrace();
 
 	if (atomic_read(&buffer->record_disabled))
 		goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (!event)
 		goto out;
 
-	/*
-	 * Need to store resched state on this cpu.
-	 * Only the first needs to.
-	 */
-
-	if (preempt_count() == 1)
-		per_cpu(rb_need_resched, cpu) = resched;
-
 	return event;
 
  out:
 	trace_recursive_unlock();
 
  out_nocheck:
-	ftrace_preempt_enable(resched);
+	preempt_enable_notrace();
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	trace_recursive_unlock();
 
-	/*
-	 * Only the last preempt count needs to restore preemption.
-	 */
-	if (preempt_count() == 1)
-		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-	else
-		preempt_enable_no_resched_notrace();
+	preempt_enable_notrace();
 
 	return 0;
 }
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 
 	trace_recursive_unlock();
 
-	/*
-	 * Only the last preempt count needs to restore preemption.
-	 */
-	if (preempt_count() == 1)
-		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-	else
-		preempt_enable_no_resched_notrace();
+	preempt_enable_notrace();
 
 }
 EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	void *body;
 	int ret = -EBUSY;
-	int cpu, resched;
+	int cpu;
 
 	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
-	resched = ftrace_preempt_disable();
+	preempt_disable_notrace();
 
 	if (atomic_read(&buffer->record_disabled))
 		goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
 	ret = 0;
  out:
-	ftrace_preempt_enable(resched);
+	preempt_enable_notrace();
 
 	return ret;
 }

+ 55 - 72
kernel/trace/trace.c

@@ -341,7 +341,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
-	TRACE_ITER_GRAPH_TIME;
+	TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
 
 static int trace_stop_count;
 static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +425,7 @@ static const char *trace_options[] = {
 	"latency-format",
 	"sleep-time",
 	"graph-time",
+	"record-cmd",
 	NULL
 };
 
@@ -656,6 +657,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
+	if (!current_trace->use_max_tr) {
+		WARN_ON_ONCE(1);
+		return;
+	}
 	arch_spin_lock(&ftrace_max_lock);
 
 	tr->buffer = max_tr.buffer;
@@ -682,6 +687,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
+	if (!current_trace->use_max_tr) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
 	arch_spin_lock(&ftrace_max_lock);
 
 	ftrace_disable_cpu();
@@ -726,7 +736,7 @@ __acquires(kernel_lock)
 		return -1;
 	}
 
-	if (strlen(type->name) > MAX_TRACER_SIZE) {
+	if (strlen(type->name) >= MAX_TRACER_SIZE) {
 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
 		return -1;
 	}
@@ -1328,61 +1338,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 
 #endif /* CONFIG_STACKTRACE */
 
-static void
-ftrace_trace_special(void *__tr,
-		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
-		     int pc)
-{
-	struct ftrace_event_call *call = &event_special;
-	struct ring_buffer_event *event;
-	struct trace_array *tr = __tr;
-	struct ring_buffer *buffer = tr->buffer;
-	struct special_entry *entry;
-
-	event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
-					  sizeof(*entry), 0, pc);
-	if (!event)
-		return;
-	entry	= ring_buffer_event_data(event);
-	entry->arg1			= arg1;
-	entry->arg2			= arg2;
-	entry->arg3			= arg3;
-
-	if (!filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, 0, pc);
-}
-
-void
-__trace_special(void *__tr, void *__data,
-		unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-	ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
-}
-
-void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-	struct trace_array *tr = &global_trace;
-	struct trace_array_cpu *data;
-	unsigned long flags;
-	int cpu;
-	int pc;
-
-	if (tracing_disabled)
-		return;
-
-	pc = preempt_count();
-	local_irq_save(flags);
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-
-	if (likely(atomic_inc_return(&data->disabled) == 1))
-		ftrace_trace_special(tr, arg1, arg2, arg3, pc);
-
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
-}
-
 /**
  * trace_vbprintk - write binary msg to tracing buffer
  *
@@ -1401,7 +1356,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	struct bprint_entry *entry;
 	unsigned long flags;
 	int disable;
-	int resched;
 	int cpu, len = 0, size, pc;
 
 	if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1411,7 +1365,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	pause_graph_tracing();
 
 	pc = preempt_count();
-	resched = ftrace_preempt_disable();
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
@@ -1449,7 +1403,7 @@ out_unlock:
 
 out:
 	atomic_dec_return(&data->disabled);
-	ftrace_preempt_enable(resched);
+	preempt_enable_notrace();
 	unpause_graph_tracing();
 
 	return len;
@@ -2386,6 +2340,7 @@ static const struct file_operations show_traces_fops = {
 	.open		= show_traces_open,
 	.read		= seq_read,
 	.release	= seq_release,
+	.llseek		= seq_lseek,
 };
 
 /*
@@ -2479,6 +2434,7 @@ static const struct file_operations tracing_cpumask_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_cpumask_read,
 	.write		= tracing_cpumask_write,
+	.llseek		= generic_file_llseek,
 };
 
 static int tracing_trace_options_show(struct seq_file *m, void *v)
@@ -2554,6 +2510,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
 		trace_flags |= mask;
 	else
 		trace_flags &= ~mask;
+
+	if (mask == TRACE_ITER_RECORD_CMD)
+		trace_event_enable_cmd_record(enabled);
 }
 
 static ssize_t
@@ -2645,6 +2604,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
 static const struct file_operations tracing_readme_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_readme_read,
+	.llseek		= generic_file_llseek,
 };
 
 static ssize_t
@@ -2695,6 +2655,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
 static const struct file_operations tracing_saved_cmdlines_fops = {
     .open       = tracing_open_generic,
     .read       = tracing_saved_cmdlines_read,
+    .llseek	= generic_file_llseek,
 };
 
 static ssize_t
@@ -2790,6 +2751,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
 	if (ret < 0)
 		return ret;
 
+	if (!current_trace->use_max_tr)
+		goto out;
+
 	ret = ring_buffer_resize(max_tr.buffer, size);
 	if (ret < 0) {
 		int r;
@@ -2817,11 +2781,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
 		return ret;
 	}
 
+	max_tr.entries = size;
+ out:
 	global_trace.entries = size;
 
 	return ret;
 }
 
+
 /**
  * tracing_update_buffers - used by tracing facility to expand ring buffers
 *
@@ -2882,12 +2849,26 @@ static int tracing_set_tracer(const char *buf)
 	trace_branch_disable();
 	if (current_trace && current_trace->reset)
 		current_trace->reset(tr);
-
+	if (current_trace && current_trace->use_max_tr) {
+		/*
+		 * We don't free the ring buffer. instead, resize it because
+		 * The max_tr ring buffer has some state (e.g. ring->clock) and
+		 * we want preserve it.
+		 */
+		ring_buffer_resize(max_tr.buffer, 1);
+		max_tr.entries = 1;
+	}
 	destroy_trace_option_files(topts);
 
 	current_trace = t;
 
 	topts = create_trace_option_files(current_trace);
+	if (current_trace->use_max_tr) {
+		ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
+		if (ret < 0)
+			goto out;
+		max_tr.entries = global_trace.entries;
+	}
 
 	if (t->init) {
 		ret = tracer_init(t, tr);
@@ -3024,6 +3005,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	if (iter->trace->pipe_open)
 		iter->trace->pipe_open(iter);
 
+	nonseekable_open(inode, filp);
 out:
 	mutex_unlock(&trace_types_lock);
 	return ret;
@@ -3469,7 +3451,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	}
 
 	tracing_start();
-	max_tr.entries = global_trace.entries;
 	mutex_unlock(&trace_types_lock);
 
 	return cnt;
@@ -3582,18 +3563,21 @@ static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
 	.write		= tracing_max_lat_write,
+	.llseek		= generic_file_llseek,
 };
 
 static const struct file_operations tracing_ctrl_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_ctrl_read,
 	.write		= tracing_ctrl_write,
+	.llseek		= generic_file_llseek,
 };
 
 static const struct file_operations set_tracer_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_set_trace_read,
 	.write		= tracing_set_trace_write,
+	.llseek		= generic_file_llseek,
 };
 
 static const struct file_operations tracing_pipe_fops = {
@@ -3602,17 +3586,20 @@ static const struct file_operations tracing_pipe_fops = {
 	.read		= tracing_read_pipe,
 	.splice_read	= tracing_splice_read_pipe,
 	.release	= tracing_release_pipe,
+	.llseek		= no_llseek,
 };
 
 static const struct file_operations tracing_entries_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_entries_read,
 	.write		= tracing_entries_write,
+	.llseek		= generic_file_llseek,
 };
 
 static const struct file_operations tracing_mark_fops = {
 	.open		= tracing_open_generic,
 	.write		= tracing_mark_write,
+	.llseek		= generic_file_llseek,
 };
 
 static const struct file_operations trace_clock_fops = {
@@ -3918,6 +3905,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 static const struct file_operations tracing_stats_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_stats_read,
+	.llseek		= generic_file_llseek,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -3954,6 +3942,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
 static const struct file_operations tracing_dyn_info_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_read_dyn_info,
+	.llseek		= generic_file_llseek,
 };
 #endif
 
@@ -4107,6 +4096,7 @@ static const struct file_operations trace_options_fops = {
 	.open = tracing_open_generic,
 	.read = trace_options_read,
 	.write = trace_options_write,
+	.llseek	= generic_file_llseek,
 };
 
 static ssize_t
@@ -4158,6 +4148,7 @@ static const struct file_operations trace_options_core_fops = {
 	.open = tracing_open_generic,
 	.read = trace_options_core_read,
 	.write = trace_options_core_write,
+	.llseek = generic_file_llseek,
 };
 
 struct dentry *trace_create_file(const char *name,
@@ -4347,9 +4338,6 @@ static __init int tracer_init_debugfs(void)
 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
-#ifdef CONFIG_SYSPROF_TRACER
-	init_tracer_sysprof_debugfs(d_tracer);
-#endif
 
 	create_trace_options_dir();
 
@@ -4576,16 +4564,14 @@ __init static int tracer_alloc_buffers(void)
 
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-	max_tr.buffer = ring_buffer_alloc(ring_buf_size,
-					     TRACE_BUFFER_FLAGS);
+	max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
 	if (!max_tr.buffer) {
 		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
 		WARN_ON(1);
 		ring_buffer_free(global_trace.buffer);
 		goto out_free_cpumask;
 	}
-	max_tr.entries = ring_buffer_size(max_tr.buffer);
-	WARN_ON(max_tr.entries != global_trace.entries);
+	max_tr.entries = 1;
 #endif
 
 	/* Allocate the first page for all buffers */
@@ -4598,9 +4584,6 @@ __init static int tracer_alloc_buffers(void)
 
 	register_tracer(&nop_trace);
 	current_trace = &nop_trace;
-#ifdef CONFIG_BOOT_TRACER
-	register_tracer(&boot_tracer);
-#endif
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 

+ 6 - 84
kernel/trace/trace.h

@@ -9,10 +9,7 @@
 #include <linux/mmiotrace.h>
 #include <linux/tracepoint.h>
 #include <linux/ftrace.h>
-#include <trace/boot.h>
-#include <linux/kmemtrace.h>
 #include <linux/hw_breakpoint.h>
-
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
 
@@ -25,30 +22,17 @@ enum trace_type {
 	TRACE_STACK,
 	TRACE_PRINT,
 	TRACE_BPRINT,
-	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
 	TRACE_BRANCH,
-	TRACE_BOOT_CALL,
-	TRACE_BOOT_RET,
 	TRACE_GRAPH_RET,
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
-	TRACE_KMEM_ALLOC,
-	TRACE_KMEM_FREE,
 	TRACE_BLK,
-	TRACE_KSYM,
 
 	__TRACE_LAST_TYPE,
 };
 
-enum kmemtrace_type_id {
-	KMEMTRACE_TYPE_KMALLOC = 0,	/* kmalloc() or kfree(). */
-	KMEMTRACE_TYPE_CACHE,		/* kmem_cache_*(). */
-	KMEMTRACE_TYPE_PAGES,		/* __get_free_pages() and friends. */
-};
-
-extern struct tracer boot_tracer;
 
 #undef __field
 #define __field(type, item)		type	item;
@@ -204,23 +188,15 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
 		IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT);	\
-		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
 			  TRACE_MMIO_MAP);				\
-		IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
-		IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
 		IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
 		IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry,	\
 			  TRACE_GRAPH_ENT);		\
 		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\
 			  TRACE_GRAPH_RET);		\
-		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
-			  TRACE_KMEM_ALLOC);	\
-		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
-			  TRACE_KMEM_FREE);	\
-		IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
 		__ftrace_bad_type();					\
 	} while (0)
 
@@ -298,6 +274,7 @@ struct tracer {
 	struct tracer		*next;
 	int			print_max;
 	struct tracer_flags	*flags;
+	int			use_max_tr;
 };
 
 
@@ -318,7 +295,6 @@ struct dentry *trace_create_file(const char *name,
 				 const struct file_operations *fops);
 
 struct dentry *tracing_init_dentry(void);
-void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
 struct ring_buffer_event;
 
@@ -363,11 +339,6 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct task_struct *wakee,
 				struct task_struct *cur,
 				unsigned long flags, int pc);
-void trace_special(struct trace_array *tr,
-		   struct trace_array_cpu *data,
-		   unsigned long arg1,
-		   unsigned long arg2,
-		   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
 		    unsigned long ip,
 		    unsigned long parent_ip,
@@ -398,8 +369,6 @@ extern cpumask_var_t __read_mostly tracing_buffer_mask;
 #define for_each_tracing_cpu(cpu)	\
 	for_each_cpu(cpu, tracing_buffer_mask)
 
-extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
-
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_thresh;
@@ -469,12 +438,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
 					 struct trace_array *tr);
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
-extern int trace_selftest_startup_sysprof(struct tracer *trace,
-					       struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
-extern int trace_selftest_startup_ksym(struct tracer *trace,
-					 struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
@@ -636,6 +601,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_LATENCY_FMT		= 0x20000,
 	TRACE_ITER_SLEEP_TIME		= 0x40000,
 	TRACE_ITER_GRAPH_TIME		= 0x80000,
+	TRACE_ITER_RECORD_CMD		= 0x100000,
 };
 
 /*
@@ -647,54 +613,6 @@ enum trace_iterator_flags {
 
 extern struct tracer nop_trace;
 
-/**
- * ftrace_preempt_disable - disable preemption scheduler safe
- *
- * When tracing can happen inside the scheduler, there exists
- * cases that the tracing might happen before the need_resched
- * flag is checked. If this happens and the tracer calls
- * preempt_enable (after a disable), a schedule might take place
- * causing an infinite recursion.
- *
- * To prevent this, we read the need_resched flag before
- * disabling preemption. When we want to enable preemption we
- * check the flag, if it is set, then we call preempt_enable_no_resched.
- * Otherwise, we call preempt_enable.
- *
- * The rational for doing the above is that if need_resched is set
- * and we have yet to reschedule, we are either in an atomic location
- * (where we do not need to check for scheduling) or we are inside
- * the scheduler and do not want to resched.
- */
-static inline int ftrace_preempt_disable(void)
-{
-	int resched;
-
-	resched = need_resched();
-	preempt_disable_notrace();
-
-	return resched;
-}
-
-/**
- * ftrace_preempt_enable - enable preemption scheduler safe
- * @resched: the return value from ftrace_preempt_disable
- *
- * This is a scheduler safe way to enable preemption and not miss
- * any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we are either inside an atomic or
- * are inside the scheduler (we would have already scheduled
- * otherwise). In this case, we do not want to call normal
- * preempt_enable, but preempt_enable_no_resched instead.
- */
-static inline void ftrace_preempt_enable(int resched)
-{
-	if (resched)
-		preempt_enable_no_resched_notrace();
-	else
-		preempt_enable_notrace();
-}
-
 #ifdef CONFIG_BRANCH_TRACER
 extern int enable_branch_tracing(struct trace_array *tr);
 extern void disable_branch_tracing(void);
@@ -785,6 +703,8 @@ struct filter_pred {
 	int 			pop_n;
 };
 
+extern struct list_head ftrace_common_fields;
+
 extern enum regex_type
 filter_parse_regex(char *buff, int len, char **search, int *not);
 extern void print_event_filter(struct ftrace_event_call *call,
@@ -814,6 +734,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
+extern void trace_event_enable_cmd_record(bool enable);
+
 extern struct mutex event_mutex;
 extern struct list_head ftrace_events;
 
+ 0 - 185
kernel/trace/trace_boot.c

@@ -1,185 +0,0 @@
-/*
- * ring buffer based initcalls tracer
- *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/ftrace.h>
-#include <linux/kallsyms.h>
-#include <linux/time.h>
-
-#include "trace.h"
-#include "trace_output.h"
-
-static struct trace_array *boot_trace;
-static bool pre_initcalls_finished;
-
-/* Tells the boot tracer that the pre_smp_initcalls are finished.
- * So we are ready .
- * It doesn't enable sched events tracing however.
- * You have to call enable_boot_trace to do so.
- */
-void start_boot_trace(void)
-{
-	pre_initcalls_finished = true;
-}
-
-void enable_boot_trace(void)
-{
-	if (boot_trace && pre_initcalls_finished)
-		tracing_start_sched_switch_record();
-}
-
-void disable_boot_trace(void)
-{
-	if (boot_trace && pre_initcalls_finished)
-		tracing_stop_sched_switch_record();
-}
-
-static int boot_trace_init(struct trace_array *tr)
-{
-	boot_trace = tr;
-
-	if (!tr)
-		return 0;
-
-	tracing_reset_online_cpus(tr);
-
-	tracing_sched_switch_assign_trace(tr);
-	return 0;
-}
-
-static enum print_line_t
-initcall_call_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-	struct trace_seq *s = &iter->seq;
-	struct trace_boot_call *field;
-	struct boot_trace_call *call;
-	u64 ts;
-	unsigned long nsec_rem;
-	int ret;
-
-	trace_assign_type(field, entry);
-	call = &field->boot_call;
-	ts = iter->ts;
-	nsec_rem = do_div(ts, NSEC_PER_SEC);
-
-	ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
-			(unsigned long)ts, nsec_rem, call->func, call->caller);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	else
-		return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-initcall_ret_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-	struct trace_seq *s = &iter->seq;
-	struct trace_boot_ret *field;
-	struct boot_trace_ret *init_ret;
-	u64 ts;
-	unsigned long nsec_rem;
-	int ret;
-
-	trace_assign_type(field, entry);
-	init_ret = &field->boot_ret;
-	ts = iter->ts;
-	nsec_rem = do_div(ts, NSEC_PER_SEC);
-
-	ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
-			"returned %d after %llu msecs\n",
-			(unsigned long) ts,
-			nsec_rem,
-			init_ret->func, init_ret->result, init_ret->duration);
-
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-	else
-		return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t initcall_print_line(struct trace_iterator *iter)
-{
-	struct trace_entry *entry = iter->ent;
-
-	switch (entry->type) {
-	case TRACE_BOOT_CALL:
-		return initcall_call_print_line(iter);
-	case TRACE_BOOT_RET:
-		return initcall_ret_print_line(iter);
-	default:
-		return TRACE_TYPE_UNHANDLED;
-	}
-}
-
-struct tracer boot_tracer __read_mostly =
-{
-	.name		= "initcall",
-	.init		= boot_trace_init,
-	.reset		= tracing_reset_online_cpus,
-	.print_line	= initcall_print_line,
-};
-
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
-{
-	struct ftrace_event_call *call = &event_boot_call;
-	struct ring_buffer_event *event;
-	struct ring_buffer *buffer;
-	struct trace_boot_call *entry;
-	struct trace_array *tr = boot_trace;
-
-	if (!tr || !pre_initcalls_finished)
-		return;
-
-	/* Get its name now since this function could
-	 * disappear because it is in the .init section.
-	 */
-	sprint_symbol(bt->func, (unsigned long)fn);
-	preempt_disable();
-
-	buffer = tr->buffer;
-	event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
-					  sizeof(*entry), 0, 0);
-	if (!event)
-		goto out;
-	entry	= ring_buffer_event_data(event);
-	entry->boot_call = *bt;
-	if (!filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
-	preempt_enable();
-}
-
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
-{
-	struct ftrace_event_call *call = &event_boot_ret;
-	struct ring_buffer_event *event;
-	struct ring_buffer *buffer;
-	struct trace_boot_ret *entry;
-	struct trace_array *tr = boot_trace;
-
-	if (!tr || !pre_initcalls_finished)
-		return;
-
-	sprint_symbol(bt->func, (unsigned long)fn);
-	preempt_disable();
-
-	buffer = tr->buffer;
-	event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
-					  sizeof(*entry), 0, 0);
-	if (!event)
-		goto out;
-	entry	= ring_buffer_event_data(event);
-	entry->boot_ret = *bt;
-	if (!filter_check_discard(call, entry, buffer, event))
-		trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
-	preempt_enable();
-}

+ 2 - 3
kernel/trace/trace_clock.c

@@ -32,16 +32,15 @@
 u64 notrace trace_clock_local(void)
 {
 	u64 clock;
-	int resched;
 
 	/*
 	 * sched_clock() is an architecture implemented, fast, scalable,
 	 * lockless clock. It is not guaranteed to be coherent across
 	 * CPUs, nor across CPU idle events.
 	 */
-	resched = ftrace_preempt_disable();
+	preempt_disable_notrace();
 	clock = sched_clock();
-	ftrace_preempt_enable(resched);
+	preempt_enable_notrace();
 
 	return clock;
 }

+ 0 - 94
kernel/trace/trace_entries.h

@@ -150,23 +150,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
 		)
 );
 
-/*
- * Special (free-form) trace entry:
- */
-FTRACE_ENTRY(special, special_entry,
-
-	TRACE_SPECIAL,
-
-	F_STRUCT(
-		__field(	unsigned long,	arg1	)
-		__field(	unsigned long,	arg2	)
-		__field(	unsigned long,	arg3	)
-	),
-
-	F_printk("(%08lx) (%08lx) (%08lx)",
-		 __entry->arg1, __entry->arg2, __entry->arg3)
-);
-
 /*
  * Stack-trace entry:
  */
@@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
 		 __entry->map_id, __entry->opcode)
 );
 
-FTRACE_ENTRY(boot_call, trace_boot_call,
-
-	TRACE_BOOT_CALL,
-
-	F_STRUCT(
-		__field_struct(	struct boot_trace_call,	boot_call	)
-		__field_desc(	pid_t,	boot_call,	caller		)
-		__array_desc(	char,	boot_call,	func,	KSYM_SYMBOL_LEN)
-	),
-
-	F_printk("%d  %s", __entry->caller, __entry->func)
-);
-
-FTRACE_ENTRY(boot_ret, trace_boot_ret,
-
-	TRACE_BOOT_RET,
-
-	F_STRUCT(
-		__field_struct(	struct boot_trace_ret,	boot_ret	)
-		__array_desc(	char,	boot_ret,	func,	KSYM_SYMBOL_LEN)
-		__field_desc(	int,	boot_ret,	result		)
-		__field_desc(	unsigned long, boot_ret, duration	)
-	),
-
-	F_printk("%s %d %lx",
-		 __entry->func, __entry->result, __entry->duration)
-);
 
 #define TRACE_FUNC_SIZE 30
 #define TRACE_FILE_SIZE 20
@@ -318,53 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,
 		 __entry->func, __entry->file, __entry->correct)
 );
 
-FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
-
-	TRACE_KMEM_ALLOC,
-
-	F_STRUCT(
-		__field(	enum kmemtrace_type_id,	type_id		)
-		__field(	unsigned long,		call_site	)
-		__field(	const void *,		ptr		)
-		__field(	size_t,			bytes_req	)
-		__field(	size_t,			bytes_alloc	)
-		__field(	gfp_t,			gfp_flags	)
-		__field(	int,			node		)
-	),
-
-	F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
-		 " flags:%x node:%d",
-		 __entry->type_id, __entry->call_site, __entry->ptr,
-		 __entry->bytes_req, __entry->bytes_alloc,
-		 __entry->gfp_flags, __entry->node)
-);
-
-FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
-
-	TRACE_KMEM_FREE,
-
-	F_STRUCT(
-		__field(	enum kmemtrace_type_id,	type_id		)
-		__field(	unsigned long,		call_site	)
-		__field(	const void *,		ptr		)
-	),
-
-	F_printk("type:%u call_site:%lx ptr:%p",
-		 __entry->type_id, __entry->call_site, __entry->ptr)
-);
-
-FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
-
-	TRACE_KSYM,
-
-	F_STRUCT(
-		__field(	unsigned long,	ip			  )
-		__field(	unsigned char,	type			  )
-		__array(	char	     ,	cmd,	   TASK_COMM_LEN  )
-		__field(	unsigned long,  addr			  )
-	),
-
-	F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
-		(void *)__entry->ip, (unsigned int)__entry->type,
-		(void *)__entry->addr,  __entry->cmd)
-);

+ 6 - 21
kernel/trace/trace_event_perf.c

@@ -9,8 +9,6 @@
 #include <linux/kprobes.h>
 #include "trace.h"
 
-EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
-
 static char *perf_trace_buf[4];
 
 /*
@@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
 		}
 	}
 
-	if (tp_event->class->reg)
-		ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
-	else
-		ret = tracepoint_probe_register(tp_event->name,
-						tp_event->class->perf_probe,
-						tp_event);
-
+	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
 	if (ret)
 		goto fail;
 
@@ -96,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)
 	mutex_lock(&event_mutex);
 	list_for_each_entry(tp_event, &ftrace_events, list) {
 		if (tp_event->event.type == event_id &&
-		    tp_event->class &&
-		    (tp_event->class->perf_probe ||
-		     tp_event->class->reg) &&
+		    tp_event->class && tp_event->class->reg &&
 		    try_module_get(tp_event->mod)) {
 			ret = perf_trace_event_init(tp_event, p_event);
 			break;
@@ -138,18 +128,13 @@ void perf_trace_destroy(struct perf_event *p_event)
 	if (--tp_event->perf_refcount > 0)
 		goto out;
 
-	if (tp_event->class->reg)
-		tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
-	else
-		tracepoint_probe_unregister(tp_event->name,
-					    tp_event->class->perf_probe,
-					    tp_event);
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
 
 	/*
-	 * Ensure our callback won't be called anymore. See
-	 * tracepoint_probe_unregister() and __DO_TRACE().
+	 * Ensure our callback won't be called anymore. The buffers
+	 * will be freed after that.
 	 */
-	synchronize_sched();
+	tracepoint_synchronize_unregister();
 
 	free_percpu(tp_event->perf_events);
 	tp_event->perf_events = NULL;

Some files were not shown in this diff because too many files have changed.