Browse Source

Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux-2.6 into perf/core

Ingo Molnar 14 years ago
parent
commit
e4b546a364

+ 36 - 1
tools/perf/Documentation/perf-annotate.txt

@@ -24,12 +24,47 @@ OPTIONS
 --input=::
 --input=::
         Input file name. (default: perf.data)
         Input file name. (default: perf.data)
 
 
+-d::
+--dsos=<dso[,dso...]>::
+        Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+        Symbol to annotate.
+
+-f::
+--force::
+        Don't complain, do it.
+
+-v::
+--verbose::
+        Be more verbose. (Show symbol address, etc)
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+        Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+        Don't shorten the displayed pathnames.
+
 --stdio:: Use the stdio interface.
 --stdio:: Use the stdio interface.
 
 
 --tui:: Use the TUI interface Use of --tui requires a tty, if one is not
 --tui:: Use the TUI interface Use of --tui requires a tty, if one is not
 	present, as when piping to other commands, the stdio interface is
 	present, as when piping to other commands, the stdio interface is
 	used. This interface starts by centering on the line with more
 	used. This interface starts by centering on the line with more
-	samples, TAB/UNTAB cycles thru the lines with more samples.
+	samples, TAB/UNTAB cycles through the lines with more samples.
 
 
 SEE ALSO
 SEE ALSO
 --------
 --------

+ 3 - 0
tools/perf/Documentation/perf-buildid-list.txt

@@ -18,6 +18,9 @@ perf report.
 
 
 OPTIONS
 OPTIONS
 -------
 -------
+-H::
+--with-hits::
+        Show only DSOs with hits.
 -i::
 -i::
 --input=::
 --input=::
         Input file name. (default: perf.data)
         Input file name. (default: perf.data)

+ 18 - 1
tools/perf/Documentation/perf-diff.txt

@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
 
 
 OPTIONS
 OPTIONS
 -------
 -------
+-M::
+--displacement::
+        Show position displacement relative to baseline.
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
 -d::
 -d::
 --dsos=::
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
 	Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@ OPTIONS
 --field-separator=::
 --field-separator=::
 
 
 	Use a special separator character and don't pad with spaces, replacing
 	Use a special separator character and don't pad with spaces, replacing
-	all occurances of this separator in symbol names (and other output)
+	all occurrences of this separator in symbol names (and other output)
 	with a '.' character, that thus it's the only non valid separator.
 	with a '.' character, that thus it's the only non valid separator.
 
 
 -v::
 -v::
@@ -50,6 +62,11 @@ OPTIONS
 	Be verbose, for instance, show the raw counts in addition to the
 	Be verbose, for instance, show the raw counts in addition to the
 	diff.
 	diff.
 
 
+-f::
+--force::
+       Don't complain, do it.
+
+
 SEE ALSO
 SEE ALSO
 --------
 --------
 linkperf:perf-record[1]
 linkperf:perf-record[1]

+ 7 - 1
tools/perf/Documentation/perf-kvm.txt

@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
   a performance counter profile of guest os in realtime
   a performance counter profile of guest os in realtime
   of an arbitrary workload.
   of an arbitrary workload.
 
 
-  'perf kvm record <command>' to record the performance couinter profile
+  'perf kvm record <command>' to record the performance counter profile
   of an arbitrary workload and save it into a perf data file. If both
   of an arbitrary workload and save it into a perf data file. If both
   --host and --guest are input, the perf data file name is perf.data.kvm.
   --host and --guest are input, the perf data file name is perf.data.kvm.
   If there is  no --host but --guest, the file name is perf.data.guest.
   If there is  no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
 
 
 OPTIONS
 OPTIONS
 -------
 -------
+-i::
+--input=::
+        Input file name.
+-o::
+--output::
+        Output file name.
 --host=::
 --host=::
         Collect host side performance profile.
         Collect host side performance profile.
 --guest=::
 --guest=::

+ 15 - 0
tools/perf/Documentation/perf-lock.txt

@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
 
 
   'perf lock report' reports statistical data.
   'perf lock report' reports statistical data.
 
 
+OPTIONS
+-------
+
+-i::
+--input=<file>::
+        Input file name.
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 SEE ALSO
 SEE ALSO
 --------
 --------
 linkperf:perf[1]
 linkperf:perf[1]

+ 1 - 1
tools/perf/Documentation/perf-probe.txt

@@ -115,7 +115,7 @@ Each probe argument follows below syntax.
 
 
 LINE SYNTAX
 LINE SYNTAX
 -----------
 -----------
-Line range is descripted by following syntax.
+Line range is described by following syntax.
 
 
  "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
  "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
 
 

+ 13 - 4
tools/perf/Documentation/perf-record.txt

@@ -39,15 +39,24 @@ OPTIONS
           be passed as follows: '\mem:addr[:[r][w][x]]'.
           be passed as follows: '\mem:addr[:[r][w][x]]'.
           If you want to profile read-write accesses in 0x1000, just set
           If you want to profile read-write accesses in 0x1000, just set
           'mem:0x1000:rw'.
           'mem:0x1000:rw'.
+
+--filter=<filter>::
+        Event filter.
+
 -a::
 -a::
-        System-wide collection.
+--all-cpus::
+        System-wide collection from all CPUs.
 
 
 -l::
 -l::
         Scale counter values.
         Scale counter values.
 
 
 -p::
 -p::
 --pid=::
 --pid=::
-	Record events on existing pid.
+	Record events on existing process ID.
+
+-t::
+--tid=::
+        Record events on existing thread ID.
 
 
 -r::
 -r::
 --realtime=::
 --realtime=::
@@ -109,8 +118,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
 
 
 -C::
 -C::
 --cpu::
 --cpu::
-Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode with inheritance mode on (default), samples are captured only when
 In per-thread mode with inheritance mode on (default), samples are captured only when
 the thread executes on the designated CPUs. Default is to monitor all CPUs.
 the thread executes on the designated CPUs. Default is to monitor all CPUs.
 
 

+ 45 - 4
tools/perf/Documentation/perf-report.txt

@@ -20,6 +20,11 @@ OPTIONS
 -i::
 -i::
 --input=::
 --input=::
         Input file name. (default: perf.data)
         Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -d::
 -d::
 --dsos=::
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
 	Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@ OPTIONS
 -n::
 -n::
 --show-nr-samples::
 --show-nr-samples::
 	Show the number of samples for each symbol
 	Show the number of samples for each symbol
+
+--showcpuutilization::
+        Show sample percentage for different cpu modes.
+
 -T::
 -T::
 --threads::
 --threads::
 	Show per-thread event counters
 	Show per-thread event counters
@@ -39,12 +48,24 @@ OPTIONS
 	Only consider these symbols. CSV that understands
 	Only consider these symbols. CSV that understands
 	file://filename entries.
 	file://filename entries.
 
 
+-U::
+--hide-unresolved::
+        Only display entries resolved to a symbol.
+
 -s::
 -s::
 --sort=::
 --sort=::
 	Sort by key(s): pid, comm, dso, symbol, parent.
 	Sort by key(s): pid, comm, dso, symbol, parent.
 
 
+-p::
+--parent=<regex>::
+        regex filter to identify parent, see: '--sort parent'
+
+-x::
+--exclude-other::
+        Only display entries with parent-match.
+
 -w::
 -w::
---field-width=::
+--column-widths=<width[,width...]>::
 	Force each column width to the provided list, for large terminal
 	Force each column width to the provided list, for large terminal
 	readability.
 	readability.
 
 
@@ -52,19 +73,26 @@ OPTIONS
 --field-separator=::
 --field-separator=::
 
 
 	Use a special separator character and don't pad with spaces, replacing
 	Use a special separator character and don't pad with spaces, replacing
-	all occurances of this separator in symbol names (and other output)
+	all occurrences of this separator in symbol names (and other output)
 	with a '.' character, that thus it's the only non valid separator.
 	with a '.' character, that thus it's the only non valid separator.
 
 
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 -g [type,min]::
 -g [type,min]::
 --call-graph::
 --call-graph::
-        Display callchains using type and min percent threshold.
+        Display call chains using type and min percent threshold.
 	type can be either:
 	type can be either:
-	- flat: single column, linear exposure of callchains.
+	- flat: single column, linear exposure of call chains.
 	- graph: use a graph tree, displaying absolute overhead rates.
 	- graph: use a graph tree, displaying absolute overhead rates.
 	- fractal: like graph, but displays relative rates. Each branch of
 	- fractal: like graph, but displays relative rates. Each branch of
 		 the tree is considered as a new profiled object. +
 		 the tree is considered as a new profiled object. +
 	Default: fractal,0.5.
 	Default: fractal,0.5.
 
 
+--pretty=<key>::
+        Pretty printing style.  key: normal, raw
+
 --stdio:: Use the stdio interface.
 --stdio:: Use the stdio interface.
 
 
 --tui:: Use the TUI interface, that is integrated with annotate and allows
 --tui:: Use the TUI interface, that is integrated with annotate and allows
@@ -72,6 +100,19 @@ OPTIONS
 	requires a tty, if one is not present, as when piping to other
 	requires a tty, if one is not present, as when piping to other
 	commands, the stdio interface is used.
 	commands, the stdio interface is used.
 
 
+-k::
+--vmlinux=<file>::
+        vmlinux pathname
+
+-m::
+--modules::
+        Load module symbols. WARNING: This should only be used with -k and
+        a LIVE kernel.
+
+-f::
+--force::
+        Don't complain, do it.
+
 SEE ALSO
 SEE ALSO
 --------
 --------
 linkperf:perf-stat[1]
 linkperf:perf-stat[1]

+ 16 - 2
tools/perf/Documentation/perf-sched.txt

@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
 SYNOPSIS
 SYNOPSIS
 --------
 --------
 [verse]
 [verse]
-'perf sched' {record|latency|replay|trace}
+'perf sched' {record|latency|map|replay|trace}
 
 
 DESCRIPTION
 DESCRIPTION
 -----------
 -----------
-There are four variants of perf sched:
+There are five variants of perf sched:
 
 
   'perf sched record <command>' to record the scheduling events
   'perf sched record <command>' to record the scheduling events
   of an arbitrary workload.
   of an arbitrary workload.
@@ -30,8 +30,22 @@ There are four variants of perf sched:
   of the workload as it occurred when it was recorded - and can repeat
   of the workload as it occurred when it was recorded - and can repeat
   it a number of times, measuring its performance.)
   it a number of times, measuring its performance.)
 
 
+  'perf sched map' to print a textual context-switching outline of
+  workload captured via perf sched record.  Columns stand for
+  individual CPUs, and the two-letter shortcuts stand for tasks that
+  are running on a CPU. A '*' denotes the CPU that had the event, and
+  a dot signals an idle CPU.
+
 OPTIONS
 OPTIONS
 -------
 -------
+-i::
+--input=<file>::
+        Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -D::
 -D::
 --dump-raw-trace=::
 --dump-raw-trace=::
         Display verbose dump of the sched data.
         Display verbose dump of the sched data.

+ 7 - 0
tools/perf/Documentation/perf-script.txt

@@ -104,6 +104,13 @@ OPTIONS
         normally don't - this option allows the latter to be run in
         normally don't - this option allows the latter to be run in
         system-wide mode.
         system-wide mode.
 
 
+-i::
+--input=::
+        Input file name.
+
+-d::
+--debug-mode::
+        Do various checks like samples ordering and lost events.
 
 
 SEE ALSO
 SEE ALSO
 --------
 --------

+ 32 - 7
tools/perf/Documentation/perf-stat.txt

@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
 SYNOPSIS
 SYNOPSIS
 --------
 --------
 [verse]
 [verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
 
 
 DESCRIPTION
 DESCRIPTION
 -----------
 -----------
@@ -35,21 +35,33 @@ OPTIONS
         child tasks do not inherit counters
         child tasks do not inherit counters
 -p::
 -p::
 --pid=<pid>::
 --pid=<pid>::
-        stat events on existing pid
+        stat events on existing process id
+
+-t::
+--tid=<tid>::
+        stat events on existing thread id
+
 
 
 -a::
 -a::
-        system-wide collection
+--all-cpus::
+        system-wide collection from all CPUs
 
 
 -c::
 -c::
-        scale counter values
+--scale::
+	scale/normalize counter values
+
+-r::
+--repeat=<n>::
+	repeat command and print average + stddev (max: 100)
 
 
 -B::
 -B::
+--big-num::
         print large numbers with thousands' separators according to locale
         print large numbers with thousands' separators according to locale
 
 
 -C::
 -C::
 --cpu=::
 --cpu=::
-Count only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode, this option is ignored. The -a option is still necessary
 In per-thread mode, this option is ignored. The -a option is still necessary
 to activate system-wide monitoring. Default is to count on all CPUs.
 to activate system-wide monitoring. Default is to count on all CPUs.
 
 
@@ -58,6 +70,19 @@ to activate system-wide monitoring. Default is to count on all CPUs.
 Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
 Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
 This option is only valid in system-wide mode.
 This option is only valid in system-wide mode.
 
 
+-n::
+--null::
+        null run - don't start any counters
+
+-v::
+--verbose::
+        be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
 EXAMPLES
 EXAMPLES
 --------
 --------
 
 

+ 1 - 1
tools/perf/Documentation/perf-test.txt

@@ -12,7 +12,7 @@ SYNOPSIS
 
 
 DESCRIPTION
 DESCRIPTION
 -----------
 -----------
-This command does assorted sanity tests, initially thru linked routines but
+This command does assorted sanity tests, initially through linked routines but
 also will look for a directory with more tests in the form of scripts.
 also will look for a directory with more tests in the form of scripts.
 
 
 OPTIONS
 OPTIONS

+ 24 - 4
tools/perf/Documentation/perf-top.txt

@@ -12,7 +12,7 @@ SYNOPSIS
 
 
 DESCRIPTION
 DESCRIPTION
 -----------
 -----------
-This command generates and displays a performance counter profile in realtime.
+This command generates and displays a performance counter profile in real time.
 
 
 
 
 OPTIONS
 OPTIONS
@@ -27,8 +27,8 @@ OPTIONS
 
 
 -C <cpu-list>::
 -C <cpu-list>::
 --cpu=<cpu>::
 --cpu=<cpu>::
-Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 Default is to monitor all CPUS.
 Default is to monitor all CPUS.
 
 
 -d <seconds>::
 -d <seconds>::
@@ -50,6 +50,10 @@ Default is to monitor all CPUS.
 --count-filter=<count>::
 --count-filter=<count>::
 	Only display functions with more events than this.
 	Only display functions with more events than this.
 
 
+-g::
+--group::
+        Put the counters into a counter group.
+
 -F <freq>::
 -F <freq>::
 --freq=<freq>::
 --freq=<freq>::
 	Profile at this frequency.
 	Profile at this frequency.
@@ -68,7 +72,11 @@ Default is to monitor all CPUS.
 
 
 -p <pid>::
 -p <pid>::
 --pid=<pid>::
 --pid=<pid>::
-	Profile events on existing pid.
+	Profile events on existing process ID.
+
+-t <tid>::
+--tid=<tid>::
+        Profile events on existing thread ID.
 
 
 -r <priority>::
 -r <priority>::
 --realtime=<priority>::
 --realtime=<priority>::
@@ -78,6 +86,18 @@ Default is to monitor all CPUS.
 --sym-annotate=<symbol>::
 --sym-annotate=<symbol>::
         Annotate this symbol.
         Annotate this symbol.
 
 
+-K::
+--hide_kernel_symbols::
+        Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+        Hide user symbols.
+
+-D::
+--dump-symtab::
+        Dump the symbol table used for profiling.
+
 -v::
 -v::
 --verbose::
 --verbose::
 	Be more verbose (show counter open errors, etc).
 	Be more verbose (show counter open errors, etc).

+ 1 - 1
tools/perf/builtin-diff.c

@@ -173,7 +173,7 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
 static const struct option options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
 		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('m', "displacement", &show_displacement,
+	OPT_BOOLEAN('M', "displacement", &show_displacement,
 		    "Show position displacement relative to baseline"),
 		    "Show position displacement relative to baseline"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 		    "dump raw trace in ASCII"),

+ 105 - 41
tools/perf/builtin-stat.c

@@ -52,6 +52,8 @@
 #include <math.h>
 #include <math.h>
 #include <locale.h>
 #include <locale.h>
 
 
+#define DEFAULT_SEPARATOR	" "
+
 static struct perf_event_attr default_attrs[] = {
 static struct perf_event_attr default_attrs[] = {
 
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
@@ -82,8 +84,11 @@ static pid_t			*all_tids			=  NULL;
 static int			thread_num			=  0;
 static int			thread_num			=  0;
 static pid_t			child_pid			= -1;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static bool			null_run			=  false;
-static bool			big_num				=  false;
+static bool			big_num				=  true;
+static int			big_num_opt			=  -1;
 static const char		*cpu_list;
 static const char		*cpu_list;
+static const char		*csv_sep			= NULL;
+static bool			csv_output			= false;
 
 
 
 
 static int			*fd[MAX_NR_CPUS][MAX_COUNTERS];
 static int			*fd[MAX_NR_CPUS][MAX_COUNTERS];
@@ -449,12 +454,18 @@ static void print_noise(int counter, double avg)
 static void nsec_printout(int cpu, int counter, double avg)
 static void nsec_printout(int cpu, int counter, double avg)
 {
 {
 	double msecs = avg / 1e6;
 	double msecs = avg / 1e6;
+	char cpustr[16] = { '\0', };
+	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
 
 
 	if (no_aggr)
 	if (no_aggr)
-		fprintf(stderr, "CPU%-4d %18.6f  %-24s",
-			cpumap[cpu], msecs, event_name(counter));
-	else
-		fprintf(stderr, " %18.6f  %-24s", msecs, event_name(counter));
+		sprintf(cpustr, "CPU%*d%s",
+			csv_output ? 0 : -4,
+			cpumap[cpu], csv_sep);
+
+	fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
+
+	if (csv_output)
+		return;
 
 
 	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
 	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
 		fprintf(stderr, " # %10.3f CPUs ",
 		fprintf(stderr, " # %10.3f CPUs ",
@@ -466,18 +477,26 @@ static void abs_printout(int cpu, int counter, double avg)
 {
 {
 	double total, ratio = 0.0;
 	double total, ratio = 0.0;
 	char cpustr[16] = { '\0', };
 	char cpustr[16] = { '\0', };
+	const char *fmt;
+
+	if (csv_output)
+		fmt = "%s%.0f%s%s";
+	else if (big_num)
+		fmt = "%s%'18.0f%s%-24s";
+	else
+		fmt = "%s%18.0f%s%-24s";
 
 
 	if (no_aggr)
 	if (no_aggr)
-		sprintf(cpustr, "CPU%-4d", cpumap[cpu]);
+		sprintf(cpustr, "CPU%*d%s",
+			csv_output ? 0 : -4,
+			cpumap[cpu], csv_sep);
 	else
 	else
 		cpu = 0;
 		cpu = 0;
 
 
-	if (big_num)
-		fprintf(stderr, "%s %'18.0f  %-24s",
-			cpustr, avg, event_name(counter));
-	else
-		fprintf(stderr, "%s %18.0f  %-24s",
-			cpustr, avg, event_name(counter));
+	fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
+
+	if (csv_output)
+		return;
 
 
 	if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
 	if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
 		total = avg_stats(&runtime_cycles_stats[cpu]);
 		total = avg_stats(&runtime_cycles_stats[cpu]);
@@ -515,8 +534,9 @@ static void print_counter_aggr(int counter)
 	int scaled = event_scaled[counter];
 	int scaled = event_scaled[counter];
 
 
 	if (scaled == -1) {
 	if (scaled == -1) {
-		fprintf(stderr, " %18s  %-24s\n",
-			"<not counted>", event_name(counter));
+		fprintf(stderr, "%*s%s%-24s\n",
+			csv_output ? 0 : 18,
+			"<not counted>", csv_sep, event_name(counter));
 		return;
 		return;
 	}
 	}
 
 
@@ -525,6 +545,11 @@ static void print_counter_aggr(int counter)
 	else
 	else
 		abs_printout(-1, counter, avg);
 		abs_printout(-1, counter, avg);
 
 
+	if (csv_output) {
+		fputc('\n', stderr);
+		return;
+	}
+
 	print_noise(counter, avg);
 	print_noise(counter, avg);
 
 
 	if (scaled) {
 	if (scaled) {
@@ -554,8 +579,12 @@ static void print_counter(int counter)
 		ena = cpu_counts[cpu][counter].ena;
 		ena = cpu_counts[cpu][counter].ena;
 		run = cpu_counts[cpu][counter].run;
 		run = cpu_counts[cpu][counter].run;
 		if (run == 0 || ena == 0) {
 		if (run == 0 || ena == 0) {
-			fprintf(stderr, "CPU%-4d %18s  %-24s", cpumap[cpu],
-					"<not counted>", event_name(counter));
+			fprintf(stderr, "CPU%*d%s%*s%s%-24s",
+				csv_output ? 0 : -4,
+				cpumap[cpu], csv_sep,
+				csv_output ? 0 : 18,
+				"<not counted>", csv_sep,
+				event_name(counter));
 
 
 			fprintf(stderr, "\n");
 			fprintf(stderr, "\n");
 			continue;
 			continue;
@@ -566,11 +595,13 @@ static void print_counter(int counter)
 		else
 		else
 			abs_printout(cpu, counter, val);
 			abs_printout(cpu, counter, val);
 
 
-		print_noise(counter, 1.0);
+		if (!csv_output) {
+			print_noise(counter, 1.0);
 
 
-		if (run != ena) {
-			fprintf(stderr, "  (scaled from %.2f%%)",
+			if (run != ena) {
+				fprintf(stderr, "  (scaled from %.2f%%)",
 					100.0 * run / ena);
 					100.0 * run / ena);
+			}
 		}
 		}
 		fprintf(stderr, "\n");
 		fprintf(stderr, "\n");
 	}
 	}
@@ -582,21 +613,23 @@ static void print_stat(int argc, const char **argv)
 
 
 	fflush(stdout);
 	fflush(stdout);
 
 
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for ");
-	if(target_pid == -1 && target_tid == -1) {
-		fprintf(stderr, "\'%s", argv[0]);
-		for (i = 1; i < argc; i++)
-			fprintf(stderr, " %s", argv[i]);
-	} else if (target_pid != -1)
-		fprintf(stderr, "process id \'%d", target_pid);
-	else
-		fprintf(stderr, "thread id \'%d", target_tid);
+	if (!csv_output) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, " Performance counter stats for ");
+		if(target_pid == -1 && target_tid == -1) {
+			fprintf(stderr, "\'%s", argv[0]);
+			for (i = 1; i < argc; i++)
+				fprintf(stderr, " %s", argv[i]);
+		} else if (target_pid != -1)
+			fprintf(stderr, "process id \'%d", target_pid);
+		else
+			fprintf(stderr, "thread id \'%d", target_tid);
 
 
-	fprintf(stderr, "\'");
-	if (run_count > 1)
-		fprintf(stderr, " (%d runs)", run_count);
-	fprintf(stderr, ":\n\n");
+		fprintf(stderr, "\'");
+		if (run_count > 1)
+			fprintf(stderr, " (%d runs)", run_count);
+		fprintf(stderr, ":\n\n");
+	}
 
 
 	if (no_aggr) {
 	if (no_aggr) {
 		for (counter = 0; counter < nr_counters; counter++)
 		for (counter = 0; counter < nr_counters; counter++)
@@ -606,15 +639,17 @@ static void print_stat(int argc, const char **argv)
 			print_counter_aggr(counter);
 			print_counter_aggr(counter);
 	}
 	}
 
 
-	fprintf(stderr, "\n");
-	fprintf(stderr, " %18.9f  seconds time elapsed",
-			avg_stats(&walltime_nsecs_stats)/1e9);
-	if (run_count > 1) {
-		fprintf(stderr, "   ( +- %7.3f%% )",
+	if (!csv_output) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, " %18.9f  seconds time elapsed",
+				avg_stats(&walltime_nsecs_stats)/1e9);
+		if (run_count > 1) {
+			fprintf(stderr, "   ( +- %7.3f%% )",
 				100*stddev_stats(&walltime_nsecs_stats) /
 				100*stddev_stats(&walltime_nsecs_stats) /
 				avg_stats(&walltime_nsecs_stats));
 				avg_stats(&walltime_nsecs_stats));
+		}
+		fprintf(stderr, "\n\n");
 	}
 	}
-	fprintf(stderr, "\n\n");
 }
 }
 
 
 static volatile int signr = -1;
 static volatile int signr = -1;
@@ -644,6 +679,13 @@ static const char * const stat_usage[] = {
 	NULL
 	NULL
 };
 };
 
 
+static int stat__set_big_num(const struct option *opt __used,
+			     const char *s __used, int unset)
+{
+	big_num_opt = unset ? 0 : 1;
+	return 0;
+}
+
 static const struct option options[] = {
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
 	OPT_CALLBACK('e', "event", NULL, "event",
 		     "event selector. use 'perf list' to list available events",
 		     "event selector. use 'perf list' to list available events",
@@ -664,12 +706,15 @@ static const struct option options[] = {
 		    "repeat command and print average + stddev (max: 100)"),
 		    "repeat command and print average + stddev (max: 100)"),
 	OPT_BOOLEAN('n', "null", &null_run,
 	OPT_BOOLEAN('n', "null", &null_run,
 		    "null run - dont start any counters"),
 		    "null run - dont start any counters"),
-	OPT_BOOLEAN('B', "big-num", &big_num,
-		    "print large numbers with thousands\' separators"),
+	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
+			   "print large numbers with thousands\' separators",
+			   stat__set_big_num),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu",
 	OPT_STRING('C', "cpu", &cpu_list, "cpu",
 		    "list of cpus to monitor in system-wide"),
 		    "list of cpus to monitor in system-wide"),
 	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
 	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
 		    "disable CPU count aggregation"),
 		    "disable CPU count aggregation"),
+	OPT_STRING('x', "field-separator", &csv_sep, "separator",
+		   "print counts with custom separator"),
 	OPT_END()
 	OPT_END()
 };
 };
 
 
@@ -682,6 +727,25 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 
 
 	argc = parse_options(argc, argv, options, stat_usage,
 	argc = parse_options(argc, argv, options, stat_usage,
 		PARSE_OPT_STOP_AT_NON_OPTION);
 		PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (csv_sep)
+		csv_output = true;
+	else
+		csv_sep = DEFAULT_SEPARATOR;
+
+	/*
+	 * let the spreadsheet do the pretty-printing
+	 */
+	if (csv_output) {
+		/* User explicitly passed -B? */
+		if (big_num_opt == 1) {
+			fprintf(stderr, "-B option not supported with -x\n");
+			usage_with_options(stat_usage, options);
+		} else /* Nope, so disable big number formatting */
+			big_num = false;
+	} else if (big_num_opt == 0) /* User passed --no-big-num */
+		big_num = false;
+
 	if (!argc && target_pid == -1 && target_tid == -1)
 	if (!argc && target_pid == -1 && target_tid == -1)
 		usage_with_options(stat_usage, options);
 		usage_with_options(stat_usage, options);
 	if (run_count <= 0)
 	if (run_count <= 0)