|
@@ -64,122 +64,12 @@
|
|
|
#define CNTR_NOT_SUPPORTED "<not supported>"
|
|
|
#define CNTR_NOT_COUNTED "<not counted>"
|
|
|
|
|
|
-static struct perf_event_attr default_attrs[] = {
|
|
|
-
|
|
|
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
|
|
|
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
|
|
|
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
|
|
|
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
|
|
|
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
|
|
|
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
|
|
|
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
|
|
|
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
|
|
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
|
|
|
-
|
|
|
-};
|
|
|
-
|
|
|
-/*
|
|
|
- * Detailed stats (-d), covering the L1 and last level data caches:
|
|
|
- */
|
|
|
-static struct perf_event_attr detailed_attrs[] = {
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_LL << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_LL << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
-};
|
|
|
-
|
|
|
-/*
|
|
|
- * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
|
|
|
- */
|
|
|
-static struct perf_event_attr very_detailed_attrs[] = {
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_L1I << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_L1I << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_DTLB << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_DTLB << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_ITLB << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_ITLB << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
-
|
|
|
-};
|
|
|
-
|
|
|
-/*
|
|
|
- * Very, very detailed stats (-d -d -d), adding prefetch events:
|
|
|
- */
|
|
|
-static struct perf_event_attr very_very_detailed_attrs[] = {
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
-
|
|
|
- { .type = PERF_TYPE_HW_CACHE,
|
|
|
- .config =
|
|
|
- PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
- (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
|
|
|
- (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
-};
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
static struct perf_evlist *evsel_list;
|
|
|
|
|
|
static struct perf_target target = {
|
|
|
.uid = UINT_MAX,
|
|
|
};
|
|
|
|
|
|
-static int run_idx = 0;
|
|
|
static int run_count = 1;
|
|
|
static bool no_inherit = false;
|
|
|
static bool scale = true;
|
|
@@ -187,15 +77,12 @@ static bool no_aggr = false;
|
|
|
static pid_t child_pid = -1;
|
|
|
static bool null_run = false;
|
|
|
static int detailed_run = 0;
|
|
|
-static bool sync_run = false;
|
|
|
static bool big_num = true;
|
|
|
static int big_num_opt = -1;
|
|
|
static const char *csv_sep = NULL;
|
|
|
static bool csv_output = false;
|
|
|
static bool group = false;
|
|
|
-static const char *output_name = NULL;
|
|
|
static FILE *output = NULL;
|
|
|
-static int output_fd;
|
|
|
|
|
|
static volatile int done = 0;
|
|
|
|
|
@@ -1028,11 +915,6 @@ static void sig_atexit(void)
|
|
|
kill(getpid(), signr);
|
|
|
}
|
|
|
|
|
|
-static const char * const stat_usage[] = {
|
|
|
- "perf stat [<options>] [<command>]",
|
|
|
- NULL
|
|
|
-};
|
|
|
-
|
|
|
static int stat__set_big_num(const struct option *opt __maybe_unused,
|
|
|
const char *s __maybe_unused, int unset)
|
|
|
{
|
|
@@ -1040,62 +922,119 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static bool append_file;
|
|
|
-
|
|
|
-static const struct option options[] = {
|
|
|
- OPT_CALLBACK('e', "event", &evsel_list, "event",
|
|
|
- "event selector. use 'perf list' to list available events",
|
|
|
- parse_events_option),
|
|
|
- OPT_CALLBACK(0, "filter", &evsel_list, "filter",
|
|
|
- "event filter", parse_filter),
|
|
|
- OPT_BOOLEAN('i', "no-inherit", &no_inherit,
|
|
|
- "child tasks do not inherit counters"),
|
|
|
- OPT_STRING('p', "pid", &target.pid, "pid",
|
|
|
- "stat events on existing process id"),
|
|
|
- OPT_STRING('t', "tid", &target.tid, "tid",
|
|
|
- "stat events on existing thread id"),
|
|
|
- OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
|
|
|
- "system-wide collection from all CPUs"),
|
|
|
- OPT_BOOLEAN('g', "group", &group,
|
|
|
- "put the counters into a counter group"),
|
|
|
- OPT_BOOLEAN('c', "scale", &scale,
|
|
|
- "scale/normalize counters"),
|
|
|
- OPT_INCR('v', "verbose", &verbose,
|
|
|
- "be more verbose (show counter open errors, etc)"),
|
|
|
- OPT_INTEGER('r', "repeat", &run_count,
|
|
|
- "repeat command and print average + stddev (max: 100)"),
|
|
|
- OPT_BOOLEAN('n', "null", &null_run,
|
|
|
- "null run - dont start any counters"),
|
|
|
- OPT_INCR('d', "detailed", &detailed_run,
|
|
|
- "detailed run - start a lot of events"),
|
|
|
- OPT_BOOLEAN('S', "sync", &sync_run,
|
|
|
- "call sync() before starting a run"),
|
|
|
- OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
|
|
|
- "print large numbers with thousands\' separators",
|
|
|
- stat__set_big_num),
|
|
|
- OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
|
|
|
- "list of cpus to monitor in system-wide"),
|
|
|
- OPT_BOOLEAN('A', "no-aggr", &no_aggr,
|
|
|
- "disable CPU count aggregation"),
|
|
|
- OPT_STRING('x', "field-separator", &csv_sep, "separator",
|
|
|
- "print counts with custom separator"),
|
|
|
- OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
|
|
|
- "monitor event in cgroup name only",
|
|
|
- parse_cgroups),
|
|
|
- OPT_STRING('o', "output", &output_name, "file",
|
|
|
- "output file name"),
|
|
|
- OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
|
|
|
- OPT_INTEGER(0, "log-fd", &output_fd,
|
|
|
- "log output to fd, instead of stderr"),
|
|
|
- OPT_END()
|
|
|
-};
|
|
|
-
|
|
|
/*
|
|
|
* Add default attributes, if there were no attributes specified or
|
|
|
* if -d/--detailed, -d -d or -d -d -d is used:
|
|
|
*/
|
|
|
static int add_default_attributes(void)
|
|
|
{
|
|
|
+ struct perf_event_attr default_attrs[] = {
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
|
|
|
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
|
|
|
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
|
|
|
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
|
|
|
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
|
|
|
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
|
|
|
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
|
|
|
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
|
|
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
|
|
|
+
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * Detailed stats (-d), covering the L1 and last level data caches:
|
|
|
+ */
|
|
|
+ struct perf_event_attr detailed_attrs[] = {
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_LL << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_LL << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
|
|
|
+ */
|
|
|
+ struct perf_event_attr very_detailed_attrs[] = {
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_L1I << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_L1I << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_DTLB << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_DTLB << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_ITLB << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_ITLB << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
+
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * Very, very detailed stats (-d -d -d), adding prefetch events:
|
|
|
+ */
|
|
|
+ struct perf_event_attr very_very_detailed_attrs[] = {
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
|
|
|
+
|
|
|
+ { .type = PERF_TYPE_HW_CACHE,
|
|
|
+ .config =
|
|
|
+ PERF_COUNT_HW_CACHE_L1D << 0 |
|
|
|
+ (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
|
|
|
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
|
|
|
+};
|
|
|
+
|
|
|
/* Set attrs if no event is selected and !null_run: */
|
|
|
if (null_run)
|
|
|
return 0;
|
|
@@ -1130,8 +1069,59 @@ static int add_default_attributes(void)
|
|
|
|
|
|
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
{
|
|
|
+ bool append_file = false,
|
|
|
+ sync_run = false;
|
|
|
+ int output_fd = 0;
|
|
|
+ const char *output_name = NULL;
|
|
|
+ const struct option options[] = {
|
|
|
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
|
|
|
+ "event selector. use 'perf list' to list available events",
|
|
|
+ parse_events_option),
|
|
|
+ OPT_CALLBACK(0, "filter", &evsel_list, "filter",
|
|
|
+ "event filter", parse_filter),
|
|
|
+ OPT_BOOLEAN('i', "no-inherit", &no_inherit,
|
|
|
+ "child tasks do not inherit counters"),
|
|
|
+ OPT_STRING('p', "pid", &target.pid, "pid",
|
|
|
+ "stat events on existing process id"),
|
|
|
+ OPT_STRING('t', "tid", &target.tid, "tid",
|
|
|
+ "stat events on existing thread id"),
|
|
|
+ OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
|
|
|
+ "system-wide collection from all CPUs"),
|
|
|
+ OPT_BOOLEAN('g', "group", &group,
|
|
|
+ "put the counters into a counter group"),
|
|
|
+ OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
|
|
|
+ OPT_INCR('v', "verbose", &verbose,
|
|
|
+ "be more verbose (show counter open errors, etc)"),
|
|
|
+ OPT_INTEGER('r', "repeat", &run_count,
|
|
|
+ "repeat command and print average + stddev (max: 100)"),
|
|
|
+ OPT_BOOLEAN('n', "null", &null_run,
|
|
|
+ "null run - dont start any counters"),
|
|
|
+ OPT_INCR('d', "detailed", &detailed_run,
|
|
|
+ "detailed run - start a lot of events"),
|
|
|
+ OPT_BOOLEAN('S', "sync", &sync_run,
|
|
|
+ "call sync() before starting a run"),
|
|
|
+ OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
|
|
|
+ "print large numbers with thousands\' separators",
|
|
|
+ stat__set_big_num),
|
|
|
+ OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
|
|
|
+ "list of cpus to monitor in system-wide"),
|
|
|
+ OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"),
|
|
|
+ OPT_STRING('x', "field-separator", &csv_sep, "separator",
|
|
|
+ "print counts with custom separator"),
|
|
|
+ OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
|
|
|
+ "monitor event in cgroup name only", parse_cgroups),
|
|
|
+ OPT_STRING('o', "output", &output_name, "file", "output file name"),
|
|
|
+ OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
|
|
|
+ OPT_INTEGER(0, "log-fd", &output_fd,
|
|
|
+ "log output to fd, instead of stderr"),
|
|
|
+ OPT_END()
|
|
|
+ };
|
|
|
+ const char * const stat_usage[] = {
|
|
|
+ "perf stat [<options>] [<command>]",
|
|
|
+ NULL
|
|
|
+ };
|
|
|
struct perf_evsel *pos;
|
|
|
- int status = -ENOMEM;
|
|
|
+ int status = -ENOMEM, run_idx;
|
|
|
const char *mode;
|
|
|
|
|
|
setlocale(LC_ALL, "");
|