|
@@ -261,6 +261,10 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
|
|
struct stats runtime_branches_stats[MAX_NR_CPUS];
|
|
struct stats runtime_branches_stats[MAX_NR_CPUS];
|
|
struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
|
|
struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
|
|
struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
|
|
struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
|
|
|
|
+struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
|
|
|
|
+struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
|
|
|
|
+struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
|
|
|
|
+struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
|
|
struct stats walltime_nsecs_stats;
|
|
struct stats walltime_nsecs_stats;
|
|
|
|
|
|
static int create_perf_stat_counter(struct perf_evsel *evsel)
|
|
static int create_perf_stat_counter(struct perf_evsel *evsel)
|
|
@@ -317,6 +321,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
|
|
update_stats(&runtime_cacherefs_stats[0], count[0]);
|
|
update_stats(&runtime_cacherefs_stats[0], count[0]);
|
|
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
|
|
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
|
|
update_stats(&runtime_l1_dcache_stats[0], count[0]);
|
|
update_stats(&runtime_l1_dcache_stats[0], count[0]);
|
|
|
|
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
|
|
|
|
+ update_stats(&runtime_l1_icache_stats[0], count[0]);
|
|
|
|
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
|
|
|
|
+ update_stats(&runtime_ll_cache_stats[0], count[0]);
|
|
|
|
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
|
|
|
|
+ update_stats(&runtime_dtlb_cache_stats[0], count[0]);
|
|
|
|
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
|
|
|
|
+ update_stats(&runtime_itlb_cache_stats[0], count[0]);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -630,6 +642,98 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou
|
|
fprintf(stderr, " of all L1-dcache hits ");
|
|
fprintf(stderr, " of all L1-dcache hits ");
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
|
|
|
|
+{
|
|
|
|
+ double total, ratio = 0.0;
|
|
|
|
+ const char *color;
|
|
|
|
+
|
|
|
|
+ total = avg_stats(&runtime_l1_icache_stats[cpu]);
|
|
|
|
+
|
|
|
|
+ if (total)
|
|
|
|
+ ratio = avg / total * 100.0;
|
|
|
|
+
|
|
|
|
+ color = PERF_COLOR_NORMAL;
|
|
|
|
+ if (ratio > 20.0)
|
|
|
|
+ color = PERF_COLOR_RED;
|
|
|
|
+ else if (ratio > 10.0)
|
|
|
|
+ color = PERF_COLOR_MAGENTA;
|
|
|
|
+ else if (ratio > 5.0)
|
|
|
|
+ color = PERF_COLOR_YELLOW;
|
|
|
|
+
|
|
|
|
+ fprintf(stderr, " # ");
|
|
|
|
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
|
|
|
|
+ fprintf(stderr, " of all L1-icache hits ");
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
|
|
|
|
+{
|
|
|
|
+ double total, ratio = 0.0;
|
|
|
|
+ const char *color;
|
|
|
|
+
|
|
|
|
+ total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
|
|
|
|
+
|
|
|
|
+ if (total)
|
|
|
|
+ ratio = avg / total * 100.0;
|
|
|
|
+
|
|
|
|
+ color = PERF_COLOR_NORMAL;
|
|
|
|
+ if (ratio > 20.0)
|
|
|
|
+ color = PERF_COLOR_RED;
|
|
|
|
+ else if (ratio > 10.0)
|
|
|
|
+ color = PERF_COLOR_MAGENTA;
|
|
|
|
+ else if (ratio > 5.0)
|
|
|
|
+ color = PERF_COLOR_YELLOW;
|
|
|
|
+
|
|
|
|
+ fprintf(stderr, " # ");
|
|
|
|
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
|
|
|
|
+ fprintf(stderr, " of all dTLB cache hits ");
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
|
|
|
|
+{
|
|
|
|
+ double total, ratio = 0.0;
|
|
|
|
+ const char *color;
|
|
|
|
+
|
|
|
|
+ total = avg_stats(&runtime_itlb_cache_stats[cpu]);
|
|
|
|
+
|
|
|
|
+ if (total)
|
|
|
|
+ ratio = avg / total * 100.0;
|
|
|
|
+
|
|
|
|
+ color = PERF_COLOR_NORMAL;
|
|
|
|
+ if (ratio > 20.0)
|
|
|
|
+ color = PERF_COLOR_RED;
|
|
|
|
+ else if (ratio > 10.0)
|
|
|
|
+ color = PERF_COLOR_MAGENTA;
|
|
|
|
+ else if (ratio > 5.0)
|
|
|
|
+ color = PERF_COLOR_YELLOW;
|
|
|
|
+
|
|
|
|
+ fprintf(stderr, " # ");
|
|
|
|
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
|
|
|
|
+ fprintf(stderr, " of all iTLB cache hits ");
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
|
|
|
|
+{
|
|
|
|
+ double total, ratio = 0.0;
|
|
|
|
+ const char *color;
|
|
|
|
+
|
|
|
|
+ total = avg_stats(&runtime_ll_cache_stats[cpu]);
|
|
|
|
+
|
|
|
|
+ if (total)
|
|
|
|
+ ratio = avg / total * 100.0;
|
|
|
|
+
|
|
|
|
+ color = PERF_COLOR_NORMAL;
|
|
|
|
+ if (ratio > 20.0)
|
|
|
|
+ color = PERF_COLOR_RED;
|
|
|
|
+ else if (ratio > 10.0)
|
|
|
|
+ color = PERF_COLOR_MAGENTA;
|
|
|
|
+ else if (ratio > 5.0)
|
|
|
|
+ color = PERF_COLOR_YELLOW;
|
|
|
|
+
|
|
|
|
+ fprintf(stderr, " # ");
|
|
|
|
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
|
|
|
|
+ fprintf(stderr, " of all LL-cache hits ");
|
|
|
|
+}
|
|
|
|
+
|
|
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
|
|
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
|
|
{
|
|
{
|
|
double total, ratio = 0.0;
|
|
double total, ratio = 0.0;
|
|
@@ -684,6 +788,34 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
|
|
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
|
|
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
|
|
runtime_l1_dcache_stats[cpu].n != 0) {
|
|
runtime_l1_dcache_stats[cpu].n != 0) {
|
|
print_l1_dcache_misses(cpu, evsel, avg);
|
|
print_l1_dcache_misses(cpu, evsel, avg);
|
|
|
|
+ } else if (
|
|
|
|
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
|
|
|
|
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
|
|
|
|
+ runtime_l1_icache_stats[cpu].n != 0) {
|
|
|
|
+ print_l1_icache_misses(cpu, evsel, avg);
|
|
|
|
+ } else if (
|
|
|
|
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
|
|
|
|
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
|
|
|
|
+ runtime_dtlb_cache_stats[cpu].n != 0) {
|
|
|
|
+ print_dtlb_cache_misses(cpu, evsel, avg);
|
|
|
|
+ } else if (
|
|
|
|
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
|
|
|
|
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
|
|
|
|
+ runtime_itlb_cache_stats[cpu].n != 0) {
|
|
|
|
+ print_itlb_cache_misses(cpu, evsel, avg);
|
|
|
|
+ } else if (
|
|
|
|
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
|
|
|
|
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
|
|
|
|
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
|
|
|
|
+ runtime_ll_cache_stats[cpu].n != 0) {
|
|
|
|
+ print_ll_cache_misses(cpu, evsel, avg);
|
|
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
|
|
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
|
|
runtime_cacherefs_stats[cpu].n != 0) {
|
|
runtime_cacherefs_stats[cpu].n != 0) {
|
|
total = avg_stats(&runtime_cacherefs_stats[cpu]);
|
|
total = avg_stats(&runtime_cacherefs_stats[cpu]);
|
|
@@ -842,10 +974,12 @@ static void print_stat(int argc, const char **argv)
|
|
}
|
|
}
|
|
|
|
|
|
if (!csv_output) {
|
|
if (!csv_output) {
|
|
- fprintf(stderr, "\n");
|
|
|
|
- fprintf(stderr, " %18.9f seconds time elapsed",
|
|
|
|
|
|
+ if (!null_run)
|
|
|
|
+ fprintf(stderr, "\n");
|
|
|
|
+ fprintf(stderr, " %17.9f seconds time elapsed",
|
|
avg_stats(&walltime_nsecs_stats)/1e9);
|
|
avg_stats(&walltime_nsecs_stats)/1e9);
|
|
if (run_count > 1) {
|
|
if (run_count > 1) {
|
|
|
|
+ fprintf(stderr, " ");
|
|
print_noise_pct(stddev_stats(&walltime_nsecs_stats),
|
|
print_noise_pct(stddev_stats(&walltime_nsecs_stats),
|
|
avg_stats(&walltime_nsecs_stats));
|
|
avg_stats(&walltime_nsecs_stats));
|
|
}
|
|
}
|