builtin-stat.c

/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ~/hackbench 10

   Time: 0.104

   Performance counter stats for '/home/mingo/hackbench':

   1255.538611  task clock ticks   #   10.143 CPU utilization factor
         54011  context switches   #    0.043 M/sec
           385  CPU migrations     #    0.000 M/sec
         17755  pagefaults         #    0.014 M/sec
    3808323185  CPU cycles         # 3033.219 M/sec
    1575111190  instructions       # 1254.530 M/sec
      17367895  cache references   #   13.833 M/sec
       7674421  cache misses       #    6.112 M/sec

   Wall-clock time elapsed:   123.786620 msecs

 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "perf.h"
#include "builtin.h"

#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include <sys/prctl.h>
static int system_wide = 0;
static int inherit     = 1;
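
/*
 * The eight counters measured by default when no -e option is given:
 * task clock, context switches, CPU migrations and page faults, plus
 * CPU cycles, instructions and cache references/misses.
 */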
static __u64 default_event_id[MAX_COUNTERS] = {
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),

	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
};

static int default_interval = 100000;

static int event_count[MAX_COUNTERS];
static int fd[MAX_NR_CPUS][MAX_COUNTERS];

static int target_pid = -1;
static int nr_cpus    = 0;
static unsigned int page_size;

static int scale = 1;

static const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};

static __u64 event_res[MAX_COUNTERS][3];
static __u64 event_scaled[MAX_COUNTERS];

static __u64 runtime_nsecs;
static __u64 walltime_nsecs;
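
/*
 * Set up one counter: in system-wide mode open one fd per online CPU;
 * otherwise open a single, initially disabled counter on the current
 * task (it is enabled via prctl() just before the workload is exec'd).
 */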
static void create_perfstat_counter(int counter)
{
	struct perf_counter_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.config		= event_id[counter];
	attr.sample_type	= 0;
	attr.exclude_kernel	= event_mask[counter] & EVENT_MASK_KERNEL;
	attr.exclude_user	= event_mask[counter] & EVENT_MASK_USER;

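	/*
	 * Also request the enabled/running times so that read_counter()
	 * can scale counts from counters that were time-multiplexed:
	 */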
	if (scale)
		attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				   PERF_FORMAT_TOTAL_TIME_RUNNING;

	if (system_wide) {
		int cpu;

		for (cpu = 0; cpu < nr_cpus; cpu++) {
			fd[cpu][counter] = sys_perf_counter_open(&attr, -1, cpu, -1, 0);
			if (fd[cpu][counter] < 0) {
				printf("perfstat error: syscall returned with %d (%s)\n",
				       fd[cpu][counter], strerror(errno));
				exit(-1);
			}
		}
	} else {
		attr.inherit  = inherit;
		attr.disabled = 1;

		fd[0][counter] = sys_perf_counter_open(&attr, 0, -1, -1, 0);
		if (fd[0][counter] < 0) {
			printf("perfstat error: syscall returned with %d (%s)\n",
			       fd[0][counter], strerror(errno));
			exit(-1);
		}
	}
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(int counter)
{
	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK))
		return 1;
	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
		return 1;

	return 0;
}

/*
 * Read out the results of a single counter:
 */
static void read_counter(int counter)
{
	__u64 *count, single_count[3];
	ssize_t res;
	int cpu, nv;
	int scaled;

	count = event_res[counter];

	count[0] = count[1] = count[2] = 0;

	nv = scale ? 3 : 1;
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
		assert(res == nv * sizeof(__u64));

		count[0] += single_count[0];
		if (scale) {
			count[1] += single_count[1];
			count[2] += single_count[2];
		}
	}

	scaled = 0;
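	/*
	 * count[0] is the raw value, count[1] the time the counter was
	 * enabled and count[2] the time it was actually running.  If the
	 * counter was multiplexed off the PMU for part of the run,
	 * extrapolate: count = count * enabled / running.
	 */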
	if (scale) {
		if (count[2] == 0) {
			event_scaled[counter] = -1;
			count[0] = 0;
			return;
		}

		if (count[2] < count[1]) {
			event_scaled[counter] = 1;
			count[0] = (unsigned long long)
				((double)count[0] * count[1] / count[2] + 0.5);
		}
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
		runtime_nsecs = count[0];
}

/*
 * Print out the results of a single counter:
 */
static void print_counter(int counter)
{
	__u64 *count;
	int scaled;

	count  = event_res[counter];
	scaled = event_scaled[counter];

	if (scaled == -1) {
		fprintf(stderr, " %14s %-20s\n",
			"<not counted>", event_name(counter));
		return;
	}

	if (nsec_counter(counter)) {
		double msecs = (double)count[0] / 1000000;

		fprintf(stderr, " %14.6f %-20s",
			msecs, event_name(counter));
		if (event_id[counter] ==
		    EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {

			fprintf(stderr, " # %11.3f CPU utilization factor",
				(double)count[0] / (double)walltime_nsecs);
		}
	} else {
		fprintf(stderr, " %14Ld %-20s",
			count[0], event_name(counter));
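		/*
		 * Express the count as a rate, normalized by the measured
		 * task-clock time (runtime_nsecs); events/nsec * 1000
		 * equals millions of events per second:
		 */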
		if (runtime_nsecs)
			fprintf(stderr, " # %11.3f M/sec",
				(double)count[0]/runtime_nsecs*1000.0);
	}
	if (scaled)
		fprintf(stderr, " (scaled from %.2f%%)",
			(double)count[2] / count[1] * 100);

	fprintf(stderr, "\n");
}
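
/*
 * Run the measurement: open all counters, enable them, fork and exec
 * the workload, wait for it to exit, then read out and print each
 * counter together with the elapsed wall-clock time.
 */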
static int do_perfstat(int argc, const char **argv)
{
	unsigned long long t0, t1;
	int counter;
	int status;
	int pid;
	int i;

	if (!system_wide)
		nr_cpus = 1;

	for (counter = 0; counter < nr_counters; counter++)
		create_perfstat_counter(counter);

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	prctl(PR_TASK_PERF_COUNTERS_ENABLE);

	if ((pid = fork()) < 0)
		perror("failed to fork");
	if (!pid) {
		if (execvp(argv[0], (char **)argv)) {
			perror(argv[0]);
			exit(-1);
		}
	}
	while (wait(&status) >= 0)
		;
	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
	t1 = rdclock();

	walltime_nsecs = t1 - t0;

	fflush(stdout);

	fprintf(stderr, "\n");
	fprintf(stderr, " Performance counter stats for \'%s", argv[0]);

	for (i = 1; i < argc; i++)
		fprintf(stderr, " %s", argv[i]);

	fprintf(stderr, "\':\n");
	fprintf(stderr, "\n");

	for (counter = 0; counter < nr_counters; counter++)
		read_counter(counter);

	for (counter = 0; counter < nr_counters; counter++)
		print_counter(counter);

	fprintf(stderr, "\n");
	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
		(double)(t1-t0)/1e6);
	fprintf(stderr, "\n");

	return 0;
}
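
/*
 * Dummy signal handler: perf stat itself must survive SIGINT & friends
 * so it can still print the results; the exec'd workload gets the
 * default disposition and terminates normally (see cmd_stat() below).
 */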
static void skip_signal(int signo)
{
}

static const char * const stat_usage[] = {
	"perf stat [<options>] <command>",
	NULL
};

static char events_help_msg[EVENTS_HELP_MAX];

static const struct option options[] = {
	OPT_CALLBACK('e', "event", NULL, "event",
		     events_help_msg, parse_events),
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
	OPT_INTEGER('p', "pid", &target_pid,
		    "stat events on existing pid"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('l', "scale", &scale,
		    "scale/normalize counters"),
	OPT_END()
};

int cmd_stat(int argc, const char **argv, const char *prefix)
{
	int counter;

	page_size = sysconf(_SC_PAGE_SIZE);

	create_events_help(events_help_msg);
	memcpy(event_id, default_event_id, sizeof(default_event_id));

	argc = parse_options(argc, argv, options, stat_usage, 0);
	if (!argc)
		usage_with_options(stat_usage, options);
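
	/*
	 * No events selected with -e: fall back to the eight entries of
	 * default_event_id copied into event_id above.
	 */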
	if (!nr_counters)
		nr_counters = 8;

	for (counter = 0; counter < nr_counters; counter++) {
		if (event_count[counter])
			continue;

		event_count[counter] = default_interval;
	}

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	assert(nr_cpus <= MAX_NR_CPUS);
	assert(nr_cpus >= 0);

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	return do_perfstat(argc, argv);
}