builtin-top.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149
  1. /*
  2. * kerneltop.c: show top kernel functions - performance counters showcase
  3. Build with:
  4. make -C Documentation/perf_counter/
  5. Sample output:
  6. ------------------------------------------------------------------------------
  7. KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
  8. ------------------------------------------------------------------------------
  9. weight RIP kernel function
  10. ______ ________________ _______________
  11. 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
  12. 33.00 - ffffffff804cb740 : sock_alloc_send_skb
  13. 31.26 - ffffffff804ce808 : skb_push
  14. 22.43 - ffffffff80510004 : tcp_established_options
  15. 19.00 - ffffffff8027d250 : find_get_page
  16. 15.76 - ffffffff804e4fc9 : eth_type_trans
  17. 15.20 - ffffffff804d8baa : dst_release
  18. 14.86 - ffffffff804cf5d8 : skb_release_head_state
  19. 14.00 - ffffffff802217d5 : read_hpet
  20. 12.00 - ffffffff804ffb7f : __ip_local_out
  21. 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
  22. 8.54 - ffffffff805001a3 : ip_queue_xmit
  23. */
  24. /*
  25. * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  26. *
  27. * Improvements and fixes by:
  28. *
  29. * Arjan van de Ven <arjan@linux.intel.com>
  30. * Yanmin Zhang <yanmin.zhang@intel.com>
  31. * Wu Fengguang <fengguang.wu@intel.com>
  32. * Mike Galbraith <efault@gmx.de>
  33. * Paul Mackerras <paulus@samba.org>
  34. *
  35. * Released under the GPL v2. (and only v2, not any later version)
  36. */
  37. #include "perf.h"
  38. #include "util/util.h"
  39. #include <getopt.h>
  40. #include <assert.h>
  41. #include <fcntl.h>
  42. #include <stdio.h>
  43. #include <errno.h>
  44. #include <time.h>
  45. #include <sched.h>
  46. #include <pthread.h>
  47. #include <sys/syscall.h>
  48. #include <sys/ioctl.h>
  49. #include <sys/poll.h>
  50. #include <sys/prctl.h>
  51. #include <sys/wait.h>
  52. #include <sys/uio.h>
  53. #include <sys/mman.h>
  54. #include <linux/unistd.h>
  55. #include <linux/types.h>
  56. static int system_wide = 0;
  57. static int nr_counters = 0;
  58. static __u64 event_id[MAX_COUNTERS] = {
  59. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
  60. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
  61. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
  62. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
  63. EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
  64. EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
  65. EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
  66. EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
  67. };
  68. static int default_interval = 100000;
  69. static int event_count[MAX_COUNTERS];
  70. static int fd[MAX_NR_CPUS][MAX_COUNTERS];
  71. static __u64 count_filter = 100;
  72. static int tid = -1;
  73. static int profile_cpu = -1;
  74. static int nr_cpus = 0;
  75. static int nmi = 1;
  76. static unsigned int realtime_prio = 0;
  77. static int group = 0;
  78. static unsigned int page_size;
  79. static unsigned int mmap_pages = 16;
  80. static int use_mmap = 0;
  81. static int use_munmap = 0;
  82. static int freq = 0;
  83. static char *vmlinux;
  84. static char *sym_filter;
  85. static unsigned long filter_start;
  86. static unsigned long filter_end;
  87. static int delay_secs = 2;
  88. static int zero;
  89. static int dump_symtab;
  90. static int scale;
  91. struct source_line {
  92. uint64_t EIP;
  93. unsigned long count;
  94. char *line;
  95. struct source_line *next;
  96. };
  97. static struct source_line *lines;
  98. static struct source_line **lines_tail;
  99. static const unsigned int default_count[] = {
  100. 1000000,
  101. 1000000,
  102. 10000,
  103. 10000,
  104. 1000000,
  105. 10000,
  106. };
  107. static char *hw_event_names[] = {
  108. "CPU cycles",
  109. "instructions",
  110. "cache references",
  111. "cache misses",
  112. "branches",
  113. "branch misses",
  114. "bus cycles",
  115. };
  116. static char *sw_event_names[] = {
  117. "cpu clock ticks",
  118. "task clock ticks",
  119. "pagefaults",
  120. "context switches",
  121. "CPU migrations",
  122. "minor faults",
  123. "major faults",
  124. };
  125. struct event_symbol {
  126. __u64 event;
  127. char *symbol;
  128. };
  129. static struct event_symbol event_symbols[] = {
  130. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
  131. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
  132. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
  133. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
  134. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
  135. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
  136. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
  137. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
  138. {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
  139. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
  140. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
  141. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
  142. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
  143. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
  144. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
  145. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
  146. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
  147. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
  148. {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
  149. };
  150. #define __PERF_COUNTER_FIELD(config, name) \
  151. ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
  152. #define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW)
  153. #define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG)
  154. #define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE)
  155. #define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT)
  156. static void display_events_help(void)
  157. {
  158. unsigned int i;
  159. __u64 e;
  160. printf(
  161. " -e EVENT --event=EVENT # symbolic-name abbreviations");
  162. for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
  163. int type, id;
  164. e = event_symbols[i].event;
  165. type = PERF_COUNTER_TYPE(e);
  166. id = PERF_COUNTER_ID(e);
  167. printf("\n %d:%d: %-20s",
  168. type, id, event_symbols[i].symbol);
  169. }
  170. printf("\n"
  171. " rNNN: raw PMU events (eventsel+umask)\n\n");
  172. }
  173. static void display_help(void)
  174. {
  175. printf(
  176. "Usage: kerneltop [<options>]\n"
  177. " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
  178. "KernelTop Options (up to %d event types can be specified at once):\n\n",
  179. MAX_COUNTERS);
  180. display_events_help();
  181. printf(
  182. " -c CNT --count=CNT # event period to sample\n\n"
  183. " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
  184. " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
  185. " -l # show scale factor for RR events\n"
  186. " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
  187. " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
  188. " -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n"
  189. " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
  190. " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
  191. " -z --zero # zero counts after display\n"
  192. " -D --dump_symtab # dump symbol table to stderr on startup\n"
  193. " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
  194. " -M --mmap_info # print mmap info stream\n"
  195. " -U --munmap_info # print munmap info stream\n"
  196. );
  197. exit(0);
  198. }
  199. static char *event_name(int ctr)
  200. {
  201. __u64 config = event_id[ctr];
  202. int type = PERF_COUNTER_TYPE(config);
  203. int id = PERF_COUNTER_ID(config);
  204. static char buf[32];
  205. if (PERF_COUNTER_RAW(config)) {
  206. sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
  207. return buf;
  208. }
  209. switch (type) {
  210. case PERF_TYPE_HARDWARE:
  211. if (id < PERF_HW_EVENTS_MAX)
  212. return hw_event_names[id];
  213. return "unknown-hardware";
  214. case PERF_TYPE_SOFTWARE:
  215. if (id < PERF_SW_EVENTS_MAX)
  216. return sw_event_names[id];
  217. return "unknown-software";
  218. default:
  219. break;
  220. }
  221. return "unknown";
  222. }
  223. /*
  224. * Each event can have multiple symbolic names.
  225. * Symbolic names are (almost) exactly matched.
  226. */
  227. static __u64 match_event_symbols(char *str)
  228. {
  229. __u64 config, id;
  230. int type;
  231. unsigned int i;
  232. if (sscanf(str, "r%llx", &config) == 1)
  233. return config | PERF_COUNTER_RAW_MASK;
  234. if (sscanf(str, "%d:%llu", &type, &id) == 2)
  235. return EID(type, id);
  236. for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
  237. if (!strncmp(str, event_symbols[i].symbol,
  238. strlen(event_symbols[i].symbol)))
  239. return event_symbols[i].event;
  240. }
  241. return ~0ULL;
  242. }
  243. static int parse_events(char *str)
  244. {
  245. __u64 config;
  246. again:
  247. if (nr_counters == MAX_COUNTERS)
  248. return -1;
  249. config = match_event_symbols(str);
  250. if (config == ~0ULL)
  251. return -1;
  252. event_id[nr_counters] = config;
  253. nr_counters++;
  254. str = strstr(str, ",");
  255. if (str) {
  256. str++;
  257. goto again;
  258. }
  259. return 0;
  260. }
  261. /*
  262. * Symbols
  263. */
  264. static uint64_t min_ip;
  265. static uint64_t max_ip = -1ll;
  266. struct sym_entry {
  267. unsigned long long addr;
  268. char *sym;
  269. unsigned long count[MAX_COUNTERS];
  270. int skip;
  271. struct source_line *source;
  272. };
  273. #define MAX_SYMS 100000
  274. static int sym_table_count;
  275. struct sym_entry *sym_filter_entry;
  276. static struct sym_entry sym_table[MAX_SYMS];
  277. static void show_details(struct sym_entry *sym);
  278. /*
  279. * Ordering weight: count-1 * count-2 * ... / count-n
  280. */
  281. static double sym_weight(const struct sym_entry *sym)
  282. {
  283. double weight;
  284. int counter;
  285. weight = sym->count[0];
  286. for (counter = 1; counter < nr_counters-1; counter++)
  287. weight *= sym->count[counter];
  288. weight /= (sym->count[counter] + 1);
  289. return weight;
  290. }
  291. static int compare(const void *__sym1, const void *__sym2)
  292. {
  293. const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
  294. return sym_weight(sym1) < sym_weight(sym2);
  295. }
  296. static long events;
  297. static long userspace_events;
  298. static const char CONSOLE_CLEAR[] = "";
  299. static struct sym_entry tmp[MAX_SYMS];
  300. static void print_sym_table(void)
  301. {
  302. int i, j, active_count, printed;
  303. int counter;
  304. float events_per_sec = events/delay_secs;
  305. float kevents_per_sec = (events-userspace_events)/delay_secs;
  306. float sum_kevents = 0.0;
  307. events = userspace_events = 0;
  308. /* Iterate over symbol table and copy/tally/decay active symbols. */
  309. for (i = 0, active_count = 0; i < sym_table_count; i++) {
  310. if (sym_table[i].count[0]) {
  311. tmp[active_count++] = sym_table[i];
  312. sum_kevents += sym_table[i].count[0];
  313. for (j = 0; j < nr_counters; j++)
  314. sym_table[i].count[j] = zero ? 0 : sym_table[i].count[j] * 7 / 8;
  315. }
  316. }
  317. qsort(tmp, active_count + 1, sizeof(tmp[0]), compare);
  318. write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
  319. printf(
  320. "------------------------------------------------------------------------------\n");
  321. printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ",
  322. events_per_sec,
  323. 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
  324. nmi ? "NMI" : "IRQ");
  325. if (nr_counters == 1)
  326. printf("%d ", event_count[0]);
  327. for (counter = 0; counter < nr_counters; counter++) {
  328. if (counter)
  329. printf("/");
  330. printf("%s", event_name(counter));
  331. }
  332. printf( "], ");
  333. if (tid != -1)
  334. printf(" (tid: %d", tid);
  335. else
  336. printf(" (all");
  337. if (profile_cpu != -1)
  338. printf(", cpu: %d)\n", profile_cpu);
  339. else {
  340. if (tid != -1)
  341. printf(")\n");
  342. else
  343. printf(", %d CPUs)\n", nr_cpus);
  344. }
  345. printf("------------------------------------------------------------------------------\n\n");
  346. if (nr_counters == 1)
  347. printf(" events pcnt");
  348. else
  349. printf(" weight events pcnt");
  350. printf(" RIP kernel function\n"
  351. " ______ ______ _____ ________________ _______________\n\n"
  352. );
  353. for (i = 0, printed = 0; i < active_count; i++) {
  354. float pcnt;
  355. if (++printed > 18 || tmp[i].count[0] < count_filter)
  356. break;
  357. pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
  358. if (nr_counters == 1)
  359. printf("%19.2f - %4.1f%% - %016llx : %s\n",
  360. sym_weight(tmp + i),
  361. pcnt, tmp[i].addr, tmp[i].sym);
  362. else
  363. printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
  364. sym_weight(tmp + i),
  365. tmp[i].count[0],
  366. pcnt, tmp[i].addr, tmp[i].sym);
  367. }
  368. if (sym_filter_entry)
  369. show_details(sym_filter_entry);
  370. {
  371. struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
  372. if (poll(&stdin_poll, 1, 0) == 1) {
  373. printf("key pressed - exiting.\n");
  374. exit(0);
  375. }
  376. }
  377. }
  378. static void *display_thread(void *arg)
  379. {
  380. printf("KernelTop refresh period: %d seconds\n", delay_secs);
  381. while (!sleep(delay_secs))
  382. print_sym_table();
  383. return NULL;
  384. }
  385. static int read_symbol(FILE *in, struct sym_entry *s)
  386. {
  387. static int filter_match = 0;
  388. char *sym, stype;
  389. char str[500];
  390. int rc, pos;
  391. rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
  392. if (rc == EOF)
  393. return -1;
  394. assert(rc == 3);
  395. /* skip until end of line: */
  396. pos = strlen(str);
  397. do {
  398. rc = fgetc(in);
  399. if (rc == '\n' || rc == EOF || pos >= 499)
  400. break;
  401. str[pos] = rc;
  402. pos++;
  403. } while (1);
  404. str[pos] = 0;
  405. sym = str;
  406. /* Filter out known duplicates and non-text symbols. */
  407. if (!strcmp(sym, "_text"))
  408. return 1;
  409. if (!min_ip && !strcmp(sym, "_stext"))
  410. return 1;
  411. if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
  412. return 1;
  413. if (stype != 'T' && stype != 't')
  414. return 1;
  415. if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
  416. return 1;
  417. if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
  418. return 1;
  419. s->sym = malloc(strlen(str)+1);
  420. assert(s->sym);
  421. strcpy((char *)s->sym, str);
  422. s->skip = 0;
  423. /* Tag events to be skipped. */
  424. if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
  425. s->skip = 1;
  426. else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
  427. s->skip = 1;
  428. else if (!strcmp("mwait_idle", s->sym))
  429. s->skip = 1;
  430. if (filter_match == 1) {
  431. filter_end = s->addr;
  432. filter_match = -1;
  433. if (filter_end - filter_start > 10000) {
  434. printf("hm, too large filter symbol <%s> - skipping.\n",
  435. sym_filter);
  436. printf("symbol filter start: %016lx\n", filter_start);
  437. printf(" end: %016lx\n", filter_end);
  438. filter_end = filter_start = 0;
  439. sym_filter = NULL;
  440. sleep(1);
  441. }
  442. }
  443. if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
  444. filter_match = 1;
  445. filter_start = s->addr;
  446. }
  447. return 0;
  448. }
  449. static int compare_addr(const void *__sym1, const void *__sym2)
  450. {
  451. const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
  452. return sym1->addr > sym2->addr;
  453. }
  454. static void sort_symbol_table(void)
  455. {
  456. int i, dups;
  457. do {
  458. qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
  459. for (i = 0, dups = 0; i < sym_table_count; i++) {
  460. if (sym_table[i].addr == sym_table[i+1].addr) {
  461. sym_table[i+1].addr = -1ll;
  462. dups++;
  463. }
  464. }
  465. sym_table_count -= dups;
  466. } while(dups);
  467. }
  468. static void parse_symbols(void)
  469. {
  470. struct sym_entry *last;
  471. FILE *kallsyms = fopen("/proc/kallsyms", "r");
  472. if (!kallsyms) {
  473. printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
  474. exit(-1);
  475. }
  476. while (!feof(kallsyms)) {
  477. if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
  478. sym_table_count++;
  479. assert(sym_table_count <= MAX_SYMS);
  480. }
  481. }
  482. sort_symbol_table();
  483. min_ip = sym_table[0].addr;
  484. max_ip = sym_table[sym_table_count-1].addr;
  485. last = sym_table + sym_table_count++;
  486. last->addr = -1ll;
  487. last->sym = "<end>";
  488. if (filter_end) {
  489. int count;
  490. for (count=0; count < sym_table_count; count ++) {
  491. if (!strcmp(sym_table[count].sym, sym_filter)) {
  492. sym_filter_entry = &sym_table[count];
  493. break;
  494. }
  495. }
  496. }
  497. if (dump_symtab) {
  498. int i;
  499. for (i = 0; i < sym_table_count; i++)
  500. fprintf(stderr, "%llx %s\n",
  501. sym_table[i].addr, sym_table[i].sym);
  502. }
  503. }
  504. /*
  505. * Source lines
  506. */
  507. static void parse_vmlinux(char *filename)
  508. {
  509. FILE *file;
  510. char command[PATH_MAX*2];
  511. if (!filename)
  512. return;
  513. sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
  514. file = popen(command, "r");
  515. if (!file)
  516. return;
  517. lines_tail = &lines;
  518. while (!feof(file)) {
  519. struct source_line *src;
  520. size_t dummy = 0;
  521. char *c;
  522. src = malloc(sizeof(struct source_line));
  523. assert(src != NULL);
  524. memset(src, 0, sizeof(struct source_line));
  525. if (getline(&src->line, &dummy, file) < 0)
  526. break;
  527. if (!src->line)
  528. break;
  529. c = strchr(src->line, '\n');
  530. if (c)
  531. *c = 0;
  532. src->next = NULL;
  533. *lines_tail = src;
  534. lines_tail = &src->next;
  535. if (strlen(src->line)>8 && src->line[8] == ':')
  536. src->EIP = strtoull(src->line, NULL, 16);
  537. if (strlen(src->line)>8 && src->line[16] == ':')
  538. src->EIP = strtoull(src->line, NULL, 16);
  539. }
  540. pclose(file);
  541. }
  542. static void record_precise_ip(uint64_t ip)
  543. {
  544. struct source_line *line;
  545. for (line = lines; line; line = line->next) {
  546. if (line->EIP == ip)
  547. line->count++;
  548. if (line->EIP > ip)
  549. break;
  550. }
  551. }
  552. static void lookup_sym_in_vmlinux(struct sym_entry *sym)
  553. {
  554. struct source_line *line;
  555. char pattern[PATH_MAX];
  556. sprintf(pattern, "<%s>:", sym->sym);
  557. for (line = lines; line; line = line->next) {
  558. if (strstr(line->line, pattern)) {
  559. sym->source = line;
  560. break;
  561. }
  562. }
  563. }
  564. static void show_lines(struct source_line *line_queue, int line_queue_count)
  565. {
  566. int i;
  567. struct source_line *line;
  568. line = line_queue;
  569. for (i = 0; i < line_queue_count; i++) {
  570. printf("%8li\t%s\n", line->count, line->line);
  571. line = line->next;
  572. }
  573. }
  574. #define TRACE_COUNT 3
  575. static void show_details(struct sym_entry *sym)
  576. {
  577. struct source_line *line;
  578. struct source_line *line_queue = NULL;
  579. int displayed = 0;
  580. int line_queue_count = 0;
  581. if (!sym->source)
  582. lookup_sym_in_vmlinux(sym);
  583. if (!sym->source)
  584. return;
  585. printf("Showing details for %s\n", sym->sym);
  586. line = sym->source;
  587. while (line) {
  588. if (displayed && strstr(line->line, ">:"))
  589. break;
  590. if (!line_queue_count)
  591. line_queue = line;
  592. line_queue_count ++;
  593. if (line->count >= count_filter) {
  594. show_lines(line_queue, line_queue_count);
  595. line_queue_count = 0;
  596. line_queue = NULL;
  597. } else if (line_queue_count > TRACE_COUNT) {
  598. line_queue = line_queue->next;
  599. line_queue_count --;
  600. }
  601. line->count = 0;
  602. displayed++;
  603. if (displayed > 300)
  604. break;
  605. line = line->next;
  606. }
  607. }
  608. /*
  609. * Binary search in the histogram table and record the hit:
  610. */
  611. static void record_ip(uint64_t ip, int counter)
  612. {
  613. int left_idx, middle_idx, right_idx, idx;
  614. unsigned long left, middle, right;
  615. record_precise_ip(ip);
  616. left_idx = 0;
  617. right_idx = sym_table_count-1;
  618. assert(ip <= max_ip && ip >= min_ip);
  619. while (left_idx + 1 < right_idx) {
  620. middle_idx = (left_idx + right_idx) / 2;
  621. left = sym_table[ left_idx].addr;
  622. middle = sym_table[middle_idx].addr;
  623. right = sym_table[ right_idx].addr;
  624. if (!(left <= middle && middle <= right)) {
  625. printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
  626. printf("%d %d %d\n", left_idx, middle_idx, right_idx);
  627. }
  628. assert(left <= middle && middle <= right);
  629. if (!(left <= ip && ip <= right)) {
  630. printf(" left: %016lx\n", left);
  631. printf(" ip: %016lx\n", (unsigned long)ip);
  632. printf("right: %016lx\n", right);
  633. }
  634. assert(left <= ip && ip <= right);
  635. /*
  636. * [ left .... target .... middle .... right ]
  637. * => right := middle
  638. */
  639. if (ip < middle) {
  640. right_idx = middle_idx;
  641. continue;
  642. }
  643. /*
  644. * [ left .... middle ... target ... right ]
  645. * => left := middle
  646. */
  647. left_idx = middle_idx;
  648. }
  649. idx = left_idx;
  650. if (!sym_table[idx].skip)
  651. sym_table[idx].count[counter]++;
  652. else events--;
  653. }
  654. static void process_event(uint64_t ip, int counter)
  655. {
  656. events++;
  657. if (ip < min_ip || ip > max_ip) {
  658. userspace_events++;
  659. return;
  660. }
  661. record_ip(ip, counter);
  662. }
  663. static void process_options(int argc, char **argv)
  664. {
  665. int error = 0, counter;
  666. for (;;) {
  667. int option_index = 0;
  668. /** Options for getopt */
  669. static struct option long_options[] = {
  670. {"count", required_argument, NULL, 'c'},
  671. {"cpu", required_argument, NULL, 'C'},
  672. {"delay", required_argument, NULL, 'd'},
  673. {"dump_symtab", no_argument, NULL, 'D'},
  674. {"event", required_argument, NULL, 'e'},
  675. {"filter", required_argument, NULL, 'f'},
  676. {"group", required_argument, NULL, 'g'},
  677. {"help", no_argument, NULL, 'h'},
  678. {"nmi", required_argument, NULL, 'n'},
  679. {"mmap_info", no_argument, NULL, 'M'},
  680. {"mmap_pages", required_argument, NULL, 'm'},
  681. {"munmap_info", no_argument, NULL, 'U'},
  682. {"pid", required_argument, NULL, 'p'},
  683. {"realtime", required_argument, NULL, 'r'},
  684. {"scale", no_argument, NULL, 'l'},
  685. {"symbol", required_argument, NULL, 's'},
  686. {"stat", no_argument, NULL, 'S'},
  687. {"vmlinux", required_argument, NULL, 'x'},
  688. {"zero", no_argument, NULL, 'z'},
  689. {"freq", required_argument, NULL, 'F'},
  690. {NULL, 0, NULL, 0 }
  691. };
  692. int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
  693. long_options, &option_index);
  694. if (c == -1)
  695. break;
  696. switch (c) {
  697. case 'a': system_wide = 1; break;
  698. case 'c': default_interval = atoi(optarg); break;
  699. case 'C':
  700. /* CPU and PID are mutually exclusive */
  701. if (tid != -1) {
  702. printf("WARNING: CPU switch overriding PID\n");
  703. sleep(1);
  704. tid = -1;
  705. }
  706. profile_cpu = atoi(optarg); break;
  707. case 'd': delay_secs = atoi(optarg); break;
  708. case 'D': dump_symtab = 1; break;
  709. case 'e': error = parse_events(optarg); break;
  710. case 'f': count_filter = atoi(optarg); break;
  711. case 'g': group = atoi(optarg); break;
  712. case 'h': display_help(); break;
  713. case 'l': scale = 1; break;
  714. case 'n': nmi = atoi(optarg); break;
  715. case 'p':
  716. /* CPU and PID are mutually exclusive */
  717. if (profile_cpu != -1) {
  718. printf("WARNING: PID switch overriding CPU\n");
  719. sleep(1);
  720. profile_cpu = -1;
  721. }
  722. tid = atoi(optarg); break;
  723. case 'r': realtime_prio = atoi(optarg); break;
  724. case 's': sym_filter = strdup(optarg); break;
  725. case 'x': vmlinux = strdup(optarg); break;
  726. case 'z': zero = 1; break;
  727. case 'm': mmap_pages = atoi(optarg); break;
  728. case 'M': use_mmap = 1; break;
  729. case 'U': use_munmap = 1; break;
  730. case 'F': freq = 1; default_interval = atoi(optarg); break;
  731. default: error = 1; break;
  732. }
  733. }
  734. if (error)
  735. display_help();
  736. if (!nr_counters) {
  737. nr_counters = 1;
  738. event_id[0] = 0;
  739. }
  740. for (counter = 0; counter < nr_counters; counter++) {
  741. if (event_count[counter])
  742. continue;
  743. event_count[counter] = default_interval;
  744. }
  745. }
  746. struct mmap_data {
  747. int counter;
  748. void *base;
  749. unsigned int mask;
  750. unsigned int prev;
  751. };
  752. static unsigned int mmap_read_head(struct mmap_data *md)
  753. {
  754. struct perf_counter_mmap_page *pc = md->base;
  755. int head;
  756. head = pc->data_head;
  757. rmb();
  758. return head;
  759. }
  760. struct timeval last_read, this_read;
  761. static void mmap_read(struct mmap_data *md)
  762. {
  763. unsigned int head = mmap_read_head(md);
  764. unsigned int old = md->prev;
  765. unsigned char *data = md->base + page_size;
  766. int diff;
  767. gettimeofday(&this_read, NULL);
  768. /*
  769. * If we're further behind than half the buffer, there's a chance
  770. * the writer will bite our tail and screw up the events under us.
  771. *
  772. * If we somehow ended up ahead of the head, we got messed up.
  773. *
  774. * In either case, truncate and restart at head.
  775. */
  776. diff = head - old;
  777. if (diff > md->mask / 2 || diff < 0) {
  778. struct timeval iv;
  779. unsigned long msecs;
  780. timersub(&this_read, &last_read, &iv);
  781. msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
  782. fprintf(stderr, "WARNING: failed to keep up with mmap data."
  783. " Last read %lu msecs ago.\n", msecs);
  784. /*
  785. * head points to a known good entry, start there.
  786. */
  787. old = head;
  788. }
  789. last_read = this_read;
  790. for (; old != head;) {
  791. struct ip_event {
  792. struct perf_event_header header;
  793. __u64 ip;
  794. __u32 pid, tid;
  795. };
  796. struct mmap_event {
  797. struct perf_event_header header;
  798. __u32 pid, tid;
  799. __u64 start;
  800. __u64 len;
  801. __u64 pgoff;
  802. char filename[PATH_MAX];
  803. };
  804. typedef union event_union {
  805. struct perf_event_header header;
  806. struct ip_event ip;
  807. struct mmap_event mmap;
  808. } event_t;
  809. event_t *event = (event_t *)&data[old & md->mask];
  810. event_t event_copy;
  811. size_t size = event->header.size;
  812. /*
  813. * Event straddles the mmap boundary -- header should always
  814. * be inside due to u64 alignment of output.
  815. */
  816. if ((old & md->mask) + size != ((old + size) & md->mask)) {
  817. unsigned int offset = old;
  818. unsigned int len = min(sizeof(*event), size), cpy;
  819. void *dst = &event_copy;
  820. do {
  821. cpy = min(md->mask + 1 - (offset & md->mask), len);
  822. memcpy(dst, &data[offset & md->mask], cpy);
  823. offset += cpy;
  824. dst += cpy;
  825. len -= cpy;
  826. } while (len);
  827. event = &event_copy;
  828. }
  829. old += size;
  830. if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
  831. if (event->header.type & PERF_RECORD_IP)
  832. process_event(event->ip.ip, md->counter);
  833. } else {
  834. switch (event->header.type) {
  835. case PERF_EVENT_MMAP:
  836. case PERF_EVENT_MUNMAP:
  837. printf("%s: %Lu %Lu %Lu %s\n",
  838. event->header.type == PERF_EVENT_MMAP
  839. ? "mmap" : "munmap",
  840. event->mmap.start,
  841. event->mmap.len,
  842. event->mmap.pgoff,
  843. event->mmap.filename);
  844. break;
  845. }
  846. }
  847. }
  848. md->prev = old;
  849. }
  850. static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
  851. static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
  852. int cmd_top(int argc, char **argv, const char *prefix)
  853. {
  854. struct perf_counter_hw_event hw_event;
  855. pthread_t thread;
  856. int i, counter, group_fd, nr_poll = 0;
  857. unsigned int cpu;
  858. int ret;
  859. page_size = sysconf(_SC_PAGE_SIZE);
  860. process_options(argc, argv);
  861. nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
  862. assert(nr_cpus <= MAX_NR_CPUS);
  863. assert(nr_cpus >= 0);
  864. if (tid != -1 || profile_cpu != -1)
  865. nr_cpus = 1;
  866. parse_symbols();
  867. if (vmlinux && sym_filter_entry)
  868. parse_vmlinux(vmlinux);
  869. for (i = 0; i < nr_cpus; i++) {
  870. group_fd = -1;
  871. for (counter = 0; counter < nr_counters; counter++) {
  872. cpu = profile_cpu;
  873. if (tid == -1 && profile_cpu == -1)
  874. cpu = i;
  875. memset(&hw_event, 0, sizeof(hw_event));
  876. hw_event.config = event_id[counter];
  877. hw_event.irq_period = event_count[counter];
  878. hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
  879. hw_event.nmi = nmi;
  880. hw_event.mmap = use_mmap;
  881. hw_event.munmap = use_munmap;
  882. hw_event.freq = freq;
  883. fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
  884. if (fd[i][counter] < 0) {
  885. int err = errno;
  886. printf("kerneltop error: syscall returned with %d (%s)\n",
  887. fd[i][counter], strerror(err));
  888. if (err == EPERM)
  889. printf("Are you root?\n");
  890. exit(-1);
  891. }
  892. assert(fd[i][counter] >= 0);
  893. fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
  894. /*
  895. * First counter acts as the group leader:
  896. */
  897. if (group && group_fd == -1)
  898. group_fd = fd[i][counter];
  899. event_array[nr_poll].fd = fd[i][counter];
  900. event_array[nr_poll].events = POLLIN;
  901. nr_poll++;
  902. mmap_array[i][counter].counter = counter;
  903. mmap_array[i][counter].prev = 0;
  904. mmap_array[i][counter].mask = mmap_pages*page_size - 1;
  905. mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
  906. PROT_READ, MAP_SHARED, fd[i][counter], 0);
  907. if (mmap_array[i][counter].base == MAP_FAILED) {
  908. printf("kerneltop error: failed to mmap with %d (%s)\n",
  909. errno, strerror(errno));
  910. exit(-1);
  911. }
  912. }
  913. }
  914. if (pthread_create(&thread, NULL, display_thread, NULL)) {
  915. printf("Could not create display thread.\n");
  916. exit(-1);
  917. }
  918. if (realtime_prio) {
  919. struct sched_param param;
  920. param.sched_priority = realtime_prio;
  921. if (sched_setscheduler(0, SCHED_FIFO, &param)) {
  922. printf("Could not set realtime priority.\n");
  923. exit(-1);
  924. }
  925. }
  926. while (1) {
  927. int hits = events;
  928. for (i = 0; i < nr_cpus; i++) {
  929. for (counter = 0; counter < nr_counters; counter++)
  930. mmap_read(&mmap_array[i][counter]);
  931. }
  932. if (hits == events)
  933. ret = poll(event_array, nr_poll, 100);
  934. }
  935. return 0;
  936. }