builtin-top.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204
  1. /*
  2. * kerneltop.c: show top kernel functions - performance counters showcase
  3. Build with:
  4. cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
  5. Sample output:
  6. ------------------------------------------------------------------------------
  7. KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
  8. ------------------------------------------------------------------------------
  9. weight RIP kernel function
  10. ______ ________________ _______________
  11. 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
  12. 33.00 - ffffffff804cb740 : sock_alloc_send_skb
  13. 31.26 - ffffffff804ce808 : skb_push
  14. 22.43 - ffffffff80510004 : tcp_established_options
  15. 19.00 - ffffffff8027d250 : find_get_page
  16. 15.76 - ffffffff804e4fc9 : eth_type_trans
  17. 15.20 - ffffffff804d8baa : dst_release
  18. 14.86 - ffffffff804cf5d8 : skb_release_head_state
  19. 14.00 - ffffffff802217d5 : read_hpet
  20. 12.00 - ffffffff804ffb7f : __ip_local_out
  21. 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
  22. 8.54 - ffffffff805001a3 : ip_queue_xmit
  23. */
  24. /*
  25. * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  26. *
  27. * Improvements and fixes by:
  28. *
  29. * Arjan van de Ven <arjan@linux.intel.com>
  30. * Yanmin Zhang <yanmin.zhang@intel.com>
  31. * Wu Fengguang <fengguang.wu@intel.com>
  32. * Mike Galbraith <efault@gmx.de>
  33. * Paul Mackerras <paulus@samba.org>
  34. *
  35. * Released under the GPL v2. (and only v2, not any later version)
  36. */
  37. #include "util.h"
  38. #include <getopt.h>
  39. #include <assert.h>
  40. #include <fcntl.h>
  41. #include <stdio.h>
  42. #include <errno.h>
  43. #include <ctype.h>
  44. #include <time.h>
  45. #include <sched.h>
  46. #include <pthread.h>
  47. #include <sys/syscall.h>
  48. #include <sys/ioctl.h>
  49. #include <sys/poll.h>
  50. #include <sys/prctl.h>
  51. #include <sys/wait.h>
  52. #include <sys/uio.h>
  53. #include <sys/mman.h>
  54. #include <linux/unistd.h>
  55. #include <linux/types.h>
  56. #include "../../include/linux/perf_counter.h"
  57. /*
  58. * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
  59. * counters in the current task.
  60. */
  61. #define PR_TASK_PERF_COUNTERS_DISABLE 31
  62. #define PR_TASK_PERF_COUNTERS_ENABLE 32
  63. #define rdclock() \
  64. ({ \
  65. struct timespec ts; \
  66. \
  67. clock_gettime(CLOCK_MONOTONIC, &ts); \
  68. ts.tv_sec * 1000000000ULL + ts.tv_nsec; \
  69. })
  70. /*
  71. * Pick up some kernel type conventions:
  72. */
  73. #define __user
  74. #define asmlinkage
  75. #ifdef __x86_64__
  76. #define __NR_perf_counter_open 295
  77. #define rmb() asm volatile("lfence" ::: "memory")
  78. #define cpu_relax() asm volatile("rep; nop" ::: "memory");
  79. #endif
  80. #ifdef __i386__
  81. #define __NR_perf_counter_open 333
  82. #define rmb() asm volatile("lfence" ::: "memory")
  83. #define cpu_relax() asm volatile("rep; nop" ::: "memory");
  84. #endif
  85. #ifdef __powerpc__
  86. #define __NR_perf_counter_open 319
  87. #define rmb() asm volatile ("sync" ::: "memory")
  88. #define cpu_relax() asm volatile ("" ::: "memory");
  89. #endif
  90. #define unlikely(x) __builtin_expect(!!(x), 0)
  91. #define min(x, y) ({ \
  92. typeof(x) _min1 = (x); \
  93. typeof(y) _min2 = (y); \
  94. (void) (&_min1 == &_min2); \
  95. _min1 < _min2 ? _min1 : _min2; })
  96. asmlinkage int sys_perf_counter_open(
  97. struct perf_counter_hw_event *hw_event_uptr __user,
  98. pid_t pid,
  99. int cpu,
  100. int group_fd,
  101. unsigned long flags)
  102. {
  103. return syscall(
  104. __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
  105. }
  106. #define MAX_COUNTERS 64
  107. #define MAX_NR_CPUS 256
  108. #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
  109. static int system_wide = 0;
  110. static int nr_counters = 0;
  111. static __u64 event_id[MAX_COUNTERS] = {
  112. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
  113. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
  114. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
  115. EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
  116. EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
  117. EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
  118. EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
  119. EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
  120. };
  121. static int default_interval = 100000;
  122. static int event_count[MAX_COUNTERS];
  123. static int fd[MAX_NR_CPUS][MAX_COUNTERS];
  124. static __u64 count_filter = 100;
  125. static int tid = -1;
  126. static int profile_cpu = -1;
  127. static int nr_cpus = 0;
  128. static int nmi = 1;
  129. static unsigned int realtime_prio = 0;
  130. static int group = 0;
  131. static unsigned int page_size;
  132. static unsigned int mmap_pages = 16;
  133. static int use_mmap = 0;
  134. static int use_munmap = 0;
  135. static char *vmlinux;
  136. static char *sym_filter;
  137. static unsigned long filter_start;
  138. static unsigned long filter_end;
  139. static int delay_secs = 2;
  140. static int zero;
  141. static int dump_symtab;
  142. static int scale;
/*
 * One line of objdump output for the filtered symbol, kept in a singly
 * linked list in the order it was read (see parse_vmlinux()).
 */
struct source_line {
	uint64_t EIP;			/* address parsed from the line, 0 if the line carries none */
	unsigned long count;		/* hits recorded at exactly this address */
	char *line;			/* text of the line, trailing newline removed */
	struct source_line *next;
};

/* Head of the source line list and the link where the next node is appended. */
static struct source_line *lines;
static struct source_line **lines_tail;
/*
 * Table of six default counts.
 * NOTE(review): not referenced in this part of the file; presumably
 * per-event default sample periods -- confirm against its users.
 */
static const unsigned int default_count[] = {
	1000000,
	1000000,
	10000,
	10000,
	1000000,
	10000,
};
/*
 * Human readable names of the hardware events; event_name() indexes
 * this table with the event id of a PERF_TYPE_HARDWARE config.
 */
static char *hw_event_names[] = {
	"CPU cycles",
	"instructions",
	"cache references",
	"cache misses",
	"branches",
	"branch misses",
	"bus cycles",
};

/*
 * Human readable names of the software events; event_name() indexes
 * this table with the event id of a PERF_TYPE_SOFTWARE config.
 */
static char *sw_event_names[] = {
	"cpu clock ticks",
	"task clock ticks",
	"pagefaults",
	"context switches",
	"CPU migrations",
	"minor faults",
	"major faults",
};
/* Maps a symbolic event name accepted by -e to its event config value. */
struct event_symbol {
	__u64 event;
	char *symbol;
};

/*
 * Symbolic names understood by match_event_symbols(). Several names may
 * map to the same event; the table is searched in order.
 */
static struct event_symbol event_symbols[] = {
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
};
  202. #define __PERF_COUNTER_FIELD(config, name) \
  203. ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
  204. #define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW)
  205. #define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG)
  206. #define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE)
  207. #define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT)
  208. static void display_events_help(void)
  209. {
  210. unsigned int i;
  211. __u64 e;
  212. printf(
  213. " -e EVENT --event=EVENT # symbolic-name abbreviations");
  214. for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
  215. int type, id;
  216. e = event_symbols[i].event;
  217. type = PERF_COUNTER_TYPE(e);
  218. id = PERF_COUNTER_ID(e);
  219. printf("\n %d:%d: %-20s",
  220. type, id, event_symbols[i].symbol);
  221. }
  222. printf("\n"
  223. " rNNN: raw PMU events (eventsel+umask)\n\n");
  224. }
  225. static void display_help(void)
  226. {
  227. printf(
  228. "Usage: kerneltop [<options>]\n"
  229. " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
  230. "KernelTop Options (up to %d event types can be specified at once):\n\n",
  231. MAX_COUNTERS);
  232. display_events_help();
  233. printf(
  234. " -c CNT --count=CNT # event period to sample\n\n"
  235. " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
  236. " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
  237. " -l # show scale factor for RR events\n"
  238. " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
  239. " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
  240. " -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n"
  241. " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
  242. " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
  243. " -z --zero # zero counts after display\n"
  244. " -D --dump_symtab # dump symbol table to stderr on startup\n"
  245. " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
  246. " -M --mmap_info # print mmap info stream\n"
  247. " -U --munmap_info # print munmap info stream\n"
  248. );
  249. exit(0);
  250. }
  251. static char *event_name(int ctr)
  252. {
  253. __u64 config = event_id[ctr];
  254. int type = PERF_COUNTER_TYPE(config);
  255. int id = PERF_COUNTER_ID(config);
  256. static char buf[32];
  257. if (PERF_COUNTER_RAW(config)) {
  258. sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
  259. return buf;
  260. }
  261. switch (type) {
  262. case PERF_TYPE_HARDWARE:
  263. if (id < PERF_HW_EVENTS_MAX)
  264. return hw_event_names[id];
  265. return "unknown-hardware";
  266. case PERF_TYPE_SOFTWARE:
  267. if (id < PERF_SW_EVENTS_MAX)
  268. return sw_event_names[id];
  269. return "unknown-software";
  270. default:
  271. break;
  272. }
  273. return "unknown";
  274. }
  275. /*
  276. * Each event can have multiple symbolic names.
  277. * Symbolic names are (almost) exactly matched.
  278. */
  279. static __u64 match_event_symbols(char *str)
  280. {
  281. __u64 config, id;
  282. int type;
  283. unsigned int i;
  284. if (sscanf(str, "r%llx", &config) == 1)
  285. return config | PERF_COUNTER_RAW_MASK;
  286. if (sscanf(str, "%d:%llu", &type, &id) == 2)
  287. return EID(type, id);
  288. for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
  289. if (!strncmp(str, event_symbols[i].symbol,
  290. strlen(event_symbols[i].symbol)))
  291. return event_symbols[i].event;
  292. }
  293. return ~0ULL;
  294. }
  295. static int parse_events(char *str)
  296. {
  297. __u64 config;
  298. again:
  299. if (nr_counters == MAX_COUNTERS)
  300. return -1;
  301. config = match_event_symbols(str);
  302. if (config == ~0ULL)
  303. return -1;
  304. event_id[nr_counters] = config;
  305. nr_counters++;
  306. str = strstr(str, ",");
  307. if (str) {
  308. str++;
  309. goto again;
  310. }
  311. return 0;
  312. }
/*
 * Symbols
 */

/* Lowest and highest symbol address; set by parse_symbols(). */
static uint64_t min_ip;
static uint64_t max_ip = -1ll;

/* One kernel text symbol together with its per-counter hit counts. */
struct sym_entry {
	unsigned long long addr;		/* start address from /proc/kallsyms */
	char *sym;				/* symbol name */
	unsigned long count[MAX_COUNTERS];	/* hits, one slot per counter */
	int skip;				/* set for idle functions: hits are not counted */
	struct source_line *source;		/* first objdump line of the symbol, looked up lazily */
};

#define MAX_SYMS 100000

/* Number of valid entries in sym_table[]. */
static int sym_table_count;

/* Entry matching the -s symbol filter, or NULL. */
struct sym_entry *sym_filter_entry;

/* Symbol table, sorted by address once parse_symbols() has run. */
static struct sym_entry sym_table[MAX_SYMS];

static void show_details(struct sym_entry *sym);
  330. /*
  331. * Ordering weight: count-1 * count-2 * ... / count-n
  332. */
  333. static double sym_weight(const struct sym_entry *sym)
  334. {
  335. double weight;
  336. int counter;
  337. weight = sym->count[0];
  338. for (counter = 1; counter < nr_counters-1; counter++)
  339. weight *= sym->count[counter];
  340. weight /= (sym->count[counter] + 1);
  341. return weight;
  342. }
  343. static int compare(const void *__sym1, const void *__sym2)
  344. {
  345. const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
  346. return sym_weight(sym1) < sym_weight(sym2);
  347. }
/*
 * Events seen since the last refresh: all of them, and those whose
 * address fell outside the kernel symbol range (see process_event()).
 */
static long events;
static long userspace_events;

/*
 * Written to fd 1 at the start of every refresh.
 * NOTE(review): the literal is empty in this copy, so nothing is
 * emitted; possibly a terminal clear sequence whose control characters
 * were lost -- confirm against the upstream file.
 */
static const char CONSOLE_CLEAR[] = "";

/* Scratch copy of sym_table[] that print_sym_table() sorts by weight. */
static struct sym_entry tmp[MAX_SYMS];
  352. static void print_sym_table(void)
  353. {
  354. int i, printed;
  355. int counter;
  356. float events_per_sec = events/delay_secs;
  357. float kevents_per_sec = (events-userspace_events)/delay_secs;
  358. float sum_kevents = 0.0;
  359. events = userspace_events = 0;
  360. memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
  361. qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
  362. for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
  363. sum_kevents += tmp[i].count[0];
  364. write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
  365. printf(
  366. "------------------------------------------------------------------------------\n");
  367. printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ",
  368. events_per_sec,
  369. 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
  370. nmi ? "NMI" : "IRQ");
  371. if (nr_counters == 1)
  372. printf("%d ", event_count[0]);
  373. for (counter = 0; counter < nr_counters; counter++) {
  374. if (counter)
  375. printf("/");
  376. printf("%s", event_name(counter));
  377. }
  378. printf( "], ");
  379. if (tid != -1)
  380. printf(" (tid: %d", tid);
  381. else
  382. printf(" (all");
  383. if (profile_cpu != -1)
  384. printf(", cpu: %d)\n", profile_cpu);
  385. else {
  386. if (tid != -1)
  387. printf(")\n");
  388. else
  389. printf(", %d CPUs)\n", nr_cpus);
  390. }
  391. printf("------------------------------------------------------------------------------\n\n");
  392. if (nr_counters == 1)
  393. printf(" events pcnt");
  394. else
  395. printf(" weight events pcnt");
  396. printf(" RIP kernel function\n"
  397. " ______ ______ _____ ________________ _______________\n\n"
  398. );
  399. for (i = 0, printed = 0; i < sym_table_count; i++) {
  400. float pcnt;
  401. int count;
  402. if (printed <= 18 && tmp[i].count[0] >= count_filter) {
  403. pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
  404. if (nr_counters == 1)
  405. printf("%19.2f - %4.1f%% - %016llx : %s\n",
  406. sym_weight(tmp + i),
  407. pcnt, tmp[i].addr, tmp[i].sym);
  408. else
  409. printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
  410. sym_weight(tmp + i),
  411. tmp[i].count[0],
  412. pcnt, tmp[i].addr, tmp[i].sym);
  413. printed++;
  414. }
  415. /*
  416. * Add decay to the counts:
  417. */
  418. for (count = 0; count < nr_counters; count++)
  419. sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
  420. }
  421. if (sym_filter_entry)
  422. show_details(sym_filter_entry);
  423. {
  424. struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
  425. if (poll(&stdin_poll, 1, 0) == 1) {
  426. printf("key pressed - exiting.\n");
  427. exit(0);
  428. }
  429. }
  430. }
  431. static void *display_thread(void *arg)
  432. {
  433. printf("KernelTop refresh period: %d seconds\n", delay_secs);
  434. while (!sleep(delay_secs))
  435. print_sym_table();
  436. return NULL;
  437. }
  438. static int read_symbol(FILE *in, struct sym_entry *s)
  439. {
  440. static int filter_match = 0;
  441. char *sym, stype;
  442. char str[500];
  443. int rc, pos;
  444. rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
  445. if (rc == EOF)
  446. return -1;
  447. assert(rc == 3);
  448. /* skip until end of line: */
  449. pos = strlen(str);
  450. do {
  451. rc = fgetc(in);
  452. if (rc == '\n' || rc == EOF || pos >= 499)
  453. break;
  454. str[pos] = rc;
  455. pos++;
  456. } while (1);
  457. str[pos] = 0;
  458. sym = str;
  459. /* Filter out known duplicates and non-text symbols. */
  460. if (!strcmp(sym, "_text"))
  461. return 1;
  462. if (!min_ip && !strcmp(sym, "_stext"))
  463. return 1;
  464. if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
  465. return 1;
  466. if (stype != 'T' && stype != 't')
  467. return 1;
  468. if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
  469. return 1;
  470. if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
  471. return 1;
  472. s->sym = malloc(strlen(str));
  473. assert(s->sym);
  474. strcpy((char *)s->sym, str);
  475. s->skip = 0;
  476. /* Tag events to be skipped. */
  477. if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
  478. s->skip = 1;
  479. else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
  480. s->skip = 1;
  481. else if (!strcmp("mwait_idle", s->sym))
  482. s->skip = 1;
  483. if (filter_match == 1) {
  484. filter_end = s->addr;
  485. filter_match = -1;
  486. if (filter_end - filter_start > 10000) {
  487. printf("hm, too large filter symbol <%s> - skipping.\n",
  488. sym_filter);
  489. printf("symbol filter start: %016lx\n", filter_start);
  490. printf(" end: %016lx\n", filter_end);
  491. filter_end = filter_start = 0;
  492. sym_filter = NULL;
  493. sleep(1);
  494. }
  495. }
  496. if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
  497. filter_match = 1;
  498. filter_start = s->addr;
  499. }
  500. return 0;
  501. }
  502. static int compare_addr(const void *__sym1, const void *__sym2)
  503. {
  504. const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
  505. return sym1->addr > sym2->addr;
  506. }
  507. static void sort_symbol_table(void)
  508. {
  509. int i, dups;
  510. do {
  511. qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
  512. for (i = 0, dups = 0; i < sym_table_count; i++) {
  513. if (sym_table[i].addr == sym_table[i+1].addr) {
  514. sym_table[i+1].addr = -1ll;
  515. dups++;
  516. }
  517. }
  518. sym_table_count -= dups;
  519. } while(dups);
  520. }
  521. static void parse_symbols(void)
  522. {
  523. struct sym_entry *last;
  524. FILE *kallsyms = fopen("/proc/kallsyms", "r");
  525. if (!kallsyms) {
  526. printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
  527. exit(-1);
  528. }
  529. while (!feof(kallsyms)) {
  530. if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
  531. sym_table_count++;
  532. assert(sym_table_count <= MAX_SYMS);
  533. }
  534. }
  535. sort_symbol_table();
  536. min_ip = sym_table[0].addr;
  537. max_ip = sym_table[sym_table_count-1].addr;
  538. last = sym_table + sym_table_count++;
  539. last->addr = -1ll;
  540. last->sym = "<end>";
  541. if (filter_end) {
  542. int count;
  543. for (count=0; count < sym_table_count; count ++) {
  544. if (!strcmp(sym_table[count].sym, sym_filter)) {
  545. sym_filter_entry = &sym_table[count];
  546. break;
  547. }
  548. }
  549. }
  550. if (dump_symtab) {
  551. int i;
  552. for (i = 0; i < sym_table_count; i++)
  553. fprintf(stderr, "%llx %s\n",
  554. sym_table[i].addr, sym_table[i].sym);
  555. }
  556. }
  557. /*
  558. * Source lines
  559. */
  560. static void parse_vmlinux(char *filename)
  561. {
  562. FILE *file;
  563. char command[PATH_MAX*2];
  564. if (!filename)
  565. return;
  566. sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
  567. file = popen(command, "r");
  568. if (!file)
  569. return;
  570. lines_tail = &lines;
  571. while (!feof(file)) {
  572. struct source_line *src;
  573. size_t dummy = 0;
  574. char *c;
  575. src = malloc(sizeof(struct source_line));
  576. assert(src != NULL);
  577. memset(src, 0, sizeof(struct source_line));
  578. if (getline(&src->line, &dummy, file) < 0)
  579. break;
  580. if (!src->line)
  581. break;
  582. c = strchr(src->line, '\n');
  583. if (c)
  584. *c = 0;
  585. src->next = NULL;
  586. *lines_tail = src;
  587. lines_tail = &src->next;
  588. if (strlen(src->line)>8 && src->line[8] == ':')
  589. src->EIP = strtoull(src->line, NULL, 16);
  590. if (strlen(src->line)>8 && src->line[16] == ':')
  591. src->EIP = strtoull(src->line, NULL, 16);
  592. }
  593. pclose(file);
  594. }
  595. static void record_precise_ip(uint64_t ip)
  596. {
  597. struct source_line *line;
  598. for (line = lines; line; line = line->next) {
  599. if (line->EIP == ip)
  600. line->count++;
  601. if (line->EIP > ip)
  602. break;
  603. }
  604. }
  605. static void lookup_sym_in_vmlinux(struct sym_entry *sym)
  606. {
  607. struct source_line *line;
  608. char pattern[PATH_MAX];
  609. sprintf(pattern, "<%s>:", sym->sym);
  610. for (line = lines; line; line = line->next) {
  611. if (strstr(line->line, pattern)) {
  612. sym->source = line;
  613. break;
  614. }
  615. }
  616. }
  617. static void show_lines(struct source_line *line_queue, int line_queue_count)
  618. {
  619. int i;
  620. struct source_line *line;
  621. line = line_queue;
  622. for (i = 0; i < line_queue_count; i++) {
  623. printf("%8li\t%s\n", line->count, line->line);
  624. line = line->next;
  625. }
  626. }
  627. #define TRACE_COUNT 3
  628. static void show_details(struct sym_entry *sym)
  629. {
  630. struct source_line *line;
  631. struct source_line *line_queue = NULL;
  632. int displayed = 0;
  633. int line_queue_count = 0;
  634. if (!sym->source)
  635. lookup_sym_in_vmlinux(sym);
  636. if (!sym->source)
  637. return;
  638. printf("Showing details for %s\n", sym->sym);
  639. line = sym->source;
  640. while (line) {
  641. if (displayed && strstr(line->line, ">:"))
  642. break;
  643. if (!line_queue_count)
  644. line_queue = line;
  645. line_queue_count ++;
  646. if (line->count >= count_filter) {
  647. show_lines(line_queue, line_queue_count);
  648. line_queue_count = 0;
  649. line_queue = NULL;
  650. } else if (line_queue_count > TRACE_COUNT) {
  651. line_queue = line_queue->next;
  652. line_queue_count --;
  653. }
  654. line->count = 0;
  655. displayed++;
  656. if (displayed > 300)
  657. break;
  658. line = line->next;
  659. }
  660. }
  661. /*
  662. * Binary search in the histogram table and record the hit:
  663. */
  664. static void record_ip(uint64_t ip, int counter)
  665. {
  666. int left_idx, middle_idx, right_idx, idx;
  667. unsigned long left, middle, right;
  668. record_precise_ip(ip);
  669. left_idx = 0;
  670. right_idx = sym_table_count-1;
  671. assert(ip <= max_ip && ip >= min_ip);
  672. while (left_idx + 1 < right_idx) {
  673. middle_idx = (left_idx + right_idx) / 2;
  674. left = sym_table[ left_idx].addr;
  675. middle = sym_table[middle_idx].addr;
  676. right = sym_table[ right_idx].addr;
  677. if (!(left <= middle && middle <= right)) {
  678. printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
  679. printf("%d %d %d\n", left_idx, middle_idx, right_idx);
  680. }
  681. assert(left <= middle && middle <= right);
  682. if (!(left <= ip && ip <= right)) {
  683. printf(" left: %016lx\n", left);
  684. printf(" ip: %016lx\n", (unsigned long)ip);
  685. printf("right: %016lx\n", right);
  686. }
  687. assert(left <= ip && ip <= right);
  688. /*
  689. * [ left .... target .... middle .... right ]
  690. * => right := middle
  691. */
  692. if (ip < middle) {
  693. right_idx = middle_idx;
  694. continue;
  695. }
  696. /*
  697. * [ left .... middle ... target ... right ]
  698. * => left := middle
  699. */
  700. left_idx = middle_idx;
  701. }
  702. idx = left_idx;
  703. if (!sym_table[idx].skip)
  704. sym_table[idx].count[counter]++;
  705. else events--;
  706. }
  707. static void process_event(uint64_t ip, int counter)
  708. {
  709. events++;
  710. if (ip < min_ip || ip > max_ip) {
  711. userspace_events++;
  712. return;
  713. }
  714. record_ip(ip, counter);
  715. }
  716. static void process_options(int argc, char **argv)
  717. {
  718. int error = 0, counter;
  719. for (;;) {
  720. int option_index = 0;
  721. /** Options for getopt */
  722. static struct option long_options[] = {
  723. {"count", required_argument, NULL, 'c'},
  724. {"cpu", required_argument, NULL, 'C'},
  725. {"delay", required_argument, NULL, 'd'},
  726. {"dump_symtab", no_argument, NULL, 'D'},
  727. {"event", required_argument, NULL, 'e'},
  728. {"filter", required_argument, NULL, 'f'},
  729. {"group", required_argument, NULL, 'g'},
  730. {"help", no_argument, NULL, 'h'},
  731. {"nmi", required_argument, NULL, 'n'},
  732. {"mmap_info", no_argument, NULL, 'M'},
  733. {"mmap_pages", required_argument, NULL, 'm'},
  734. {"munmap_info", no_argument, NULL, 'U'},
  735. {"pid", required_argument, NULL, 'p'},
  736. {"realtime", required_argument, NULL, 'r'},
  737. {"scale", no_argument, NULL, 'l'},
  738. {"symbol", required_argument, NULL, 's'},
  739. {"stat", no_argument, NULL, 'S'},
  740. {"vmlinux", required_argument, NULL, 'x'},
  741. {"zero", no_argument, NULL, 'z'},
  742. {NULL, 0, NULL, 0 }
  743. };
  744. int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
  745. long_options, &option_index);
  746. if (c == -1)
  747. break;
  748. switch (c) {
  749. case 'a': system_wide = 1; break;
  750. case 'c': default_interval = atoi(optarg); break;
  751. case 'C':
  752. /* CPU and PID are mutually exclusive */
  753. if (tid != -1) {
  754. printf("WARNING: CPU switch overriding PID\n");
  755. sleep(1);
  756. tid = -1;
  757. }
  758. profile_cpu = atoi(optarg); break;
  759. case 'd': delay_secs = atoi(optarg); break;
  760. case 'D': dump_symtab = 1; break;
  761. case 'e': error = parse_events(optarg); break;
  762. case 'f': count_filter = atoi(optarg); break;
  763. case 'g': group = atoi(optarg); break;
  764. case 'h': display_help(); break;
  765. case 'l': scale = 1; break;
  766. case 'n': nmi = atoi(optarg); break;
  767. case 'p':
  768. /* CPU and PID are mutually exclusive */
  769. if (profile_cpu != -1) {
  770. printf("WARNING: PID switch overriding CPU\n");
  771. sleep(1);
  772. profile_cpu = -1;
  773. }
  774. tid = atoi(optarg); break;
  775. case 'r': realtime_prio = atoi(optarg); break;
  776. case 's': sym_filter = strdup(optarg); break;
  777. case 'x': vmlinux = strdup(optarg); break;
  778. case 'z': zero = 1; break;
  779. case 'm': mmap_pages = atoi(optarg); break;
  780. case 'M': use_mmap = 1; break;
  781. case 'U': use_munmap = 1; break;
  782. default: error = 1; break;
  783. }
  784. }
  785. if (error)
  786. display_help();
  787. if (!nr_counters) {
  788. nr_counters = 1;
  789. event_id[0] = 0;
  790. }
  791. for (counter = 0; counter < nr_counters; counter++) {
  792. if (event_count[counter])
  793. continue;
  794. event_count[counter] = default_interval;
  795. }
  796. }
/* Reader state for one mmap()ed counter buffer. */
struct mmap_data {
	int counter;		/* counter index handed to process_event() */
	void *base;		/* start of the mapping; the control page comes first */
	unsigned int mask;	/* masks a position into the data area */
	unsigned int prev;	/* position the previous read stopped at */
};
  803. static unsigned int mmap_read_head(struct mmap_data *md)
  804. {
  805. struct perf_counter_mmap_page *pc = md->base;
  806. int head;
  807. head = pc->data_head;
  808. rmb();
  809. return head;
  810. }
/* Time of the previous and of the current mmap_read() call. */
struct timeval last_read, this_read;
  812. static void mmap_read(struct mmap_data *md)
  813. {
  814. unsigned int head = mmap_read_head(md);
  815. unsigned int old = md->prev;
  816. unsigned char *data = md->base + page_size;
  817. int diff;
  818. gettimeofday(&this_read, NULL);
  819. /*
  820. * If we're further behind than half the buffer, there's a chance
  821. * the writer will bite our tail and screw up the events under us.
  822. *
  823. * If we somehow ended up ahead of the head, we got messed up.
  824. *
  825. * In either case, truncate and restart at head.
  826. */
  827. diff = head - old;
  828. if (diff > md->mask / 2 || diff < 0) {
  829. struct timeval iv;
  830. unsigned long msecs;
  831. timersub(&this_read, &last_read, &iv);
  832. msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
  833. fprintf(stderr, "WARNING: failed to keep up with mmap data."
  834. " Last read %lu msecs ago.\n", msecs);
  835. /*
  836. * head points to a known good entry, start there.
  837. */
  838. old = head;
  839. }
  840. last_read = this_read;
  841. for (; old != head;) {
  842. struct ip_event {
  843. struct perf_event_header header;
  844. __u64 ip;
  845. __u32 pid, tid;
  846. };
  847. struct mmap_event {
  848. struct perf_event_header header;
  849. __u32 pid, tid;
  850. __u64 start;
  851. __u64 len;
  852. __u64 pgoff;
  853. char filename[PATH_MAX];
  854. };
  855. typedef union event_union {
  856. struct perf_event_header header;
  857. struct ip_event ip;
  858. struct mmap_event mmap;
  859. } event_t;
  860. event_t *event = (event_t *)&data[old & md->mask];
  861. event_t event_copy;
  862. size_t size = event->header.size;
  863. /*
  864. * Event straddles the mmap boundary -- header should always
  865. * be inside due to u64 alignment of output.
  866. */
  867. if ((old & md->mask) + size != ((old + size) & md->mask)) {
  868. unsigned int offset = old;
  869. unsigned int len = min(sizeof(*event), size), cpy;
  870. void *dst = &event_copy;
  871. do {
  872. cpy = min(md->mask + 1 - (offset & md->mask), len);
  873. memcpy(dst, &data[offset & md->mask], cpy);
  874. offset += cpy;
  875. dst += cpy;
  876. len -= cpy;
  877. } while (len);
  878. event = &event_copy;
  879. }
  880. old += size;
  881. if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
  882. if (event->header.type & PERF_RECORD_IP)
  883. process_event(event->ip.ip, md->counter);
  884. } else {
  885. switch (event->header.type) {
  886. case PERF_EVENT_MMAP:
  887. case PERF_EVENT_MUNMAP:
  888. printf("%s: %Lu %Lu %Lu %s\n",
  889. event->header.type == PERF_EVENT_MMAP
  890. ? "mmap" : "munmap",
  891. event->mmap.start,
  892. event->mmap.len,
  893. event->mmap.pgoff,
  894. event->mmap.filename);
  895. break;
  896. }
  897. }
  898. }
  899. md->prev = old;
  900. }
  901. int cmd_top(int argc, char **argv, const char *prefix)
  902. {
  903. struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
  904. struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
  905. struct perf_counter_hw_event hw_event;
  906. pthread_t thread;
  907. int i, counter, group_fd, nr_poll = 0;
  908. unsigned int cpu;
  909. int ret;
  910. page_size = sysconf(_SC_PAGE_SIZE);
  911. process_options(argc, argv);
  912. nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
  913. assert(nr_cpus <= MAX_NR_CPUS);
  914. assert(nr_cpus >= 0);
  915. if (tid != -1 || profile_cpu != -1)
  916. nr_cpus = 1;
  917. parse_symbols();
  918. if (vmlinux && sym_filter_entry)
  919. parse_vmlinux(vmlinux);
  920. for (i = 0; i < nr_cpus; i++) {
  921. group_fd = -1;
  922. for (counter = 0; counter < nr_counters; counter++) {
  923. cpu = profile_cpu;
  924. if (tid == -1 && profile_cpu == -1)
  925. cpu = i;
  926. memset(&hw_event, 0, sizeof(hw_event));
  927. hw_event.config = event_id[counter];
  928. hw_event.irq_period = event_count[counter];
  929. hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
  930. hw_event.nmi = nmi;
  931. hw_event.mmap = use_mmap;
  932. hw_event.munmap = use_munmap;
  933. fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
  934. if (fd[i][counter] < 0) {
  935. int err = errno;
  936. printf("kerneltop error: syscall returned with %d (%s)\n",
  937. fd[i][counter], strerror(err));
  938. if (err == EPERM)
  939. printf("Are you root?\n");
  940. exit(-1);
  941. }
  942. assert(fd[i][counter] >= 0);
  943. fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
  944. /*
  945. * First counter acts as the group leader:
  946. */
  947. if (group && group_fd == -1)
  948. group_fd = fd[i][counter];
  949. event_array[nr_poll].fd = fd[i][counter];
  950. event_array[nr_poll].events = POLLIN;
  951. nr_poll++;
  952. mmap_array[i][counter].counter = counter;
  953. mmap_array[i][counter].prev = 0;
  954. mmap_array[i][counter].mask = mmap_pages*page_size - 1;
  955. mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
  956. PROT_READ, MAP_SHARED, fd[i][counter], 0);
  957. if (mmap_array[i][counter].base == MAP_FAILED) {
  958. printf("kerneltop error: failed to mmap with %d (%s)\n",
  959. errno, strerror(errno));
  960. exit(-1);
  961. }
  962. }
  963. }
  964. if (pthread_create(&thread, NULL, display_thread, NULL)) {
  965. printf("Could not create display thread.\n");
  966. exit(-1);
  967. }
  968. if (realtime_prio) {
  969. struct sched_param param;
  970. param.sched_priority = realtime_prio;
  971. if (sched_setscheduler(0, SCHED_FIFO, &param)) {
  972. printf("Could not set realtime priority.\n");
  973. exit(-1);
  974. }
  975. }
  976. while (1) {
  977. int hits = events;
  978. for (i = 0; i < nr_cpus; i++) {
  979. for (counter = 0; counter < nr_counters; counter++)
  980. mmap_read(&mmap_array[i][counter]);
  981. }
  982. if (hits == events)
  983. ret = poll(event_array, nr_poll, 100);
  984. }
  985. return 0;
  986. }