builtin-kmem.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. #include "builtin.h"
  2. #include "perf.h"
  3. #include "util/util.h"
  4. #include "util/cache.h"
  5. #include "util/symbol.h"
  6. #include "util/thread.h"
  7. #include "util/header.h"
  8. #include "util/parse-options.h"
  9. #include "util/trace-event.h"
  10. #include "util/debug.h"
  11. #include "util/data_map.h"
  12. #include <linux/rbtree.h>
  13. struct alloc_stat;
  14. typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
  15. static char const *input_name = "perf.data";
  16. static struct perf_header *header;
  17. static u64 sample_type;
  18. static int alloc_flag;
  19. static int caller_flag;
  20. sort_fn_t alloc_sort_fn;
  21. sort_fn_t caller_sort_fn;
  22. static int alloc_lines = -1;
  23. static int caller_lines = -1;
  24. static char *cwd;
  25. static int cwdlen;
  26. struct alloc_stat {
  27. union {
  28. struct {
  29. char *name;
  30. u64 call_site;
  31. };
  32. u64 ptr;
  33. };
  34. u64 bytes_req;
  35. u64 bytes_alloc;
  36. u32 hit;
  37. struct rb_node node;
  38. };
  39. static struct rb_root root_alloc_stat;
  40. static struct rb_root root_alloc_sorted;
  41. static struct rb_root root_caller_stat;
  42. static struct rb_root root_caller_sorted;
  43. static unsigned long total_requested, total_allocated;
  44. struct raw_event_sample {
  45. u32 size;
  46. char data[0];
  47. };
  48. static int
  49. process_comm_event(event_t *event, unsigned long offset, unsigned long head)
  50. {
  51. struct thread *thread = threads__findnew(event->comm.pid);
  52. dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
  53. (void *)(offset + head),
  54. (void *)(long)(event->header.size),
  55. event->comm.comm, event->comm.pid);
  56. if (thread == NULL ||
  57. thread__set_comm(thread, event->comm.comm)) {
  58. dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
  59. return -1;
  60. }
  61. return 0;
  62. }
  63. static void insert_alloc_stat(unsigned long ptr,
  64. int bytes_req, int bytes_alloc)
  65. {
  66. struct rb_node **node = &root_alloc_stat.rb_node;
  67. struct rb_node *parent = NULL;
  68. struct alloc_stat *data = NULL;
  69. if (!alloc_flag)
  70. return;
  71. while (*node) {
  72. parent = *node;
  73. data = rb_entry(*node, struct alloc_stat, node);
  74. if (ptr > data->ptr)
  75. node = &(*node)->rb_right;
  76. else if (ptr < data->ptr)
  77. node = &(*node)->rb_left;
  78. else
  79. break;
  80. }
  81. if (data && data->ptr == ptr) {
  82. data->hit++;
  83. data->bytes_req += bytes_req;
  84. data->bytes_alloc += bytes_req;
  85. } else {
  86. data = malloc(sizeof(*data));
  87. data->ptr = ptr;
  88. data->hit = 1;
  89. data->bytes_req = bytes_req;
  90. data->bytes_alloc = bytes_alloc;
  91. rb_link_node(&data->node, parent, node);
  92. rb_insert_color(&data->node, &root_alloc_stat);
  93. }
  94. }
  95. static void insert_caller_stat(unsigned long call_site,
  96. int bytes_req, int bytes_alloc)
  97. {
  98. struct rb_node **node = &root_caller_stat.rb_node;
  99. struct rb_node *parent = NULL;
  100. struct alloc_stat *data = NULL;
  101. if (!caller_flag)
  102. return;
  103. while (*node) {
  104. parent = *node;
  105. data = rb_entry(*node, struct alloc_stat, node);
  106. if (call_site > data->call_site)
  107. node = &(*node)->rb_right;
  108. else if (call_site < data->call_site)
  109. node = &(*node)->rb_left;
  110. else
  111. break;
  112. }
  113. if (data && data->call_site == call_site) {
  114. data->hit++;
  115. data->bytes_req += bytes_req;
  116. data->bytes_alloc += bytes_req;
  117. } else {
  118. data = malloc(sizeof(*data));
  119. data->call_site = call_site;
  120. data->hit = 1;
  121. data->bytes_req = bytes_req;
  122. data->bytes_alloc = bytes_alloc;
  123. rb_link_node(&data->node, parent, node);
  124. rb_insert_color(&data->node, &root_caller_stat);
  125. }
  126. }
  127. static void process_alloc_event(struct raw_event_sample *raw,
  128. struct event *event,
  129. int cpu __used,
  130. u64 timestamp __used,
  131. struct thread *thread __used,
  132. int node __used)
  133. {
  134. unsigned long call_site;
  135. unsigned long ptr;
  136. int bytes_req;
  137. int bytes_alloc;
  138. ptr = raw_field_value(event, "ptr", raw->data);
  139. call_site = raw_field_value(event, "call_site", raw->data);
  140. bytes_req = raw_field_value(event, "bytes_req", raw->data);
  141. bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
  142. insert_alloc_stat(ptr, bytes_req, bytes_alloc);
  143. insert_caller_stat(call_site, bytes_req, bytes_alloc);
  144. total_requested += bytes_req;
  145. total_allocated += bytes_alloc;
  146. }
  147. static void process_free_event(struct raw_event_sample *raw __used,
  148. struct event *event __used,
  149. int cpu __used,
  150. u64 timestamp __used,
  151. struct thread *thread __used)
  152. {
  153. }
  154. static void
  155. process_raw_event(event_t *raw_event __used, void *more_data,
  156. int cpu, u64 timestamp, struct thread *thread)
  157. {
  158. struct raw_event_sample *raw = more_data;
  159. struct event *event;
  160. int type;
  161. type = trace_parse_common_type(raw->data);
  162. event = trace_find_event(type);
  163. if (!strcmp(event->name, "kmalloc") ||
  164. !strcmp(event->name, "kmem_cache_alloc")) {
  165. process_alloc_event(raw, event, cpu, timestamp, thread, 0);
  166. return;
  167. }
  168. if (!strcmp(event->name, "kmalloc_node") ||
  169. !strcmp(event->name, "kmem_cache_alloc_node")) {
  170. process_alloc_event(raw, event, cpu, timestamp, thread, 1);
  171. return;
  172. }
  173. if (!strcmp(event->name, "kfree") ||
  174. !strcmp(event->name, "kmem_cache_free")) {
  175. process_free_event(raw, event, cpu, timestamp, thread);
  176. return;
  177. }
  178. }
  179. static int
  180. process_sample_event(event_t *event, unsigned long offset, unsigned long head)
  181. {
  182. u64 ip = event->ip.ip;
  183. u64 timestamp = -1;
  184. u32 cpu = -1;
  185. u64 period = 1;
  186. void *more_data = event->ip.__more_data;
  187. struct thread *thread = threads__findnew(event->ip.pid);
  188. if (sample_type & PERF_SAMPLE_TIME) {
  189. timestamp = *(u64 *)more_data;
  190. more_data += sizeof(u64);
  191. }
  192. if (sample_type & PERF_SAMPLE_CPU) {
  193. cpu = *(u32 *)more_data;
  194. more_data += sizeof(u32);
  195. more_data += sizeof(u32); /* reserved */
  196. }
  197. if (sample_type & PERF_SAMPLE_PERIOD) {
  198. period = *(u64 *)more_data;
  199. more_data += sizeof(u64);
  200. }
  201. dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
  202. (void *)(offset + head),
  203. (void *)(long)(event->header.size),
  204. event->header.misc,
  205. event->ip.pid, event->ip.tid,
  206. (void *)(long)ip,
  207. (long long)period);
  208. if (thread == NULL) {
  209. pr_debug("problem processing %d event, skipping it.\n",
  210. event->header.type);
  211. return -1;
  212. }
  213. dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
  214. process_raw_event(event, more_data, cpu, timestamp, thread);
  215. return 0;
  216. }
  217. static int sample_type_check(u64 type)
  218. {
  219. sample_type = type;
  220. if (!(sample_type & PERF_SAMPLE_RAW)) {
  221. fprintf(stderr,
  222. "No trace sample to read. Did you call perf record "
  223. "without -R?");
  224. return -1;
  225. }
  226. return 0;
  227. }
  228. static struct perf_file_handler file_handler = {
  229. .process_sample_event = process_sample_event,
  230. .process_comm_event = process_comm_event,
  231. .sample_type_check = sample_type_check,
  232. };
  233. static int read_events(void)
  234. {
  235. register_idle_thread();
  236. register_perf_file_handler(&file_handler);
  237. return mmap_dispatch_perf_file(&header, input_name, 0, 0,
  238. &cwdlen, &cwd);
  239. }
  /*
   * Percentage of allocated bytes that were not requested:
   * 100 - 100 * n_req / n_alloc.  Returns 0.0 when nothing was allocated,
   * which also avoids the division by zero.
   */
  static double fragmentation(unsigned long n_req, unsigned long n_alloc)
  {
      double used_pct;

      if (!n_alloc)
          return 0.0;

      used_pct = 100.0 * n_req / n_alloc;
      return 100.0 - used_pct;
  }
  247. static void __print_result(struct rb_root *root, int n_lines, int is_caller)
  248. {
  249. struct rb_node *next;
  250. printf("\n ------------------------------------------------------------------------------\n");
  251. if (is_caller)
  252. printf(" Callsite |");
  253. else
  254. printf(" Alloc Ptr |");
  255. printf(" Total_alloc/Per | Total_req/Per | Hit | Fragmentation\n");
  256. printf(" ------------------------------------------------------------------------------\n");
  257. next = rb_first(root);
  258. while (next && n_lines--) {
  259. struct alloc_stat *data;
  260. data = rb_entry(next, struct alloc_stat, node);
  261. printf(" %-16p | %8llu/%-6lu | %8llu/%-6lu | %6lu | %8.3f%%\n",
  262. is_caller ? (void *)(unsigned long)data->call_site :
  263. (void *)(unsigned long)data->ptr,
  264. (unsigned long long)data->bytes_alloc,
  265. (unsigned long)data->bytes_alloc / data->hit,
  266. (unsigned long long)data->bytes_req,
  267. (unsigned long)data->bytes_req / data->hit,
  268. (unsigned long)data->hit,
  269. fragmentation(data->bytes_req, data->bytes_alloc));
  270. next = rb_next(next);
  271. }
  272. if (n_lines == -1)
  273. printf(" ... | ... | ... | ... | ... \n");
  274. printf(" ------------------------------------------------------------------------------\n");
  275. }
  276. static void print_summary(void)
  277. {
  278. printf("\nSUMMARY\n=======\n");
  279. printf("Total bytes requested: %lu\n", total_requested);
  280. printf("Total bytes allocated: %lu\n", total_allocated);
  281. printf("Total bytes wasted on internal fragmentation: %lu\n",
  282. total_allocated - total_requested);
  283. printf("Internal fragmentation: %f%%\n",
  284. fragmentation(total_requested, total_allocated));
  285. }
  286. static void print_result(void)
  287. {
  288. if (caller_flag)
  289. __print_result(&root_caller_sorted, caller_lines, 1);
  290. if (alloc_flag)
  291. __print_result(&root_alloc_sorted, alloc_lines, 0);
  292. print_summary();
  293. }
  294. static void sort_insert(struct rb_root *root, struct alloc_stat *data,
  295. sort_fn_t sort_fn)
  296. {
  297. struct rb_node **new = &(root->rb_node);
  298. struct rb_node *parent = NULL;
  299. while (*new) {
  300. struct alloc_stat *this;
  301. int cmp;
  302. this = rb_entry(*new, struct alloc_stat, node);
  303. parent = *new;
  304. cmp = sort_fn(data, this);
  305. if (cmp > 0)
  306. new = &((*new)->rb_left);
  307. else
  308. new = &((*new)->rb_right);
  309. }
  310. rb_link_node(&data->node, parent, new);
  311. rb_insert_color(&data->node, root);
  312. }
  313. static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
  314. sort_fn_t sort_fn)
  315. {
  316. struct rb_node *node;
  317. struct alloc_stat *data;
  318. for (;;) {
  319. node = rb_first(root);
  320. if (!node)
  321. break;
  322. rb_erase(node, root);
  323. data = rb_entry(node, struct alloc_stat, node);
  324. sort_insert(root_sorted, data, sort_fn);
  325. }
  326. }
  327. static void sort_result(void)
  328. {
  329. __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn);
  330. __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn);
  331. }
  /*
   * Report path: read the input file, sort the collected statistics and
   * print them.
   *
   * Returns 0 on success, or the value returned by read_events() when it
   * is non-zero.
   */
  static int __cmd_kmem(void)
  {
      int err;

      setup_pager();

      /*
       * NOTE(review): a non-zero return from read_events() is assumed to
       * signal failure -- confirm against mmap_dispatch_perf_file().
       * The result used to be ignored, so a failed read still printed
       * empty tables and exited with status 0.
       */
      err = read_events();
      if (err)
          return err;

      sort_result();
      print_result();

      return 0;
  }
  340. static const char * const kmem_usage[] = {
  341. "perf kmem [<options>] {record}",
  342. NULL
  343. };
  344. static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
  345. {
  346. if (l->ptr < r->ptr)
  347. return -1;
  348. else if (l->ptr > r->ptr)
  349. return 1;
  350. return 0;
  351. }
  352. static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
  353. {
  354. if (l->call_site < r->call_site)
  355. return -1;
  356. else if (l->call_site > r->call_site)
  357. return 1;
  358. return 0;
  359. }
  360. static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
  361. {
  362. if (l->bytes_alloc < r->bytes_alloc)
  363. return -1;
  364. else if (l->bytes_alloc > r->bytes_alloc)
  365. return 1;
  366. return 0;
  367. }
  368. static int parse_sort_opt(const struct option *opt __used,
  369. const char *arg, int unset __used)
  370. {
  371. sort_fn_t sort_fn;
  372. if (!arg)
  373. return -1;
  374. if (strcmp(arg, "ptr") == 0)
  375. sort_fn = ptr_cmp;
  376. else if (strcmp(arg, "call_site") == 0)
  377. sort_fn = callsite_cmp;
  378. else if (strcmp(arg, "bytes") == 0)
  379. sort_fn = bytes_cmp;
  380. else
  381. return -1;
  382. if (caller_flag > alloc_flag)
  383. caller_sort_fn = sort_fn;
  384. else
  385. alloc_sort_fn = sort_fn;
  386. return 0;
  387. }
  388. static int parse_stat_opt(const struct option *opt __used,
  389. const char *arg, int unset __used)
  390. {
  391. if (!arg)
  392. return -1;
  393. if (strcmp(arg, "alloc") == 0)
  394. alloc_flag = (caller_flag + 1);
  395. else if (strcmp(arg, "caller") == 0)
  396. caller_flag = (alloc_flag + 1);
  397. else
  398. return -1;
  399. return 0;
  400. }
  401. static int parse_line_opt(const struct option *opt __used,
  402. const char *arg, int unset __used)
  403. {
  404. int lines;
  405. if (!arg)
  406. return -1;
  407. lines = strtoul(arg, NULL, 10);
  408. if (caller_flag > alloc_flag)
  409. caller_lines = lines;
  410. else
  411. alloc_lines = lines;
  412. return 0;
  413. }
  414. static const struct option kmem_options[] = {
  415. OPT_STRING('i', "input", &input_name, "file",
  416. "input file name"),
  417. OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
  418. "stat selector, Pass 'alloc' or 'caller'.",
  419. parse_stat_opt),
  420. OPT_CALLBACK('s', "sort", NULL, "key",
  421. "sort by key: ptr, call_site, hit, bytes",
  422. parse_sort_opt),
  423. OPT_CALLBACK('l', "line", NULL, "num",
  424. "show n lins",
  425. parse_line_opt),
  426. OPT_END()
  427. };
  /*
   * Fixed argument prefix used when "perf kmem record" is forwarded to
   * cmd_record(); it selects the six kmem tracepoints.  The table itself is
   * never modified, so the pointers are const as well, matching kmem_usage.
   */
  static const char * const record_args[] = {
      "record",
      "-a",
      "-R",
      "-M",
      "-f",
      "-c", "1",
      "-e", "kmem:kmalloc",
      "-e", "kmem:kmalloc_node",
      "-e", "kmem:kfree",
      "-e", "kmem:kmem_cache_alloc",
      "-e", "kmem:kmem_cache_alloc_node",
      "-e", "kmem:kmem_cache_free",
  };
  442. static int __cmd_record(int argc, const char **argv)
  443. {
  444. unsigned int rec_argc, i, j;
  445. const char **rec_argv;
  446. rec_argc = ARRAY_SIZE(record_args) + argc - 1;
  447. rec_argv = calloc(rec_argc + 1, sizeof(char *));
  448. for (i = 0; i < ARRAY_SIZE(record_args); i++)
  449. rec_argv[i] = strdup(record_args[i]);
  450. for (j = 1; j < (unsigned int)argc; j++, i++)
  451. rec_argv[i] = argv[j];
  452. return cmd_record(i, rec_argv, NULL);
  453. }
  454. int cmd_kmem(int argc, const char **argv, const char *prefix __used)
  455. {
  456. symbol__init(0);
  457. argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
  458. if (argc && !strncmp(argv[0], "rec", 3))
  459. return __cmd_record(argc, argv);
  460. else if (argc)
  461. usage_with_options(kmem_usage, kmem_options);
  462. if (!alloc_sort_fn)
  463. alloc_sort_fn = bytes_cmp;
  464. if (!caller_sort_fn)
  465. caller_sort_fn = bytes_cmp;
  466. return __cmd_kmem();
  467. }