builtin-sched.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063
  1. #include "builtin.h"
  2. #include "util/util.h"
  3. #include "util/cache.h"
  4. #include "util/symbol.h"
  5. #include "util/thread.h"
  6. #include "util/header.h"
  7. #include "util/parse-options.h"
  8. #include "perf.h"
  9. #include "util/debug.h"
  10. #include "util/trace-event.h"
  11. #include <sys/types.h>
  12. static char const *input_name = "perf.data";
  13. static int input;
  14. static unsigned long page_size;
  15. static unsigned long mmap_window = 32;
  16. static unsigned long total_comm = 0;
  17. static struct rb_root threads;
  18. static struct thread *last_match;
  19. static struct perf_header *header;
  20. static u64 sample_type;
  21. /*
  22. * Scheduler benchmarks
  23. */
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/prctl.h>
#include <linux/unistd.h>
#include <semaphore.h>
#include <pthread.h>
#include <signal.h>
#include <limits.h>
#include <values.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <fcntl.h>
#include <time.h>
#include <math.h>
#include <stdio.h>
  42. #define PR_SET_NAME 15 /* Set process name */
  43. #define BUG_ON(x) assert(!(x))
  44. #define DEBUG 0
  45. typedef unsigned long long nsec_t;
  46. static nsec_t run_measurement_overhead;
  47. static nsec_t sleep_measurement_overhead;
  48. static nsec_t get_nsecs(void)
  49. {
  50. struct timespec ts;
  51. clock_gettime(CLOCK_MONOTONIC, &ts);
  52. return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
  53. }
  54. static void burn_nsecs(nsec_t nsecs)
  55. {
  56. nsec_t T0 = get_nsecs(), T1;
  57. do {
  58. T1 = get_nsecs();
  59. } while (T1 + run_measurement_overhead < T0 + nsecs);
  60. }
  61. static void sleep_nsecs(nsec_t nsecs)
  62. {
  63. struct timespec ts;
  64. ts.tv_nsec = nsecs % 999999999;
  65. ts.tv_sec = nsecs / 999999999;
  66. nanosleep(&ts, NULL);
  67. }
  68. static void calibrate_run_measurement_overhead(void)
  69. {
  70. nsec_t T0, T1, delta, min_delta = 1000000000ULL;
  71. int i;
  72. for (i = 0; i < 10; i++) {
  73. T0 = get_nsecs();
  74. burn_nsecs(0);
  75. T1 = get_nsecs();
  76. delta = T1-T0;
  77. min_delta = min(min_delta, delta);
  78. }
  79. run_measurement_overhead = min_delta;
  80. printf("run measurement overhead: %Ld nsecs\n", min_delta);
  81. }
  82. static void calibrate_sleep_measurement_overhead(void)
  83. {
  84. nsec_t T0, T1, delta, min_delta = 1000000000ULL;
  85. int i;
  86. for (i = 0; i < 10; i++) {
  87. T0 = get_nsecs();
  88. sleep_nsecs(10000);
  89. T1 = get_nsecs();
  90. delta = T1-T0;
  91. min_delta = min(min_delta, delta);
  92. }
  93. min_delta -= 10000;
  94. sleep_measurement_overhead = min_delta;
  95. printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
  96. }
  97. #define COMM_LEN 20
  98. #define SYM_LEN 129
  99. #define MAX_PID 65536
  100. static unsigned long nr_tasks;
  101. struct sched_event;
  102. struct task_desc {
  103. unsigned long nr;
  104. unsigned long pid;
  105. char comm[COMM_LEN];
  106. unsigned long nr_events;
  107. unsigned long curr_event;
  108. struct sched_event **events;
  109. pthread_t thread;
  110. sem_t sleep_sem;
  111. sem_t ready_for_work;
  112. sem_t work_done_sem;
  113. nsec_t cpu_usage;
  114. };
  115. enum sched_event_type {
  116. SCHED_EVENT_RUN,
  117. SCHED_EVENT_SLEEP,
  118. SCHED_EVENT_WAKEUP,
  119. };
  120. struct sched_event {
  121. enum sched_event_type type;
  122. nsec_t timestamp;
  123. nsec_t duration;
  124. unsigned long nr;
  125. int specific_wait;
  126. sem_t *wait_sem;
  127. struct task_desc *wakee;
  128. };
  129. static struct task_desc *pid_to_task[MAX_PID];
  130. static struct task_desc **tasks;
  131. static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
  132. static nsec_t start_time;
  133. static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;
  134. static unsigned long nr_run_events;
  135. static unsigned long nr_sleep_events;
  136. static unsigned long nr_wakeup_events;
  137. static unsigned long nr_sleep_corrections;
  138. static unsigned long nr_run_events_optimized;
  139. static struct sched_event *
  140. get_new_event(struct task_desc *task, nsec_t timestamp)
  141. {
  142. struct sched_event *event = calloc(1, sizeof(*event));
  143. unsigned long idx = task->nr_events;
  144. size_t size;
  145. event->timestamp = timestamp;
  146. event->nr = idx;
  147. task->nr_events++;
  148. size = sizeof(struct sched_event *) * task->nr_events;
  149. task->events = realloc(task->events, size);
  150. BUG_ON(!task->events);
  151. task->events[idx] = event;
  152. return event;
  153. }
  154. static struct sched_event *last_event(struct task_desc *task)
  155. {
  156. if (!task->nr_events)
  157. return NULL;
  158. return task->events[task->nr_events - 1];
  159. }
  160. static void
  161. add_sched_event_run(struct task_desc *task, nsec_t timestamp, u64 duration)
  162. {
  163. struct sched_event *event, *curr_event = last_event(task);
  164. /*
  165. * optimize an existing RUN event by merging this one
  166. * to it:
  167. */
  168. if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
  169. nr_run_events_optimized++;
  170. curr_event->duration += duration;
  171. return;
  172. }
  173. event = get_new_event(task, timestamp);
  174. event->type = SCHED_EVENT_RUN;
  175. event->duration = duration;
  176. nr_run_events++;
  177. }
  178. static unsigned long targetless_wakeups;
  179. static unsigned long multitarget_wakeups;
  180. static void
  181. add_sched_event_wakeup(struct task_desc *task, nsec_t timestamp,
  182. struct task_desc *wakee)
  183. {
  184. struct sched_event *event, *wakee_event;
  185. event = get_new_event(task, timestamp);
  186. event->type = SCHED_EVENT_WAKEUP;
  187. event->wakee = wakee;
  188. wakee_event = last_event(wakee);
  189. if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
  190. targetless_wakeups++;
  191. return;
  192. }
  193. if (wakee_event->wait_sem) {
  194. multitarget_wakeups++;
  195. return;
  196. }
  197. wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
  198. sem_init(wakee_event->wait_sem, 0, 0);
  199. wakee_event->specific_wait = 1;
  200. event->wait_sem = wakee_event->wait_sem;
  201. nr_wakeup_events++;
  202. }
  203. static void
  204. add_sched_event_sleep(struct task_desc *task, nsec_t timestamp,
  205. u64 task_state __used)
  206. {
  207. struct sched_event *event = get_new_event(task, timestamp);
  208. event->type = SCHED_EVENT_SLEEP;
  209. nr_sleep_events++;
  210. }
  211. static struct task_desc *register_pid(unsigned long pid, const char *comm)
  212. {
  213. struct task_desc *task;
  214. BUG_ON(pid >= MAX_PID);
  215. task = pid_to_task[pid];
  216. if (task)
  217. return task;
  218. task = calloc(1, sizeof(*task));
  219. task->pid = pid;
  220. task->nr = nr_tasks;
  221. strcpy(task->comm, comm);
  222. /*
  223. * every task starts in sleeping state - this gets ignored
  224. * if there's no wakeup pointing to this sleep state:
  225. */
  226. add_sched_event_sleep(task, 0, 0);
  227. pid_to_task[pid] = task;
  228. nr_tasks++;
  229. tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *));
  230. BUG_ON(!tasks);
  231. tasks[task->nr] = task;
  232. if (verbose)
  233. printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);
  234. return task;
  235. }
  236. static void print_task_traces(void)
  237. {
  238. struct task_desc *task;
  239. unsigned long i;
  240. for (i = 0; i < nr_tasks; i++) {
  241. task = tasks[i];
  242. printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
  243. task->nr, task->comm, task->pid, task->nr_events);
  244. }
  245. }
  246. static void add_cross_task_wakeups(void)
  247. {
  248. struct task_desc *task1, *task2;
  249. unsigned long i, j;
  250. for (i = 0; i < nr_tasks; i++) {
  251. task1 = tasks[i];
  252. j = i + 1;
  253. if (j == nr_tasks)
  254. j = 0;
  255. task2 = tasks[j];
  256. add_sched_event_wakeup(task1, 0, task2);
  257. }
  258. }
  259. static void
  260. process_sched_event(struct task_desc *this_task __used, struct sched_event *event)
  261. {
  262. int ret = 0;
  263. nsec_t now;
  264. long long delta;
  265. now = get_nsecs();
  266. delta = start_time + event->timestamp - now;
  267. switch (event->type) {
  268. case SCHED_EVENT_RUN:
  269. burn_nsecs(event->duration);
  270. break;
  271. case SCHED_EVENT_SLEEP:
  272. if (event->wait_sem)
  273. ret = sem_wait(event->wait_sem);
  274. BUG_ON(ret);
  275. break;
  276. case SCHED_EVENT_WAKEUP:
  277. if (event->wait_sem)
  278. ret = sem_post(event->wait_sem);
  279. BUG_ON(ret);
  280. break;
  281. default:
  282. BUG_ON(1);
  283. }
  284. }
  285. static nsec_t get_cpu_usage_nsec_parent(void)
  286. {
  287. struct rusage ru;
  288. nsec_t sum;
  289. int err;
  290. err = getrusage(RUSAGE_SELF, &ru);
  291. BUG_ON(err);
  292. sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
  293. sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;
  294. return sum;
  295. }
  296. static nsec_t get_cpu_usage_nsec_self(void)
  297. {
  298. char filename [] = "/proc/1234567890/sched";
  299. unsigned long msecs, nsecs;
  300. char *line = NULL;
  301. nsec_t total = 0;
  302. size_t len = 0;
  303. ssize_t chars;
  304. FILE *file;
  305. int ret;
  306. sprintf(filename, "/proc/%d/sched", getpid());
  307. file = fopen(filename, "r");
  308. BUG_ON(!file);
  309. while ((chars = getline(&line, &len, file)) != -1) {
  310. ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n",
  311. &msecs, &nsecs);
  312. if (ret == 2) {
  313. total = msecs*1e6 + nsecs;
  314. break;
  315. }
  316. }
  317. if (line)
  318. free(line);
  319. fclose(file);
  320. return total;
  321. }
  322. static void *thread_func(void *ctx)
  323. {
  324. struct task_desc *this_task = ctx;
  325. nsec_t cpu_usage_0, cpu_usage_1;
  326. unsigned long i, ret;
  327. char comm2[22];
  328. sprintf(comm2, ":%s", this_task->comm);
  329. prctl(PR_SET_NAME, comm2);
  330. again:
  331. ret = sem_post(&this_task->ready_for_work);
  332. BUG_ON(ret);
  333. ret = pthread_mutex_lock(&start_work_mutex);
  334. BUG_ON(ret);
  335. ret = pthread_mutex_unlock(&start_work_mutex);
  336. BUG_ON(ret);
  337. cpu_usage_0 = get_cpu_usage_nsec_self();
  338. for (i = 0; i < this_task->nr_events; i++) {
  339. this_task->curr_event = i;
  340. process_sched_event(this_task, this_task->events[i]);
  341. }
  342. cpu_usage_1 = get_cpu_usage_nsec_self();
  343. this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
  344. ret = sem_post(&this_task->work_done_sem);
  345. BUG_ON(ret);
  346. ret = pthread_mutex_lock(&work_done_wait_mutex);
  347. BUG_ON(ret);
  348. ret = pthread_mutex_unlock(&work_done_wait_mutex);
  349. BUG_ON(ret);
  350. goto again;
  351. }
  352. static void create_tasks(void)
  353. {
  354. struct task_desc *task;
  355. pthread_attr_t attr;
  356. unsigned long i;
  357. int err;
  358. err = pthread_attr_init(&attr);
  359. BUG_ON(err);
  360. err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
  361. BUG_ON(err);
  362. err = pthread_mutex_lock(&start_work_mutex);
  363. BUG_ON(err);
  364. err = pthread_mutex_lock(&work_done_wait_mutex);
  365. BUG_ON(err);
  366. for (i = 0; i < nr_tasks; i++) {
  367. task = tasks[i];
  368. sem_init(&task->sleep_sem, 0, 0);
  369. sem_init(&task->ready_for_work, 0, 0);
  370. sem_init(&task->work_done_sem, 0, 0);
  371. task->curr_event = 0;
  372. err = pthread_create(&task->thread, &attr, thread_func, task);
  373. BUG_ON(err);
  374. }
  375. }
  376. static nsec_t cpu_usage;
  377. static nsec_t runavg_cpu_usage;
  378. static nsec_t parent_cpu_usage;
  379. static nsec_t runavg_parent_cpu_usage;
  380. static void wait_for_tasks(void)
  381. {
  382. nsec_t cpu_usage_0, cpu_usage_1;
  383. struct task_desc *task;
  384. unsigned long i, ret;
  385. start_time = get_nsecs();
  386. cpu_usage = 0;
  387. pthread_mutex_unlock(&work_done_wait_mutex);
  388. for (i = 0; i < nr_tasks; i++) {
  389. task = tasks[i];
  390. ret = sem_wait(&task->ready_for_work);
  391. BUG_ON(ret);
  392. sem_init(&task->ready_for_work, 0, 0);
  393. }
  394. ret = pthread_mutex_lock(&work_done_wait_mutex);
  395. BUG_ON(ret);
  396. cpu_usage_0 = get_cpu_usage_nsec_parent();
  397. pthread_mutex_unlock(&start_work_mutex);
  398. for (i = 0; i < nr_tasks; i++) {
  399. task = tasks[i];
  400. ret = sem_wait(&task->work_done_sem);
  401. BUG_ON(ret);
  402. sem_init(&task->work_done_sem, 0, 0);
  403. cpu_usage += task->cpu_usage;
  404. task->cpu_usage = 0;
  405. }
  406. cpu_usage_1 = get_cpu_usage_nsec_parent();
  407. if (!runavg_cpu_usage)
  408. runavg_cpu_usage = cpu_usage;
  409. runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;
  410. parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
  411. if (!runavg_parent_cpu_usage)
  412. runavg_parent_cpu_usage = parent_cpu_usage;
  413. runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
  414. parent_cpu_usage)/10;
  415. ret = pthread_mutex_lock(&start_work_mutex);
  416. BUG_ON(ret);
  417. for (i = 0; i < nr_tasks; i++) {
  418. task = tasks[i];
  419. sem_init(&task->sleep_sem, 0, 0);
  420. task->curr_event = 0;
  421. }
  422. }
  423. static int __cmd_sched(void);
  424. static void parse_trace(void)
  425. {
  426. __cmd_sched();
  427. printf("nr_run_events: %ld\n", nr_run_events);
  428. printf("nr_sleep_events: %ld\n", nr_sleep_events);
  429. printf("nr_wakeup_events: %ld\n", nr_wakeup_events);
  430. if (targetless_wakeups)
  431. printf("target-less wakeups: %ld\n", targetless_wakeups);
  432. if (multitarget_wakeups)
  433. printf("multi-target wakeups: %ld\n", multitarget_wakeups);
  434. if (nr_run_events_optimized)
  435. printf("run events optimized: %ld\n",
  436. nr_run_events_optimized);
  437. }
  438. static unsigned long nr_runs;
  439. static nsec_t sum_runtime;
  440. static nsec_t sum_fluct;
  441. static nsec_t run_avg;
  442. static void run_one_test(void)
  443. {
  444. nsec_t T0, T1, delta, avg_delta, fluct, std_dev;
  445. T0 = get_nsecs();
  446. wait_for_tasks();
  447. T1 = get_nsecs();
  448. delta = T1 - T0;
  449. sum_runtime += delta;
  450. nr_runs++;
  451. avg_delta = sum_runtime / nr_runs;
  452. if (delta < avg_delta)
  453. fluct = avg_delta - delta;
  454. else
  455. fluct = delta - avg_delta;
  456. sum_fluct += fluct;
  457. std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
  458. if (!run_avg)
  459. run_avg = delta;
  460. run_avg = (run_avg*9 + delta)/10;
  461. printf("#%-3ld: %0.3f, ",
  462. nr_runs, (double)delta/1000000.0);
  463. #if 0
  464. printf("%0.2f +- %0.2f, ",
  465. (double)avg_delta/1e6, (double)std_dev/1e6);
  466. #endif
  467. printf("ravg: %0.2f, ",
  468. (double)run_avg/1e6);
  469. printf("cpu: %0.2f / %0.2f",
  470. (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);
  471. #if 0
  472. /*
  473. * rusage statistics done by the parent, these are less
  474. * accurate than the sum_exec_runtime based statistics:
  475. */
  476. printf(" [%0.2f / %0.2f]",
  477. (double)parent_cpu_usage/1e6,
  478. (double)runavg_parent_cpu_usage/1e6);
  479. #endif
  480. printf("\n");
  481. if (nr_sleep_corrections)
  482. printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
  483. nr_sleep_corrections = 0;
  484. }
  485. static void test_calibrations(void)
  486. {
  487. nsec_t T0, T1;
  488. T0 = get_nsecs();
  489. burn_nsecs(1e6);
  490. T1 = get_nsecs();
  491. printf("the run test took %Ld nsecs\n", T1-T0);
  492. T0 = get_nsecs();
  493. sleep_nsecs(1e6);
  494. T1 = get_nsecs();
  495. printf("the sleep test took %Ld nsecs\n", T1-T0);
  496. }
  497. static int
  498. process_comm_event(event_t *event, unsigned long offset, unsigned long head)
  499. {
  500. struct thread *thread;
  501. thread = threads__findnew(event->comm.pid, &threads, &last_match);
  502. dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
  503. (void *)(offset + head),
  504. (void *)(long)(event->header.size),
  505. event->comm.comm, event->comm.pid);
  506. if (thread == NULL ||
  507. thread__set_comm(thread, event->comm.comm)) {
  508. dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
  509. return -1;
  510. }
  511. total_comm++;
  512. return 0;
  513. }
  514. struct trace_wakeup_event {
  515. u32 size;
  516. u16 common_type;
  517. u8 common_flags;
  518. u8 common_preempt_count;
  519. u32 common_pid;
  520. u32 common_tgid;
  521. char comm[16];
  522. u32 pid;
  523. u32 prio;
  524. u32 success;
  525. u32 cpu;
  526. };
  527. static void
  528. process_sched_wakeup_event(struct trace_wakeup_event *wakeup_event, struct event *event,
  529. int cpu __used, u64 timestamp __used, struct thread *thread __used)
  530. {
  531. struct task_desc *waker, *wakee;
  532. if (verbose) {
  533. printf("sched_wakeup event %p\n", event);
  534. printf(" ... pid %d woke up %s/%d\n",
  535. wakeup_event->common_pid,
  536. wakeup_event->comm,
  537. wakeup_event->pid);
  538. }
  539. waker = register_pid(wakeup_event->common_pid, "<unknown>");
  540. wakee = register_pid(wakeup_event->pid, wakeup_event->comm);
  541. add_sched_event_wakeup(waker, timestamp, wakee);
  542. }
  543. struct trace_switch_event {
  544. u32 size;
  545. u16 common_type;
  546. u8 common_flags;
  547. u8 common_preempt_count;
  548. u32 common_pid;
  549. u32 common_tgid;
  550. char prev_comm[16];
  551. u32 prev_pid;
  552. u32 prev_prio;
  553. u64 prev_state;
  554. char next_comm[16];
  555. u32 next_pid;
  556. u32 next_prio;
  557. };
  558. #define MAX_CPUS 4096
  559. unsigned long cpu_last_switched[MAX_CPUS];
  560. static void
  561. process_sched_switch_event(struct trace_switch_event *switch_event, struct event *event,
  562. int cpu __used, u64 timestamp __used, struct thread *thread __used)
  563. {
  564. struct task_desc *prev, *next;
  565. u64 timestamp0;
  566. s64 delta;
  567. if (verbose)
  568. printf("sched_switch event %p\n", event);
  569. if (cpu >= MAX_CPUS || cpu < 0)
  570. return;
  571. timestamp0 = cpu_last_switched[cpu];
  572. if (timestamp0)
  573. delta = timestamp - timestamp0;
  574. else
  575. delta = 0;
  576. if (delta < 0)
  577. die("hm, delta: %Ld < 0 ?\n", delta);
  578. if (verbose) {
  579. printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
  580. switch_event->prev_comm, switch_event->prev_pid,
  581. switch_event->next_comm, switch_event->next_pid,
  582. delta);
  583. }
  584. prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
  585. next = register_pid(switch_event->next_pid, switch_event->next_comm);
  586. cpu_last_switched[cpu] = timestamp;
  587. add_sched_event_run(prev, timestamp, delta);
  588. add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
  589. }
  590. struct trace_fork_event {
  591. u32 size;
  592. u16 common_type;
  593. u8 common_flags;
  594. u8 common_preempt_count;
  595. u32 common_pid;
  596. u32 common_tgid;
  597. char parent_comm[16];
  598. u32 parent_pid;
  599. char child_comm[16];
  600. u32 child_pid;
  601. };
  602. static void
  603. process_sched_fork_event(struct trace_fork_event *fork_event, struct event *event,
  604. int cpu __used, u64 timestamp __used, struct thread *thread __used)
  605. {
  606. if (verbose) {
  607. printf("sched_fork event %p\n", event);
  608. printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
  609. printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
  610. }
  611. register_pid(fork_event->parent_pid, fork_event->parent_comm);
  612. register_pid(fork_event->child_pid, fork_event->child_comm);
  613. }
  614. static void process_sched_exit_event(struct event *event,
  615. int cpu __used, u64 timestamp __used, struct thread *thread __used)
  616. {
  617. if (verbose)
  618. printf("sched_exit event %p\n", event);
  619. }
  620. static void
  621. process_raw_event(event_t *raw_event __used, void *more_data,
  622. int cpu, u64 timestamp, struct thread *thread)
  623. {
  624. struct {
  625. u32 size;
  626. char data[0];
  627. } *raw = more_data;
  628. struct event *event;
  629. int type;
  630. type = trace_parse_common_type(raw->data);
  631. event = trace_find_event(type);
  632. if (!strcmp(event->name, "sched_switch"))
  633. process_sched_switch_event(more_data, event, cpu, timestamp, thread);
  634. if (!strcmp(event->name, "sched_wakeup"))
  635. process_sched_wakeup_event(more_data, event, cpu, timestamp, thread);
  636. if (!strcmp(event->name, "sched_wakeup_new"))
  637. process_sched_wakeup_event(more_data, event, cpu, timestamp, thread);
  638. if (!strcmp(event->name, "sched_process_fork"))
  639. process_sched_fork_event(more_data, event, cpu, timestamp, thread);
  640. if (!strcmp(event->name, "sched_process_exit"))
  641. process_sched_exit_event(event, cpu, timestamp, thread);
  642. }
  643. static int
  644. process_sample_event(event_t *event, unsigned long offset, unsigned long head)
  645. {
  646. char level;
  647. int show = 0;
  648. struct dso *dso = NULL;
  649. struct thread *thread;
  650. u64 ip = event->ip.ip;
  651. u64 timestamp = -1;
  652. u32 cpu = -1;
  653. u64 period = 1;
  654. void *more_data = event->ip.__more_data;
  655. int cpumode;
  656. thread = threads__findnew(event->ip.pid, &threads, &last_match);
  657. if (sample_type & PERF_SAMPLE_TIME) {
  658. timestamp = *(u64 *)more_data;
  659. more_data += sizeof(u64);
  660. }
  661. if (sample_type & PERF_SAMPLE_CPU) {
  662. cpu = *(u32 *)more_data;
  663. more_data += sizeof(u32);
  664. more_data += sizeof(u32); /* reserved */
  665. }
  666. if (sample_type & PERF_SAMPLE_PERIOD) {
  667. period = *(u64 *)more_data;
  668. more_data += sizeof(u64);
  669. }
  670. dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
  671. (void *)(offset + head),
  672. (void *)(long)(event->header.size),
  673. event->header.misc,
  674. event->ip.pid, event->ip.tid,
  675. (void *)(long)ip,
  676. (long long)period);
  677. dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
  678. if (thread == NULL) {
  679. eprintf("problem processing %d event, skipping it.\n",
  680. event->header.type);
  681. return -1;
  682. }
  683. cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;
  684. if (cpumode == PERF_EVENT_MISC_KERNEL) {
  685. show = SHOW_KERNEL;
  686. level = 'k';
  687. dso = kernel_dso;
  688. dump_printf(" ...... dso: %s\n", dso->name);
  689. } else if (cpumode == PERF_EVENT_MISC_USER) {
  690. show = SHOW_USER;
  691. level = '.';
  692. } else {
  693. show = SHOW_HV;
  694. level = 'H';
  695. dso = hypervisor_dso;
  696. dump_printf(" ...... dso: [hypervisor]\n");
  697. }
  698. if (sample_type & PERF_SAMPLE_RAW)
  699. process_raw_event(event, more_data, cpu, timestamp, thread);
  700. return 0;
  701. }
  702. static int
  703. process_event(event_t *event, unsigned long offset, unsigned long head)
  704. {
  705. trace_event(event);
  706. switch (event->header.type) {
  707. case PERF_EVENT_MMAP ... PERF_EVENT_LOST:
  708. return 0;
  709. case PERF_EVENT_COMM:
  710. return process_comm_event(event, offset, head);
  711. case PERF_EVENT_EXIT ... PERF_EVENT_READ:
  712. return 0;
  713. case PERF_EVENT_SAMPLE:
  714. return process_sample_event(event, offset, head);
  715. case PERF_EVENT_MAX:
  716. default:
  717. return -1;
  718. }
  719. return 0;
  720. }
  721. static int __cmd_sched(void)
  722. {
  723. int ret, rc = EXIT_FAILURE;
  724. unsigned long offset = 0;
  725. unsigned long head = 0;
  726. struct stat perf_stat;
  727. event_t *event;
  728. uint32_t size;
  729. char *buf;
  730. trace_report();
  731. register_idle_thread(&threads, &last_match);
  732. input = open(input_name, O_RDONLY);
  733. if (input < 0) {
  734. perror("failed to open file");
  735. exit(-1);
  736. }
  737. ret = fstat(input, &perf_stat);
  738. if (ret < 0) {
  739. perror("failed to stat file");
  740. exit(-1);
  741. }
  742. if (!perf_stat.st_size) {
  743. fprintf(stderr, "zero-sized file, nothing to do!\n");
  744. exit(0);
  745. }
  746. header = perf_header__read(input);
  747. head = header->data_offset;
  748. sample_type = perf_header__sample_type(header);
  749. if (!(sample_type & PERF_SAMPLE_RAW))
  750. die("No trace sample to read. Did you call perf record "
  751. "without -R?");
  752. if (load_kernel() < 0) {
  753. perror("failed to load kernel symbols");
  754. return EXIT_FAILURE;
  755. }
  756. remap:
  757. buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
  758. MAP_SHARED, input, offset);
  759. if (buf == MAP_FAILED) {
  760. perror("failed to mmap file");
  761. exit(-1);
  762. }
  763. more:
  764. event = (event_t *)(buf + head);
  765. size = event->header.size;
  766. if (!size)
  767. size = 8;
  768. if (head + event->header.size >= page_size * mmap_window) {
  769. unsigned long shift = page_size * (head / page_size);
  770. int res;
  771. res = munmap(buf, page_size * mmap_window);
  772. assert(res == 0);
  773. offset += shift;
  774. head -= shift;
  775. goto remap;
  776. }
  777. size = event->header.size;
  778. if (!size || process_event(event, offset, head) < 0) {
  779. /*
  780. * assume we lost track of the stream, check alignment, and
  781. * increment a single u64 in the hope to catch on again 'soon'.
  782. */
  783. if (unlikely(head & 7))
  784. head &= ~7ULL;
  785. size = 8;
  786. }
  787. head += size;
  788. if (offset + head < (unsigned long)perf_stat.st_size)
  789. goto more;
  790. rc = EXIT_SUCCESS;
  791. close(input);
  792. return rc;
  793. }
  794. static const char * const annotate_usage[] = {
  795. "perf trace [<options>] <command>",
  796. NULL
  797. };
  798. static const struct option options[] = {
  799. OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
  800. "dump raw trace in ASCII"),
  801. OPT_BOOLEAN('v', "verbose", &verbose,
  802. "be more verbose (show symbol address, etc)"),
  803. OPT_END()
  804. };
  805. int cmd_sched(int argc, const char **argv, const char *prefix __used)
  806. {
  807. long nr_iterations = 10, i;
  808. symbol__init();
  809. page_size = getpagesize();
  810. argc = parse_options(argc, argv, options, annotate_usage, 0);
  811. if (argc) {
  812. /*
  813. * Special case: if there's an argument left then assume tha
  814. * it's a symbol filter:
  815. */
  816. if (argc > 1)
  817. usage_with_options(annotate_usage, options);
  818. }
  819. // setup_pager();
  820. calibrate_run_measurement_overhead();
  821. calibrate_sleep_measurement_overhead();
  822. test_calibrations();
  823. parse_trace();
  824. print_task_traces();
  825. add_cross_task_wakeups();
  826. create_tasks();
  827. printf("------------------------------------------------------------\n");
  828. for (i = 0; i < nr_iterations; i++)
  829. run_one_test();
  830. return 0;
  831. }