trace_syscalls.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. #include <trace/syscall.h>
  2. #include <trace/events/syscalls.h>
  3. #include <linux/kernel.h>
  4. #include <linux/ftrace.h>
  5. #include <linux/perf_event.h>
  6. #include <asm/syscall.h>
  7. #include "trace_output.h"
  8. #include "trace.h"
  /*
   * Taken by the reg/unreg helpers in this file while they update the
   * refcounts and enable bitmaps.
   */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* How many syscall events are currently enabled on each tracepoint. */
  static int sys_refcount_enter;
  static int sys_refcount_exit;

  /* One bit per syscall number, set while that syscall's event is enabled. */
  static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
  static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

  /* Bounds of the metadata records scanned by find_syscall_meta(). */
  extern unsigned long __start_syscalls_metadata[];
  extern unsigned long __stop_syscalls_metadata[];

  /* Indexed by syscall number; allocated and filled by init_ftrace_syscalls(). */
  static struct syscall_metadata **syscalls_metadata;
  17. static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
  18. {
  19. struct syscall_metadata *start;
  20. struct syscall_metadata *stop;
  21. char str[KSYM_SYMBOL_LEN];
  22. start = (struct syscall_metadata *)__start_syscalls_metadata;
  23. stop = (struct syscall_metadata *)__stop_syscalls_metadata;
  24. kallsyms_lookup(syscall, NULL, NULL, NULL, str);
  25. for ( ; start < stop; start++) {
  26. /*
  27. * Only compare after the "sys" prefix. Archs that use
  28. * syscall wrappers may have syscalls symbols aliases prefixed
  29. * with "SyS" instead of "sys", leading to an unwanted
  30. * mismatch.
  31. */
  32. if (start->name && !strcmp(start->name + 3, str + 3))
  33. return start;
  34. }
  35. return NULL;
  36. }
  37. static struct syscall_metadata *syscall_nr_to_meta(int nr)
  38. {
  39. if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
  40. return NULL;
  41. return syscalls_metadata[nr];
  42. }
  43. int syscall_name_to_nr(char *name)
  44. {
  45. int i;
  46. if (!syscalls_metadata)
  47. return -1;
  48. for (i = 0; i < NR_syscalls; i++) {
  49. if (syscalls_metadata[i]) {
  50. if (!strcmp(syscalls_metadata[i]->name, name))
  51. return i;
  52. }
  53. }
  54. return -1;
  55. }
  56. void set_syscall_enter_id(int num, int id)
  57. {
  58. syscalls_metadata[num]->enter_id = id;
  59. }
  60. void set_syscall_exit_id(int num, int id)
  61. {
  62. syscalls_metadata[num]->exit_id = id;
  63. }
  64. enum print_line_t
  65. print_syscall_enter(struct trace_iterator *iter, int flags)
  66. {
  67. struct trace_seq *s = &iter->seq;
  68. struct trace_entry *ent = iter->ent;
  69. struct syscall_trace_enter *trace;
  70. struct syscall_metadata *entry;
  71. int i, ret, syscall;
  72. trace = (typeof(trace))ent;
  73. syscall = trace->nr;
  74. entry = syscall_nr_to_meta(syscall);
  75. if (!entry)
  76. goto end;
  77. if (entry->enter_id != ent->type) {
  78. WARN_ON_ONCE(1);
  79. goto end;
  80. }
  81. ret = trace_seq_printf(s, "%s(", entry->name);
  82. if (!ret)
  83. return TRACE_TYPE_PARTIAL_LINE;
  84. for (i = 0; i < entry->nb_args; i++) {
  85. /* parameter types */
  86. if (trace_flags & TRACE_ITER_VERBOSE) {
  87. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  88. if (!ret)
  89. return TRACE_TYPE_PARTIAL_LINE;
  90. }
  91. /* parameter values */
  92. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  93. trace->args[i],
  94. i == entry->nb_args - 1 ? "" : ", ");
  95. if (!ret)
  96. return TRACE_TYPE_PARTIAL_LINE;
  97. }
  98. ret = trace_seq_putc(s, ')');
  99. if (!ret)
  100. return TRACE_TYPE_PARTIAL_LINE;
  101. end:
  102. ret = trace_seq_putc(s, '\n');
  103. if (!ret)
  104. return TRACE_TYPE_PARTIAL_LINE;
  105. return TRACE_TYPE_HANDLED;
  106. }
  107. enum print_line_t
  108. print_syscall_exit(struct trace_iterator *iter, int flags)
  109. {
  110. struct trace_seq *s = &iter->seq;
  111. struct trace_entry *ent = iter->ent;
  112. struct syscall_trace_exit *trace;
  113. int syscall;
  114. struct syscall_metadata *entry;
  115. int ret;
  116. trace = (typeof(trace))ent;
  117. syscall = trace->nr;
  118. entry = syscall_nr_to_meta(syscall);
  119. if (!entry) {
  120. trace_seq_printf(s, "\n");
  121. return TRACE_TYPE_HANDLED;
  122. }
  123. if (entry->exit_id != ent->type) {
  124. WARN_ON_ONCE(1);
  125. return TRACE_TYPE_UNHANDLED;
  126. }
  127. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  128. trace->ret);
  129. if (!ret)
  130. return TRACE_TYPE_PARTIAL_LINE;
  131. return TRACE_TYPE_HANDLED;
  132. }
  /*
   * Only declared here; no definition is visible in this file.
   * NOTE(review): presumably a surviving call is meant to fail at link
   * time when a field size is wrong -- confirm.
   */
  extern char *__bad_type_size(void);

  /*
   * Expands to five comma-separated arguments describing one field of the
   * local variable "trace", which must be in scope at the point of use:
   *
   *   type string, name string, offset, size, signedness
   *
   * The type string is guarded by a size comparison: when sizeof(type)
   * differs from the size of trace.name, the expression selects a call to
   * __bad_type_size() instead of the string.
   *
   * The expansion is an argument list, not a single expression, so it
   * cannot be wrapped in parentheses.
   */
  #define SYSCALL_FIELD(type, name)                                   \
      sizeof(type) != sizeof(trace.name) ?                            \
          __bad_type_size() :                                         \
          #type, #name, offsetof(typeof(trace), name),                \
          sizeof(trace.name), is_signed_type(type)
  139. int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
  140. {
  141. int i;
  142. int nr;
  143. int ret;
  144. struct syscall_metadata *entry;
  145. struct syscall_trace_enter trace;
  146. int offset = offsetof(struct syscall_trace_enter, args);
  147. nr = syscall_name_to_nr(call->data);
  148. entry = syscall_nr_to_meta(nr);
  149. if (!entry)
  150. return 0;
  151. ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  152. "\tsigned:%u;\n",
  153. SYSCALL_FIELD(int, nr));
  154. if (!ret)
  155. return 0;
  156. for (i = 0; i < entry->nb_args; i++) {
  157. ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
  158. entry->args[i]);
  159. if (!ret)
  160. return 0;
  161. ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
  162. "\tsigned:%u;\n", offset,
  163. sizeof(unsigned long),
  164. is_signed_type(unsigned long));
  165. if (!ret)
  166. return 0;
  167. offset += sizeof(unsigned long);
  168. }
  169. trace_seq_puts(s, "\nprint fmt: \"");
  170. for (i = 0; i < entry->nb_args; i++) {
  171. ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
  172. sizeof(unsigned long),
  173. i == entry->nb_args - 1 ? "" : ", ");
  174. if (!ret)
  175. return 0;
  176. }
  177. trace_seq_putc(s, '"');
  178. for (i = 0; i < entry->nb_args; i++) {
  179. ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
  180. entry->args[i]);
  181. if (!ret)
  182. return 0;
  183. }
  184. return trace_seq_putc(s, '\n');
  185. }
  186. int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
  187. {
  188. int ret;
  189. struct syscall_trace_exit trace;
  190. ret = trace_seq_printf(s,
  191. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  192. "\tsigned:%u;\n"
  193. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  194. "\tsigned:%u;\n",
  195. SYSCALL_FIELD(int, nr),
  196. SYSCALL_FIELD(long, ret));
  197. if (!ret)
  198. return 0;
  199. return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
  200. }
  201. int syscall_enter_define_fields(struct ftrace_event_call *call)
  202. {
  203. struct syscall_trace_enter trace;
  204. struct syscall_metadata *meta;
  205. int ret;
  206. int nr;
  207. int i;
  208. int offset = offsetof(typeof(trace), args);
  209. nr = syscall_name_to_nr(call->data);
  210. meta = syscall_nr_to_meta(nr);
  211. if (!meta)
  212. return 0;
  213. ret = trace_define_common_fields(call);
  214. if (ret)
  215. return ret;
  216. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  217. if (ret)
  218. return ret;
  219. for (i = 0; i < meta->nb_args; i++) {
  220. ret = trace_define_field(call, meta->types[i],
  221. meta->args[i], offset,
  222. sizeof(unsigned long), 0,
  223. FILTER_OTHER);
  224. offset += sizeof(unsigned long);
  225. }
  226. return ret;
  227. }
  228. int syscall_exit_define_fields(struct ftrace_event_call *call)
  229. {
  230. struct syscall_trace_exit trace;
  231. int ret;
  232. ret = trace_define_common_fields(call);
  233. if (ret)
  234. return ret;
  235. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  236. if (ret)
  237. return ret;
  238. ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
  239. FILTER_OTHER);
  240. return ret;
  241. }
  242. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  243. {
  244. struct syscall_trace_enter *entry;
  245. struct syscall_metadata *sys_data;
  246. struct ring_buffer_event *event;
  247. struct ring_buffer *buffer;
  248. int size;
  249. int syscall_nr;
  250. syscall_nr = syscall_get_nr(current, regs);
  251. if (syscall_nr < 0)
  252. return;
  253. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  254. return;
  255. sys_data = syscall_nr_to_meta(syscall_nr);
  256. if (!sys_data)
  257. return;
  258. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  259. event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
  260. size, 0, 0);
  261. if (!event)
  262. return;
  263. entry = ring_buffer_event_data(event);
  264. entry->nr = syscall_nr;
  265. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  266. if (!filter_current_check_discard(buffer, sys_data->enter_event,
  267. entry, event))
  268. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  269. }
  270. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  271. {
  272. struct syscall_trace_exit *entry;
  273. struct syscall_metadata *sys_data;
  274. struct ring_buffer_event *event;
  275. struct ring_buffer *buffer;
  276. int syscall_nr;
  277. syscall_nr = syscall_get_nr(current, regs);
  278. if (syscall_nr < 0)
  279. return;
  280. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  281. return;
  282. sys_data = syscall_nr_to_meta(syscall_nr);
  283. if (!sys_data)
  284. return;
  285. event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
  286. sizeof(*entry), 0, 0);
  287. if (!event)
  288. return;
  289. entry = ring_buffer_event_data(event);
  290. entry->nr = syscall_nr;
  291. entry->ret = syscall_get_return_value(current, regs);
  292. if (!filter_current_check_discard(buffer, sys_data->exit_event,
  293. entry, event))
  294. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  295. }
  296. int reg_event_syscall_enter(struct ftrace_event_call *call)
  297. {
  298. int ret = 0;
  299. int num;
  300. char *name;
  301. name = (char *)call->data;
  302. num = syscall_name_to_nr(name);
  303. if (num < 0 || num >= NR_syscalls)
  304. return -ENOSYS;
  305. mutex_lock(&syscall_trace_lock);
  306. if (!sys_refcount_enter)
  307. ret = register_trace_sys_enter(ftrace_syscall_enter);
  308. if (ret) {
  309. pr_info("event trace: Could not activate"
  310. "syscall entry trace point");
  311. } else {
  312. set_bit(num, enabled_enter_syscalls);
  313. sys_refcount_enter++;
  314. }
  315. mutex_unlock(&syscall_trace_lock);
  316. return ret;
  317. }
  318. void unreg_event_syscall_enter(struct ftrace_event_call *call)
  319. {
  320. int num;
  321. char *name;
  322. name = (char *)call->data;
  323. num = syscall_name_to_nr(name);
  324. if (num < 0 || num >= NR_syscalls)
  325. return;
  326. mutex_lock(&syscall_trace_lock);
  327. sys_refcount_enter--;
  328. clear_bit(num, enabled_enter_syscalls);
  329. if (!sys_refcount_enter)
  330. unregister_trace_sys_enter(ftrace_syscall_enter);
  331. mutex_unlock(&syscall_trace_lock);
  332. }
  333. int reg_event_syscall_exit(struct ftrace_event_call *call)
  334. {
  335. int ret = 0;
  336. int num;
  337. char *name;
  338. name = call->data;
  339. num = syscall_name_to_nr(name);
  340. if (num < 0 || num >= NR_syscalls)
  341. return -ENOSYS;
  342. mutex_lock(&syscall_trace_lock);
  343. if (!sys_refcount_exit)
  344. ret = register_trace_sys_exit(ftrace_syscall_exit);
  345. if (ret) {
  346. pr_info("event trace: Could not activate"
  347. "syscall exit trace point");
  348. } else {
  349. set_bit(num, enabled_exit_syscalls);
  350. sys_refcount_exit++;
  351. }
  352. mutex_unlock(&syscall_trace_lock);
  353. return ret;
  354. }
  355. void unreg_event_syscall_exit(struct ftrace_event_call *call)
  356. {
  357. int num;
  358. char *name;
  359. name = call->data;
  360. num = syscall_name_to_nr(name);
  361. if (num < 0 || num >= NR_syscalls)
  362. return;
  363. mutex_lock(&syscall_trace_lock);
  364. sys_refcount_exit--;
  365. clear_bit(num, enabled_exit_syscalls);
  366. if (!sys_refcount_exit)
  367. unregister_trace_sys_exit(ftrace_syscall_exit);
  368. mutex_unlock(&syscall_trace_lock);
  369. }
  370. struct trace_event event_syscall_enter = {
  371. .trace = print_syscall_enter,
  372. };
  373. struct trace_event event_syscall_exit = {
  374. .trace = print_syscall_exit,
  375. };
  376. int __init init_ftrace_syscalls(void)
  377. {
  378. struct syscall_metadata *meta;
  379. unsigned long addr;
  380. int i;
  381. syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
  382. NR_syscalls, GFP_KERNEL);
  383. if (!syscalls_metadata) {
  384. WARN_ON(1);
  385. return -ENOMEM;
  386. }
  387. for (i = 0; i < NR_syscalls; i++) {
  388. addr = arch_syscall_addr(i);
  389. meta = find_syscall_meta(addr);
  390. syscalls_metadata[i] = meta;
  391. }
  392. return 0;
  393. }
  394. core_initcall(init_ftrace_syscalls);
  395. #ifdef CONFIG_EVENT_PROFILE
  /* One bit per syscall number, set while profiling of that syscall is on. */
  static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
  static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);

  /* How many syscalls currently have profiling enabled on each tracepoint. */
  static int sys_prof_refcount_enter;
  static int sys_prof_refcount_exit;
  400. static void prof_syscall_enter(struct pt_regs *regs, long id)
  401. {
  402. struct syscall_metadata *sys_data;
  403. struct syscall_trace_enter *rec;
  404. unsigned long flags;
  405. char *trace_buf;
  406. char *raw_data;
  407. int syscall_nr;
  408. int rctx;
  409. int size;
  410. int cpu;
  411. syscall_nr = syscall_get_nr(current, regs);
  412. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  413. return;
  414. sys_data = syscall_nr_to_meta(syscall_nr);
  415. if (!sys_data)
  416. return;
  417. /* get the size after alignment with the u32 buffer size field */
  418. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  419. size = ALIGN(size + sizeof(u32), sizeof(u64));
  420. size -= sizeof(u32);
  421. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  422. "profile buffer not large enough"))
  423. return;
  424. /* Protect the per cpu buffer, begin the rcu read side */
  425. local_irq_save(flags);
  426. rctx = perf_swevent_get_recursion_context();
  427. if (rctx < 0)
  428. goto end_recursion;
  429. cpu = smp_processor_id();
  430. trace_buf = rcu_dereference(perf_trace_buf);
  431. if (!trace_buf)
  432. goto end;
  433. raw_data = per_cpu_ptr(trace_buf, cpu);
  434. /* zero the dead bytes from align to not leak stack to user */
  435. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  436. rec = (struct syscall_trace_enter *) raw_data;
  437. tracing_generic_entry_update(&rec->ent, 0, 0);
  438. rec->ent.type = sys_data->enter_id;
  439. rec->nr = syscall_nr;
  440. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  441. (unsigned long *)&rec->args);
  442. perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
  443. end:
  444. perf_swevent_put_recursion_context(rctx);
  445. end_recursion:
  446. local_irq_restore(flags);
  447. }
  448. int reg_prof_syscall_enter(char *name)
  449. {
  450. int ret = 0;
  451. int num;
  452. num = syscall_name_to_nr(name);
  453. if (num < 0 || num >= NR_syscalls)
  454. return -ENOSYS;
  455. mutex_lock(&syscall_trace_lock);
  456. if (!sys_prof_refcount_enter)
  457. ret = register_trace_sys_enter(prof_syscall_enter);
  458. if (ret) {
  459. pr_info("event trace: Could not activate"
  460. "syscall entry trace point");
  461. } else {
  462. set_bit(num, enabled_prof_enter_syscalls);
  463. sys_prof_refcount_enter++;
  464. }
  465. mutex_unlock(&syscall_trace_lock);
  466. return ret;
  467. }
  468. void unreg_prof_syscall_enter(char *name)
  469. {
  470. int num;
  471. num = syscall_name_to_nr(name);
  472. if (num < 0 || num >= NR_syscalls)
  473. return;
  474. mutex_lock(&syscall_trace_lock);
  475. sys_prof_refcount_enter--;
  476. clear_bit(num, enabled_prof_enter_syscalls);
  477. if (!sys_prof_refcount_enter)
  478. unregister_trace_sys_enter(prof_syscall_enter);
  479. mutex_unlock(&syscall_trace_lock);
  480. }
  481. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  482. {
  483. struct syscall_metadata *sys_data;
  484. struct syscall_trace_exit *rec;
  485. unsigned long flags;
  486. int syscall_nr;
  487. char *trace_buf;
  488. char *raw_data;
  489. int rctx;
  490. int size;
  491. int cpu;
  492. syscall_nr = syscall_get_nr(current, regs);
  493. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  494. return;
  495. sys_data = syscall_nr_to_meta(syscall_nr);
  496. if (!sys_data)
  497. return;
  498. /* We can probably do that at build time */
  499. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  500. size -= sizeof(u32);
  501. /*
  502. * Impossible, but be paranoid with the future
  503. * How to put this check outside runtime?
  504. */
  505. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  506. "exit event has grown above profile buffer size"))
  507. return;
  508. /* Protect the per cpu buffer, begin the rcu read side */
  509. local_irq_save(flags);
  510. rctx = perf_swevent_get_recursion_context();
  511. if (rctx < 0)
  512. goto end_recursion;
  513. cpu = smp_processor_id();
  514. trace_buf = rcu_dereference(perf_trace_buf);
  515. if (!trace_buf)
  516. goto end;
  517. raw_data = per_cpu_ptr(trace_buf, cpu);
  518. /* zero the dead bytes from align to not leak stack to user */
  519. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  520. rec = (struct syscall_trace_exit *)raw_data;
  521. tracing_generic_entry_update(&rec->ent, 0, 0);
  522. rec->ent.type = sys_data->exit_id;
  523. rec->nr = syscall_nr;
  524. rec->ret = syscall_get_return_value(current, regs);
  525. perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
  526. end:
  527. perf_swevent_put_recursion_context(rctx);
  528. end_recursion:
  529. local_irq_restore(flags);
  530. }
  531. int reg_prof_syscall_exit(char *name)
  532. {
  533. int ret = 0;
  534. int num;
  535. num = syscall_name_to_nr(name);
  536. if (num < 0 || num >= NR_syscalls)
  537. return -ENOSYS;
  538. mutex_lock(&syscall_trace_lock);
  539. if (!sys_prof_refcount_exit)
  540. ret = register_trace_sys_exit(prof_syscall_exit);
  541. if (ret) {
  542. pr_info("event trace: Could not activate"
  543. "syscall entry trace point");
  544. } else {
  545. set_bit(num, enabled_prof_exit_syscalls);
  546. sys_prof_refcount_exit++;
  547. }
  548. mutex_unlock(&syscall_trace_lock);
  549. return ret;
  550. }
  551. void unreg_prof_syscall_exit(char *name)
  552. {
  553. int num;
  554. num = syscall_name_to_nr(name);
  555. if (num < 0 || num >= NR_syscalls)
  556. return;
  557. mutex_lock(&syscall_trace_lock);
  558. sys_prof_refcount_exit--;
  559. clear_bit(num, enabled_prof_exit_syscalls);
  560. if (!sys_prof_refcount_exit)
  561. unregister_trace_sys_exit(prof_syscall_exit);
  562. mutex_unlock(&syscall_trace_lock);
  563. }
  564. #endif