trace_syscalls.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. #include <trace/syscall.h>
  2. #include <trace/events/syscalls.h>
  3. #include <linux/kernel.h>
  4. #include <linux/ftrace.h>
  5. #include <linux/perf_event.h>
  6. #include <asm/syscall.h>
  7. #include "trace_output.h"
  8. #include "trace.h"
/*
 * Taken by the reg_ and unreg_ helpers in this file around every update of
 * the refcounts and enable bitmaps.
 */
static DEFINE_MUTEX(syscall_trace_lock);
/*
 * Count of enabled entry/exit events. The matching tracepoint probe is
 * registered while the count is still zero and unregistered when it
 * drops back to zero.
 */
static int sys_refcount_enter;
static int sys_refcount_exit;
/* One bit per syscall number: set while that syscall's event is enabled. */
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

/*
 * Bounds of the syscall metadata records walked by find_syscall_meta().
 * NOTE(review): presumably section markers provided by the linker script;
 * confirm.
 */
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

/*
 * Table indexed by syscall number, allocated and filled by
 * init_ftrace_syscalls(). Stays NULL until then, and individual entries
 * are NULL when no metadata record matched.
 */
static struct syscall_metadata **syscalls_metadata;
  17. static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
  18. {
  19. struct syscall_metadata *start;
  20. struct syscall_metadata *stop;
  21. char str[KSYM_SYMBOL_LEN];
  22. start = (struct syscall_metadata *)__start_syscalls_metadata;
  23. stop = (struct syscall_metadata *)__stop_syscalls_metadata;
  24. kallsyms_lookup(syscall, NULL, NULL, NULL, str);
  25. for ( ; start < stop; start++) {
  26. /*
  27. * Only compare after the "sys" prefix. Archs that use
  28. * syscall wrappers may have syscalls symbols aliases prefixed
  29. * with "SyS" instead of "sys", leading to an unwanted
  30. * mismatch.
  31. */
  32. if (start->name && !strcmp(start->name + 3, str + 3))
  33. return start;
  34. }
  35. return NULL;
  36. }
  37. static struct syscall_metadata *syscall_nr_to_meta(int nr)
  38. {
  39. if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
  40. return NULL;
  41. return syscalls_metadata[nr];
  42. }
  43. int syscall_name_to_nr(const char *name)
  44. {
  45. int i;
  46. if (!syscalls_metadata)
  47. return -1;
  48. for (i = 0; i < NR_syscalls; i++) {
  49. if (syscalls_metadata[i]) {
  50. if (!strcmp(syscalls_metadata[i]->name, name))
  51. return i;
  52. }
  53. }
  54. return -1;
  55. }
  56. enum print_line_t
  57. print_syscall_enter(struct trace_iterator *iter, int flags)
  58. {
  59. struct trace_seq *s = &iter->seq;
  60. struct trace_entry *ent = iter->ent;
  61. struct syscall_trace_enter *trace;
  62. struct syscall_metadata *entry;
  63. int i, ret, syscall;
  64. trace = (typeof(trace))ent;
  65. syscall = trace->nr;
  66. entry = syscall_nr_to_meta(syscall);
  67. if (!entry)
  68. goto end;
  69. if (entry->enter_event->id != ent->type) {
  70. WARN_ON_ONCE(1);
  71. goto end;
  72. }
  73. ret = trace_seq_printf(s, "%s(", entry->name);
  74. if (!ret)
  75. return TRACE_TYPE_PARTIAL_LINE;
  76. for (i = 0; i < entry->nb_args; i++) {
  77. /* parameter types */
  78. if (trace_flags & TRACE_ITER_VERBOSE) {
  79. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  80. if (!ret)
  81. return TRACE_TYPE_PARTIAL_LINE;
  82. }
  83. /* parameter values */
  84. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  85. trace->args[i],
  86. i == entry->nb_args - 1 ? "" : ", ");
  87. if (!ret)
  88. return TRACE_TYPE_PARTIAL_LINE;
  89. }
  90. ret = trace_seq_putc(s, ')');
  91. if (!ret)
  92. return TRACE_TYPE_PARTIAL_LINE;
  93. end:
  94. ret = trace_seq_putc(s, '\n');
  95. if (!ret)
  96. return TRACE_TYPE_PARTIAL_LINE;
  97. return TRACE_TYPE_HANDLED;
  98. }
  99. enum print_line_t
  100. print_syscall_exit(struct trace_iterator *iter, int flags)
  101. {
  102. struct trace_seq *s = &iter->seq;
  103. struct trace_entry *ent = iter->ent;
  104. struct syscall_trace_exit *trace;
  105. int syscall;
  106. struct syscall_metadata *entry;
  107. int ret;
  108. trace = (typeof(trace))ent;
  109. syscall = trace->nr;
  110. entry = syscall_nr_to_meta(syscall);
  111. if (!entry) {
  112. trace_seq_printf(s, "\n");
  113. return TRACE_TYPE_HANDLED;
  114. }
  115. if (entry->exit_event->id != ent->type) {
  116. WARN_ON_ONCE(1);
  117. return TRACE_TYPE_UNHANDLED;
  118. }
  119. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  120. trace->ret);
  121. if (!ret)
  122. return TRACE_TYPE_PARTIAL_LINE;
  123. return TRACE_TYPE_HANDLED;
  124. }
/*
 * Only declared here. SYSCALL_FIELD() references it on the branch selected
 * when sizeof(type) differs from the size of the record member.
 * NOTE(review): presumably never defined anywhere, so that a size mismatch
 * shows up as a link error; confirm.
 */
extern char *__bad_type_size(void);

/*
 * SYSCALL_FIELD(type, name) expands to a comma separated argument list:
 * the stringified type, the stringified name, the member offset, the member
 * size and the signedness of @type. It must be used in a scope that has a
 * local variable called "trace" of the record type, since the expansion
 * applies sizeof/offsetof/typeof to it.
 */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
/*
 * syscall_enter_format - write the format description of a syscall entry event
 * @call: event whose ->data points at the syscall metadata
 * @s: sequence buffer receiving the text
 *
 * Emits one "field:" line for the syscall number, one per argument, and then
 * the "print fmt:" line listing every argument.
 *
 * Returns 0 as soon as one of the checked writes fails, otherwise the result
 * of the final trace_seq_putc(). The trace_seq_puts() and the closing-quote
 * trace_seq_putc() calls are not checked.
 */
int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int i;
	int ret;
	struct syscall_metadata *entry = call->data;
	/* Never read; only gives SYSCALL_FIELD() something to size up. */
	struct syscall_trace_enter trace;
	/* Arguments are laid out back to back starting at the args member. */
	int offset = offsetof(struct syscall_trace_enter, args);

	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr));
	if (!ret)
		return 0;

	/* One field description per argument, each sized as unsigned long. */
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
				        entry->args[i]);
		if (!ret)
			return 0;
		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
				       "\tsigned:%u;\n", offset,
				       sizeof(unsigned long),
				       is_signed_type(unsigned long));
		if (!ret)
			return 0;
		offset += sizeof(unsigned long);
	}

	trace_seq_puts(s, "\nprint fmt: \"");
	/*
	 * Build the quoted format string: "name: 0x%0<N>lx" per argument,
	 * where <N> is sizeof(unsigned long), separated by ", ".
	 */
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
				       sizeof(unsigned long),
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return 0;
	}
	trace_seq_putc(s, '"');

	/* Followed by the matching REC-> expressions, one per argument. */
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
				       entry->args[i]);
		if (!ret)
			return 0;
	}

	return trace_seq_putc(s, '\n');
}
/*
 * syscall_exit_format - write the format description of a syscall exit event
 * @call: not used by this function
 * @s: sequence buffer receiving the text
 *
 * Emits the "field:" lines for the syscall number and the return value,
 * then the "print fmt:" line.
 *
 * Returns 0 when the field lines could not be written, otherwise the result
 * of the final trace_seq_printf().
 */
int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int ret;
	/* Never read; only gives SYSCALL_FIELD() something to size up. */
	struct syscall_trace_exit trace;

	ret = trace_seq_printf(s,
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n"
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr),
			       SYSCALL_FIELD(long, ret));
	if (!ret)
		return 0;

	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}
  188. int syscall_enter_define_fields(struct ftrace_event_call *call)
  189. {
  190. struct syscall_trace_enter trace;
  191. struct syscall_metadata *meta = call->data;
  192. int ret;
  193. int i;
  194. int offset = offsetof(typeof(trace), args);
  195. ret = trace_define_common_fields(call);
  196. if (ret)
  197. return ret;
  198. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  199. if (ret)
  200. return ret;
  201. for (i = 0; i < meta->nb_args; i++) {
  202. ret = trace_define_field(call, meta->types[i],
  203. meta->args[i], offset,
  204. sizeof(unsigned long), 0,
  205. FILTER_OTHER);
  206. offset += sizeof(unsigned long);
  207. }
  208. return ret;
  209. }
  210. int syscall_exit_define_fields(struct ftrace_event_call *call)
  211. {
  212. struct syscall_trace_exit trace;
  213. int ret;
  214. ret = trace_define_common_fields(call);
  215. if (ret)
  216. return ret;
  217. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  218. if (ret)
  219. return ret;
  220. ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
  221. FILTER_OTHER);
  222. return ret;
  223. }
  224. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  225. {
  226. struct syscall_trace_enter *entry;
  227. struct syscall_metadata *sys_data;
  228. struct ring_buffer_event *event;
  229. struct ring_buffer *buffer;
  230. int size;
  231. int syscall_nr;
  232. syscall_nr = syscall_get_nr(current, regs);
  233. if (syscall_nr < 0)
  234. return;
  235. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  236. return;
  237. sys_data = syscall_nr_to_meta(syscall_nr);
  238. if (!sys_data)
  239. return;
  240. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  241. event = trace_current_buffer_lock_reserve(&buffer,
  242. sys_data->enter_event->id, size, 0, 0);
  243. if (!event)
  244. return;
  245. entry = ring_buffer_event_data(event);
  246. entry->nr = syscall_nr;
  247. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  248. if (!filter_current_check_discard(buffer, sys_data->enter_event,
  249. entry, event))
  250. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  251. }
  252. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  253. {
  254. struct syscall_trace_exit *entry;
  255. struct syscall_metadata *sys_data;
  256. struct ring_buffer_event *event;
  257. struct ring_buffer *buffer;
  258. int syscall_nr;
  259. syscall_nr = syscall_get_nr(current, regs);
  260. if (syscall_nr < 0)
  261. return;
  262. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  263. return;
  264. sys_data = syscall_nr_to_meta(syscall_nr);
  265. if (!sys_data)
  266. return;
  267. event = trace_current_buffer_lock_reserve(&buffer,
  268. sys_data->exit_event->id, sizeof(*entry), 0, 0);
  269. if (!event)
  270. return;
  271. entry = ring_buffer_event_data(event);
  272. entry->nr = syscall_nr;
  273. entry->ret = syscall_get_return_value(current, regs);
  274. if (!filter_current_check_discard(buffer, sys_data->exit_event,
  275. entry, event))
  276. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  277. }
  278. int reg_event_syscall_enter(struct ftrace_event_call *call)
  279. {
  280. int ret = 0;
  281. int num;
  282. const char *name;
  283. name = ((struct syscall_metadata *)call->data)->name;
  284. num = syscall_name_to_nr(name);
  285. if (num < 0 || num >= NR_syscalls)
  286. return -ENOSYS;
  287. mutex_lock(&syscall_trace_lock);
  288. if (!sys_refcount_enter)
  289. ret = register_trace_sys_enter(ftrace_syscall_enter);
  290. if (ret) {
  291. pr_info("event trace: Could not activate"
  292. "syscall entry trace point");
  293. } else {
  294. set_bit(num, enabled_enter_syscalls);
  295. sys_refcount_enter++;
  296. }
  297. mutex_unlock(&syscall_trace_lock);
  298. return ret;
  299. }
  300. void unreg_event_syscall_enter(struct ftrace_event_call *call)
  301. {
  302. int num;
  303. const char *name;
  304. name = ((struct syscall_metadata *)call->data)->name;
  305. num = syscall_name_to_nr(name);
  306. if (num < 0 || num >= NR_syscalls)
  307. return;
  308. mutex_lock(&syscall_trace_lock);
  309. sys_refcount_enter--;
  310. clear_bit(num, enabled_enter_syscalls);
  311. if (!sys_refcount_enter)
  312. unregister_trace_sys_enter(ftrace_syscall_enter);
  313. mutex_unlock(&syscall_trace_lock);
  314. }
  315. int reg_event_syscall_exit(struct ftrace_event_call *call)
  316. {
  317. int ret = 0;
  318. int num;
  319. const char *name;
  320. name = ((struct syscall_metadata *)call->data)->name;
  321. num = syscall_name_to_nr(name);
  322. if (num < 0 || num >= NR_syscalls)
  323. return -ENOSYS;
  324. mutex_lock(&syscall_trace_lock);
  325. if (!sys_refcount_exit)
  326. ret = register_trace_sys_exit(ftrace_syscall_exit);
  327. if (ret) {
  328. pr_info("event trace: Could not activate"
  329. "syscall exit trace point");
  330. } else {
  331. set_bit(num, enabled_exit_syscalls);
  332. sys_refcount_exit++;
  333. }
  334. mutex_unlock(&syscall_trace_lock);
  335. return ret;
  336. }
  337. void unreg_event_syscall_exit(struct ftrace_event_call *call)
  338. {
  339. int num;
  340. const char *name;
  341. name = ((struct syscall_metadata *)call->data)->name;
  342. num = syscall_name_to_nr(name);
  343. if (num < 0 || num >= NR_syscalls)
  344. return;
  345. mutex_lock(&syscall_trace_lock);
  346. sys_refcount_exit--;
  347. clear_bit(num, enabled_exit_syscalls);
  348. if (!sys_refcount_exit)
  349. unregister_trace_sys_exit(ftrace_syscall_exit);
  350. mutex_unlock(&syscall_trace_lock);
  351. }
  352. int __init init_ftrace_syscalls(void)
  353. {
  354. struct syscall_metadata *meta;
  355. unsigned long addr;
  356. int i;
  357. syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
  358. NR_syscalls, GFP_KERNEL);
  359. if (!syscalls_metadata) {
  360. WARN_ON(1);
  361. return -ENOMEM;
  362. }
  363. for (i = 0; i < NR_syscalls; i++) {
  364. addr = arch_syscall_addr(i);
  365. meta = find_syscall_meta(addr);
  366. syscalls_metadata[i] = meta;
  367. }
  368. return 0;
  369. }
  370. core_initcall(init_ftrace_syscalls);
  371. #ifdef CONFIG_EVENT_PROFILE
/*
 * Profiling counterparts of the enable bitmaps and refcounts: one bit per
 * syscall number, and a count of enabled profile events that decides when
 * the prof_syscall_enter()/prof_syscall_exit() probes get registered and
 * unregistered.
 */
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;
  376. static void prof_syscall_enter(struct pt_regs *regs, long id)
  377. {
  378. struct syscall_metadata *sys_data;
  379. struct syscall_trace_enter *rec;
  380. unsigned long flags;
  381. char *trace_buf;
  382. char *raw_data;
  383. int syscall_nr;
  384. int rctx;
  385. int size;
  386. int cpu;
  387. syscall_nr = syscall_get_nr(current, regs);
  388. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  389. return;
  390. sys_data = syscall_nr_to_meta(syscall_nr);
  391. if (!sys_data)
  392. return;
  393. /* get the size after alignment with the u32 buffer size field */
  394. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  395. size = ALIGN(size + sizeof(u32), sizeof(u64));
  396. size -= sizeof(u32);
  397. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  398. "profile buffer not large enough"))
  399. return;
  400. /* Protect the per cpu buffer, begin the rcu read side */
  401. local_irq_save(flags);
  402. rctx = perf_swevent_get_recursion_context();
  403. if (rctx < 0)
  404. goto end_recursion;
  405. cpu = smp_processor_id();
  406. trace_buf = rcu_dereference(perf_trace_buf);
  407. if (!trace_buf)
  408. goto end;
  409. raw_data = per_cpu_ptr(trace_buf, cpu);
  410. /* zero the dead bytes from align to not leak stack to user */
  411. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  412. rec = (struct syscall_trace_enter *) raw_data;
  413. tracing_generic_entry_update(&rec->ent, 0, 0);
  414. rec->ent.type = sys_data->enter_event->id;
  415. rec->nr = syscall_nr;
  416. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  417. (unsigned long *)&rec->args);
  418. perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
  419. end:
  420. perf_swevent_put_recursion_context(rctx);
  421. end_recursion:
  422. local_irq_restore(flags);
  423. }
  424. int reg_prof_syscall_enter(char *name)
  425. {
  426. int ret = 0;
  427. int num;
  428. num = syscall_name_to_nr(name);
  429. if (num < 0 || num >= NR_syscalls)
  430. return -ENOSYS;
  431. mutex_lock(&syscall_trace_lock);
  432. if (!sys_prof_refcount_enter)
  433. ret = register_trace_sys_enter(prof_syscall_enter);
  434. if (ret) {
  435. pr_info("event trace: Could not activate"
  436. "syscall entry trace point");
  437. } else {
  438. set_bit(num, enabled_prof_enter_syscalls);
  439. sys_prof_refcount_enter++;
  440. }
  441. mutex_unlock(&syscall_trace_lock);
  442. return ret;
  443. }
  444. void unreg_prof_syscall_enter(char *name)
  445. {
  446. int num;
  447. num = syscall_name_to_nr(name);
  448. if (num < 0 || num >= NR_syscalls)
  449. return;
  450. mutex_lock(&syscall_trace_lock);
  451. sys_prof_refcount_enter--;
  452. clear_bit(num, enabled_prof_enter_syscalls);
  453. if (!sys_prof_refcount_enter)
  454. unregister_trace_sys_enter(prof_syscall_enter);
  455. mutex_unlock(&syscall_trace_lock);
  456. }
  457. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  458. {
  459. struct syscall_metadata *sys_data;
  460. struct syscall_trace_exit *rec;
  461. unsigned long flags;
  462. int syscall_nr;
  463. char *trace_buf;
  464. char *raw_data;
  465. int rctx;
  466. int size;
  467. int cpu;
  468. syscall_nr = syscall_get_nr(current, regs);
  469. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  470. return;
  471. sys_data = syscall_nr_to_meta(syscall_nr);
  472. if (!sys_data)
  473. return;
  474. /* We can probably do that at build time */
  475. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  476. size -= sizeof(u32);
  477. /*
  478. * Impossible, but be paranoid with the future
  479. * How to put this check outside runtime?
  480. */
  481. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  482. "exit event has grown above profile buffer size"))
  483. return;
  484. /* Protect the per cpu buffer, begin the rcu read side */
  485. local_irq_save(flags);
  486. rctx = perf_swevent_get_recursion_context();
  487. if (rctx < 0)
  488. goto end_recursion;
  489. cpu = smp_processor_id();
  490. trace_buf = rcu_dereference(perf_trace_buf);
  491. if (!trace_buf)
  492. goto end;
  493. raw_data = per_cpu_ptr(trace_buf, cpu);
  494. /* zero the dead bytes from align to not leak stack to user */
  495. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  496. rec = (struct syscall_trace_exit *)raw_data;
  497. tracing_generic_entry_update(&rec->ent, 0, 0);
  498. rec->ent.type = sys_data->exit_event->id;
  499. rec->nr = syscall_nr;
  500. rec->ret = syscall_get_return_value(current, regs);
  501. perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
  502. end:
  503. perf_swevent_put_recursion_context(rctx);
  504. end_recursion:
  505. local_irq_restore(flags);
  506. }
  507. int reg_prof_syscall_exit(char *name)
  508. {
  509. int ret = 0;
  510. int num;
  511. num = syscall_name_to_nr(name);
  512. if (num < 0 || num >= NR_syscalls)
  513. return -ENOSYS;
  514. mutex_lock(&syscall_trace_lock);
  515. if (!sys_prof_refcount_exit)
  516. ret = register_trace_sys_exit(prof_syscall_exit);
  517. if (ret) {
  518. pr_info("event trace: Could not activate"
  519. "syscall entry trace point");
  520. } else {
  521. set_bit(num, enabled_prof_exit_syscalls);
  522. sys_prof_refcount_exit++;
  523. }
  524. mutex_unlock(&syscall_trace_lock);
  525. return ret;
  526. }
  527. void unreg_prof_syscall_exit(char *name)
  528. {
  529. int num;
  530. num = syscall_name_to_nr(name);
  531. if (num < 0 || num >= NR_syscalls)
  532. return;
  533. mutex_lock(&syscall_trace_lock);
  534. sys_prof_refcount_exit--;
  535. clear_bit(num, enabled_prof_exit_syscalls);
  536. if (!sys_prof_refcount_exit)
  537. unregister_trace_sys_exit(prof_syscall_exit);
  538. mutex_unlock(&syscall_trace_lock);
  539. }
  540. #endif