trace_syscalls.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645
  1. #include <trace/syscall.h>
  2. #include <trace/events/syscalls.h>
  3. #include <linux/kernel.h>
  4. #include <linux/ftrace.h>
  5. #include <linux/perf_event.h>
  6. #include <asm/syscall.h>
  7. #include "trace_output.h"
  8. #include "trace.h"
  /*
   * Held by the enable/disable helpers in this file while they update the
   * reference counts and the per-syscall enable bitmaps.
   */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* One bit per syscall number; tested by the entry/exit probes. */
  static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
  static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

  /* Count of enabled entry/exit events; the probe is registered while > 0. */
  static int sys_refcount_enter;
  static int sys_refcount_exit;

  /*
   * Bounds of the array of syscall metadata records. The symbols are defined
   * outside this file (presumably by the linker script -- confirm).
   */
  extern unsigned long __start_syscalls_metadata[];
  extern unsigned long __stop_syscalls_metadata[];

  /* Metadata pointers indexed by syscall number; NULL until initialised. */
  static struct syscall_metadata **syscalls_metadata;
  17. static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
  18. {
  19. struct syscall_metadata *start;
  20. struct syscall_metadata *stop;
  21. char str[KSYM_SYMBOL_LEN];
  22. start = (struct syscall_metadata *)__start_syscalls_metadata;
  23. stop = (struct syscall_metadata *)__stop_syscalls_metadata;
  24. kallsyms_lookup(syscall, NULL, NULL, NULL, str);
  25. for ( ; start < stop; start++) {
  26. /*
  27. * Only compare after the "sys" prefix. Archs that use
  28. * syscall wrappers may have syscalls symbols aliases prefixed
  29. * with "SyS" instead of "sys", leading to an unwanted
  30. * mismatch.
  31. */
  32. if (start->name && !strcmp(start->name + 3, str + 3))
  33. return start;
  34. }
  35. return NULL;
  36. }
  37. static struct syscall_metadata *syscall_nr_to_meta(int nr)
  38. {
  39. if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
  40. return NULL;
  41. return syscalls_metadata[nr];
  42. }
  43. enum print_line_t
  44. print_syscall_enter(struct trace_iterator *iter, int flags)
  45. {
  46. struct trace_seq *s = &iter->seq;
  47. struct trace_entry *ent = iter->ent;
  48. struct syscall_trace_enter *trace;
  49. struct syscall_metadata *entry;
  50. int i, ret, syscall;
  51. trace = (typeof(trace))ent;
  52. syscall = trace->nr;
  53. entry = syscall_nr_to_meta(syscall);
  54. if (!entry)
  55. goto end;
  56. if (entry->enter_event->id != ent->type) {
  57. WARN_ON_ONCE(1);
  58. goto end;
  59. }
  60. ret = trace_seq_printf(s, "%s(", entry->name);
  61. if (!ret)
  62. return TRACE_TYPE_PARTIAL_LINE;
  63. for (i = 0; i < entry->nb_args; i++) {
  64. /* parameter types */
  65. if (trace_flags & TRACE_ITER_VERBOSE) {
  66. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  67. if (!ret)
  68. return TRACE_TYPE_PARTIAL_LINE;
  69. }
  70. /* parameter values */
  71. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  72. trace->args[i],
  73. i == entry->nb_args - 1 ? "" : ", ");
  74. if (!ret)
  75. return TRACE_TYPE_PARTIAL_LINE;
  76. }
  77. ret = trace_seq_putc(s, ')');
  78. if (!ret)
  79. return TRACE_TYPE_PARTIAL_LINE;
  80. end:
  81. ret = trace_seq_putc(s, '\n');
  82. if (!ret)
  83. return TRACE_TYPE_PARTIAL_LINE;
  84. return TRACE_TYPE_HANDLED;
  85. }
  86. enum print_line_t
  87. print_syscall_exit(struct trace_iterator *iter, int flags)
  88. {
  89. struct trace_seq *s = &iter->seq;
  90. struct trace_entry *ent = iter->ent;
  91. struct syscall_trace_exit *trace;
  92. int syscall;
  93. struct syscall_metadata *entry;
  94. int ret;
  95. trace = (typeof(trace))ent;
  96. syscall = trace->nr;
  97. entry = syscall_nr_to_meta(syscall);
  98. if (!entry) {
  99. trace_seq_printf(s, "\n");
  100. return TRACE_TYPE_HANDLED;
  101. }
  102. if (entry->exit_event->id != ent->type) {
  103. WARN_ON_ONCE(1);
  104. return TRACE_TYPE_UNHANDLED;
  105. }
  106. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  107. trace->ret);
  108. if (!ret)
  109. return TRACE_TYPE_PARTIAL_LINE;
  110. return TRACE_TYPE_HANDLED;
  111. }
  /*
   * Declared here but not defined in this file; it is only referenced by
   * SYSCALL_FIELD() when the sizes disagree (presumably left undefined so a
   * mismatch fails at link time -- confirm).
   */
  extern char *__bad_type_size(void);

  /*
   * SYSCALL_FIELD - expand to the argument list describing one record field
   * @type: declared C type of the field
   * @name: member name inside the local variable `trace`
   *
   * Expands to five comma-separated arguments: type string, name string,
   * offset, size and signedness. The first argument becomes
   * __bad_type_size() if sizeof(@type) differs from the member's size.
   * A variable named `trace` of the record type must be in scope.
   */
  #define SYSCALL_FIELD(type, name)                                   \
      sizeof(type) != sizeof(trace.name) ?                            \
          __bad_type_size() :                                         \
          #type, #name, offsetof(typeof(trace), name),                \
          sizeof(trace.name), is_signed_type(type)
  118. int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
  119. {
  120. int i;
  121. int ret;
  122. struct syscall_metadata *entry = call->data;
  123. struct syscall_trace_enter trace;
  124. int offset = offsetof(struct syscall_trace_enter, args);
  125. ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  126. "\tsigned:%u;\n",
  127. SYSCALL_FIELD(int, nr));
  128. if (!ret)
  129. return 0;
  130. for (i = 0; i < entry->nb_args; i++) {
  131. ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
  132. entry->args[i]);
  133. if (!ret)
  134. return 0;
  135. ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
  136. "\tsigned:%u;\n", offset,
  137. sizeof(unsigned long),
  138. is_signed_type(unsigned long));
  139. if (!ret)
  140. return 0;
  141. offset += sizeof(unsigned long);
  142. }
  143. trace_seq_puts(s, "\nprint fmt: \"");
  144. for (i = 0; i < entry->nb_args; i++) {
  145. ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
  146. sizeof(unsigned long),
  147. i == entry->nb_args - 1 ? "" : ", ");
  148. if (!ret)
  149. return 0;
  150. }
  151. trace_seq_putc(s, '"');
  152. for (i = 0; i < entry->nb_args; i++) {
  153. ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
  154. entry->args[i]);
  155. if (!ret)
  156. return 0;
  157. }
  158. return trace_seq_putc(s, '\n');
  159. }
  160. int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
  161. {
  162. int ret;
  163. struct syscall_trace_exit trace;
  164. ret = trace_seq_printf(s,
  165. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  166. "\tsigned:%u;\n"
  167. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  168. "\tsigned:%u;\n",
  169. SYSCALL_FIELD(int, nr),
  170. SYSCALL_FIELD(long, ret));
  171. if (!ret)
  172. return 0;
  173. return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
  174. }
  175. int syscall_enter_define_fields(struct ftrace_event_call *call)
  176. {
  177. struct syscall_trace_enter trace;
  178. struct syscall_metadata *meta = call->data;
  179. int ret;
  180. int i;
  181. int offset = offsetof(typeof(trace), args);
  182. ret = trace_define_common_fields(call);
  183. if (ret)
  184. return ret;
  185. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  186. if (ret)
  187. return ret;
  188. for (i = 0; i < meta->nb_args; i++) {
  189. ret = trace_define_field(call, meta->types[i],
  190. meta->args[i], offset,
  191. sizeof(unsigned long), 0,
  192. FILTER_OTHER);
  193. offset += sizeof(unsigned long);
  194. }
  195. return ret;
  196. }
  197. int syscall_exit_define_fields(struct ftrace_event_call *call)
  198. {
  199. struct syscall_trace_exit trace;
  200. int ret;
  201. ret = trace_define_common_fields(call);
  202. if (ret)
  203. return ret;
  204. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  205. if (ret)
  206. return ret;
  207. ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
  208. FILTER_OTHER);
  209. return ret;
  210. }
  211. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  212. {
  213. struct syscall_trace_enter *entry;
  214. struct syscall_metadata *sys_data;
  215. struct ring_buffer_event *event;
  216. struct ring_buffer *buffer;
  217. int size;
  218. int syscall_nr;
  219. syscall_nr = syscall_get_nr(current, regs);
  220. if (syscall_nr < 0)
  221. return;
  222. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  223. return;
  224. sys_data = syscall_nr_to_meta(syscall_nr);
  225. if (!sys_data)
  226. return;
  227. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  228. event = trace_current_buffer_lock_reserve(&buffer,
  229. sys_data->enter_event->id, size, 0, 0);
  230. if (!event)
  231. return;
  232. entry = ring_buffer_event_data(event);
  233. entry->nr = syscall_nr;
  234. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  235. if (!filter_current_check_discard(buffer, sys_data->enter_event,
  236. entry, event))
  237. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  238. }
  239. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  240. {
  241. struct syscall_trace_exit *entry;
  242. struct syscall_metadata *sys_data;
  243. struct ring_buffer_event *event;
  244. struct ring_buffer *buffer;
  245. int syscall_nr;
  246. syscall_nr = syscall_get_nr(current, regs);
  247. if (syscall_nr < 0)
  248. return;
  249. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  250. return;
  251. sys_data = syscall_nr_to_meta(syscall_nr);
  252. if (!sys_data)
  253. return;
  254. event = trace_current_buffer_lock_reserve(&buffer,
  255. sys_data->exit_event->id, sizeof(*entry), 0, 0);
  256. if (!event)
  257. return;
  258. entry = ring_buffer_event_data(event);
  259. entry->nr = syscall_nr;
  260. entry->ret = syscall_get_return_value(current, regs);
  261. if (!filter_current_check_discard(buffer, sys_data->exit_event,
  262. entry, event))
  263. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  264. }
  265. int reg_event_syscall_enter(struct ftrace_event_call *call)
  266. {
  267. int ret = 0;
  268. int num;
  269. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  270. if (num < 0 || num >= NR_syscalls)
  271. return -ENOSYS;
  272. mutex_lock(&syscall_trace_lock);
  273. if (!sys_refcount_enter)
  274. ret = register_trace_sys_enter(ftrace_syscall_enter);
  275. if (ret) {
  276. pr_info("event trace: Could not activate"
  277. "syscall entry trace point");
  278. } else {
  279. set_bit(num, enabled_enter_syscalls);
  280. sys_refcount_enter++;
  281. }
  282. mutex_unlock(&syscall_trace_lock);
  283. return ret;
  284. }
  285. void unreg_event_syscall_enter(struct ftrace_event_call *call)
  286. {
  287. int num;
  288. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  289. if (num < 0 || num >= NR_syscalls)
  290. return;
  291. mutex_lock(&syscall_trace_lock);
  292. sys_refcount_enter--;
  293. clear_bit(num, enabled_enter_syscalls);
  294. if (!sys_refcount_enter)
  295. unregister_trace_sys_enter(ftrace_syscall_enter);
  296. mutex_unlock(&syscall_trace_lock);
  297. }
  298. int reg_event_syscall_exit(struct ftrace_event_call *call)
  299. {
  300. int ret = 0;
  301. int num;
  302. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  303. if (num < 0 || num >= NR_syscalls)
  304. return -ENOSYS;
  305. mutex_lock(&syscall_trace_lock);
  306. if (!sys_refcount_exit)
  307. ret = register_trace_sys_exit(ftrace_syscall_exit);
  308. if (ret) {
  309. pr_info("event trace: Could not activate"
  310. "syscall exit trace point");
  311. } else {
  312. set_bit(num, enabled_exit_syscalls);
  313. sys_refcount_exit++;
  314. }
  315. mutex_unlock(&syscall_trace_lock);
  316. return ret;
  317. }
  318. void unreg_event_syscall_exit(struct ftrace_event_call *call)
  319. {
  320. int num;
  321. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  322. if (num < 0 || num >= NR_syscalls)
  323. return;
  324. mutex_lock(&syscall_trace_lock);
  325. sys_refcount_exit--;
  326. clear_bit(num, enabled_exit_syscalls);
  327. if (!sys_refcount_exit)
  328. unregister_trace_sys_exit(ftrace_syscall_exit);
  329. mutex_unlock(&syscall_trace_lock);
  330. }
  331. int init_syscall_trace(struct ftrace_event_call *call)
  332. {
  333. int id;
  334. id = register_ftrace_event(call->event);
  335. if (!id)
  336. return -ENODEV;
  337. call->id = id;
  338. INIT_LIST_HEAD(&call->fields);
  339. return 0;
  340. }
  341. int __init init_ftrace_syscalls(void)
  342. {
  343. struct syscall_metadata *meta;
  344. unsigned long addr;
  345. int i;
  346. syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
  347. NR_syscalls, GFP_KERNEL);
  348. if (!syscalls_metadata) {
  349. WARN_ON(1);
  350. return -ENOMEM;
  351. }
  352. for (i = 0; i < NR_syscalls; i++) {
  353. addr = arch_syscall_addr(i);
  354. meta = find_syscall_meta(addr);
  355. if (!meta)
  356. continue;
  357. meta->syscall_nr = i;
  358. syscalls_metadata[i] = meta;
  359. }
  360. return 0;
  361. }
  362. core_initcall(init_ftrace_syscalls);
  363. #ifdef CONFIG_EVENT_PROFILE
  /* Count of enabled profiling events; the probe is registered while > 0. */
  static int sys_prof_refcount_enter;
  static int sys_prof_refcount_exit;

  /* One bit per syscall number; tested by the profiling probes. */
  static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
  static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
  368. static void prof_syscall_enter(struct pt_regs *regs, long id)
  369. {
  370. struct syscall_metadata *sys_data;
  371. struct syscall_trace_enter *rec;
  372. unsigned long flags;
  373. char *trace_buf;
  374. char *raw_data;
  375. int syscall_nr;
  376. int rctx;
  377. int size;
  378. int cpu;
  379. syscall_nr = syscall_get_nr(current, regs);
  380. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  381. return;
  382. sys_data = syscall_nr_to_meta(syscall_nr);
  383. if (!sys_data)
  384. return;
  385. /* get the size after alignment with the u32 buffer size field */
  386. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  387. size = ALIGN(size + sizeof(u32), sizeof(u64));
  388. size -= sizeof(u32);
  389. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  390. "profile buffer not large enough"))
  391. return;
  392. /* Protect the per cpu buffer, begin the rcu read side */
  393. local_irq_save(flags);
  394. rctx = perf_swevent_get_recursion_context();
  395. if (rctx < 0)
  396. goto end_recursion;
  397. cpu = smp_processor_id();
  398. trace_buf = rcu_dereference(perf_trace_buf);
  399. if (!trace_buf)
  400. goto end;
  401. raw_data = per_cpu_ptr(trace_buf, cpu);
  402. /* zero the dead bytes from align to not leak stack to user */
  403. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  404. rec = (struct syscall_trace_enter *) raw_data;
  405. tracing_generic_entry_update(&rec->ent, 0, 0);
  406. rec->ent.type = sys_data->enter_event->id;
  407. rec->nr = syscall_nr;
  408. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  409. (unsigned long *)&rec->args);
  410. perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
  411. end:
  412. perf_swevent_put_recursion_context(rctx);
  413. end_recursion:
  414. local_irq_restore(flags);
  415. }
  416. int prof_sysenter_enable(struct ftrace_event_call *call)
  417. {
  418. int ret = 0;
  419. int num;
  420. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  421. mutex_lock(&syscall_trace_lock);
  422. if (!sys_prof_refcount_enter)
  423. ret = register_trace_sys_enter(prof_syscall_enter);
  424. if (ret) {
  425. pr_info("event trace: Could not activate"
  426. "syscall entry trace point");
  427. } else {
  428. set_bit(num, enabled_prof_enter_syscalls);
  429. sys_prof_refcount_enter++;
  430. }
  431. mutex_unlock(&syscall_trace_lock);
  432. return ret;
  433. }
  434. void prof_sysenter_disable(struct ftrace_event_call *call)
  435. {
  436. int num;
  437. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  438. mutex_lock(&syscall_trace_lock);
  439. sys_prof_refcount_enter--;
  440. clear_bit(num, enabled_prof_enter_syscalls);
  441. if (!sys_prof_refcount_enter)
  442. unregister_trace_sys_enter(prof_syscall_enter);
  443. mutex_unlock(&syscall_trace_lock);
  444. }
  445. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  446. {
  447. struct syscall_metadata *sys_data;
  448. struct syscall_trace_exit *rec;
  449. unsigned long flags;
  450. int syscall_nr;
  451. char *trace_buf;
  452. char *raw_data;
  453. int rctx;
  454. int size;
  455. int cpu;
  456. syscall_nr = syscall_get_nr(current, regs);
  457. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  458. return;
  459. sys_data = syscall_nr_to_meta(syscall_nr);
  460. if (!sys_data)
  461. return;
  462. /* We can probably do that at build time */
  463. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  464. size -= sizeof(u32);
  465. /*
  466. * Impossible, but be paranoid with the future
  467. * How to put this check outside runtime?
  468. */
  469. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  470. "exit event has grown above profile buffer size"))
  471. return;
  472. /* Protect the per cpu buffer, begin the rcu read side */
  473. local_irq_save(flags);
  474. rctx = perf_swevent_get_recursion_context();
  475. if (rctx < 0)
  476. goto end_recursion;
  477. cpu = smp_processor_id();
  478. trace_buf = rcu_dereference(perf_trace_buf);
  479. if (!trace_buf)
  480. goto end;
  481. raw_data = per_cpu_ptr(trace_buf, cpu);
  482. /* zero the dead bytes from align to not leak stack to user */
  483. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  484. rec = (struct syscall_trace_exit *)raw_data;
  485. tracing_generic_entry_update(&rec->ent, 0, 0);
  486. rec->ent.type = sys_data->exit_event->id;
  487. rec->nr = syscall_nr;
  488. rec->ret = syscall_get_return_value(current, regs);
  489. perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
  490. end:
  491. perf_swevent_put_recursion_context(rctx);
  492. end_recursion:
  493. local_irq_restore(flags);
  494. }
  495. int prof_sysexit_enable(struct ftrace_event_call *call)
  496. {
  497. int ret = 0;
  498. int num;
  499. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  500. mutex_lock(&syscall_trace_lock);
  501. if (!sys_prof_refcount_exit)
  502. ret = register_trace_sys_exit(prof_syscall_exit);
  503. if (ret) {
  504. pr_info("event trace: Could not activate"
  505. "syscall entry trace point");
  506. } else {
  507. set_bit(num, enabled_prof_exit_syscalls);
  508. sys_prof_refcount_exit++;
  509. }
  510. mutex_unlock(&syscall_trace_lock);
  511. return ret;
  512. }
  513. void prof_sysexit_disable(struct ftrace_event_call *call)
  514. {
  515. int num;
  516. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  517. mutex_lock(&syscall_trace_lock);
  518. sys_prof_refcount_exit--;
  519. clear_bit(num, enabled_prof_exit_syscalls);
  520. if (!sys_prof_refcount_exit)
  521. unregister_trace_sys_exit(prof_syscall_exit);
  522. mutex_unlock(&syscall_trace_lock);
  523. }
  524. #endif