/* trace_syscalls.c */

#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;
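
/*
 * Find the syscall metadata matching the handler at address @syscall.
 * The entries live between __start_syscalls_metadata and
 * __stop_syscalls_metadata, a section populated by the SYSCALL_DEFINE*()
 * macros; we match by symbol name rather than by address.
 */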

static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
        struct syscall_metadata *start;
        struct syscall_metadata *stop;
        char str[KSYM_SYMBOL_LEN];

        start = (struct syscall_metadata *)__start_syscalls_metadata;
        stop = (struct syscall_metadata *)__stop_syscalls_metadata;
        kallsyms_lookup(syscall, NULL, NULL, NULL, str);

        for ( ; start < stop; start++) {
                /*
                 * Only compare after the "sys" prefix. Archs that use
                 * syscall wrappers may have syscall symbols aliased with
                 * a "SyS" prefix instead of "sys", leading to an unwanted
                 * mismatch.
                 */
                if (start->name && !strcmp(start->name + 3, str + 3))
                        return start;
        }
        return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
        if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
                return NULL;

        return syscalls_metadata[nr];
}
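
/*
 * Output one sys_enter event as "name(arg: value, ...)".  With the
 * verbose trace option set, each value is also preceded by its C type.
 */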

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_enter *trace;
        struct syscall_metadata *entry;
        int i, ret, syscall;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry)
                goto end;

        if (entry->enter_event->id != ent->type) {
                WARN_ON_ONCE(1);
                goto end;
        }

        ret = trace_seq_printf(s, "%s(", entry->name);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        for (i = 0; i < entry->nb_args; i++) {
                /* parameter types */
                if (trace_flags & TRACE_ITER_VERBOSE) {
                        ret = trace_seq_printf(s, "%s ", entry->types[i]);
                        if (!ret)
                                return TRACE_TYPE_PARTIAL_LINE;
                }
                /* parameter values */
                ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
                                       trace->args[i],
                                       i == entry->nb_args - 1 ? "" : ", ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }

        ret = trace_seq_putc(s, ')');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

end:
        ret = trace_seq_putc(s, '\n');
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}
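
/*
 * Output one sys_exit event as "name -> 0x<return value>".  An unknown
 * syscall number still gets a newline so the trace output stays well formed.
 */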

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_exit *trace;
        int syscall;
        struct syscall_metadata *entry;
        int ret;

        trace = (typeof(trace))ent;
        syscall = trace->nr;
        entry = syscall_nr_to_meta(syscall);

        if (!entry) {
                trace_seq_printf(s, "\n");
                return TRACE_TYPE_HANDLED;
        }

        if (entry->exit_event->id != ent->type) {
                WARN_ON_ONCE(1);
                return TRACE_TYPE_UNHANDLED;
        }

        ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
                               trace->ret);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}
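
/*
 * SYSCALL_FIELD() expands to the (type string, name string, offset, size,
 * signedness) argument list expected by trace_define_field().  The sizeof()
 * comparison is a compile-time constant, so on a type mismatch the branch
 * leaves a reference to the deliberately undefined __bad_type_size(),
 * turning the mismatch into a link-time error.
 */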

extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)                                       \
        sizeof(type) != sizeof(trace.name) ?                            \
                __bad_type_size() :                                     \
                #type, #name, offsetof(typeof(trace), name),            \
                sizeof(trace.name), is_signed_type(type)
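
/*
 * Build the print_fmt string for a sys_enter event.  This uses the common
 * two-pass snprintf() idiom: a first call with len == 0 only computes the
 * required length, then the caller allocates a buffer and a second call
 * actually writes it.
 */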

static
int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
        int i;
        int pos = 0;

        /* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
        for (i = 0; i < entry->nb_args; i++) {
                pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
                                entry->args[i], sizeof(unsigned long),
                                i == entry->nb_args - 1 ? "" : ", ");
        }
        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

        for (i = 0; i < entry->nb_args; i++) {
                pos += snprintf(buf + pos, LEN_OR_ZERO,
                                ", ((unsigned long)(REC->%s))", entry->args[i]);
        }

#undef LEN_OR_ZERO

        /* return the length of print_fmt */
        return pos;
}
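
/*
 * Exit events share one static format string; enter events get a
 * kmalloc()ed one built by __set_enter_print_fmt() above.
 */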

static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
        char *print_fmt;
        int len;
        struct syscall_metadata *entry = call->data;

        if (entry->enter_event != call) {
                call->print_fmt = "\"0x%lx\", REC->ret";
                return 0;
        }

        /* First: called with 0 length to calculate the needed length */
        len = __set_enter_print_fmt(entry, NULL, 0);

        print_fmt = kmalloc(len + 1, GFP_KERNEL);
        if (!print_fmt)
                return -ENOMEM;

        /* Second: actually write the @print_fmt */
        __set_enter_print_fmt(entry, print_fmt, len + 1);
        call->print_fmt = print_fmt;

        return 0;
}

static void free_syscall_print_fmt(struct ftrace_event_call *call)
{
        struct syscall_metadata *entry = call->data;

        if (entry->enter_event == call)
                kfree(call->print_fmt);
}
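
/*
 * Describe the fields of a sys_enter record to the event filter code:
 * the syscall number, then one unsigned-long slot per argument.
 */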

int syscall_enter_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_enter trace;
        struct syscall_metadata *meta = call->data;
        int ret;
        int i;
        int offset = offsetof(typeof(trace), args);

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        for (i = 0; i < meta->nb_args; i++) {
                ret = trace_define_field(call, meta->types[i],
                                         meta->args[i], offset,
                                         sizeof(unsigned long), 0,
                                         FILTER_OTHER);
                offset += sizeof(unsigned long);
        }

        return ret;
}

int syscall_exit_define_fields(struct ftrace_event_call *call)
{
        struct syscall_trace_exit trace;
        int ret;

        ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
        if (ret)
                return ret;

        ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
                                 FILTER_OTHER);

        return ret;
}
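
/*
 * Probe attached to the sys_enter tracepoint.  A single probe is shared by
 * all syscalls; the enabled_enter_syscalls bitmap decides which syscall
 * numbers actually get recorded into the ftrace ring buffer.
 */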

void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_trace_enter *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int size;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->enter_event->id, size, 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

        if (!filter_current_check_discard(buffer, sys_data->enter_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
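
/*
 * Probe attached to the sys_exit tracepoint; records the syscall number
 * and its return value, mirroring ftrace_syscall_enter() above.
 */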

void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_trace_exit *entry;
        struct syscall_metadata *sys_data;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int syscall_nr;

        syscall_nr = syscall_get_nr(current, regs);
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        event = trace_current_buffer_lock_reserve(&buffer,
                        sys_data->exit_event->id, sizeof(*entry), 0, 0);
        if (!event)
                return;

        entry = ring_buffer_event_data(event);
        entry->nr = syscall_nr;
        entry->ret = syscall_get_return_value(current, regs);

        if (!filter_current_check_discard(buffer, sys_data->exit_event,
                                          entry, event))
                trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
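
/*
 * Enable ftrace recording of one syscall's entry event.  The tracepoint
 * probe is registered only for the first user; sys_refcount_enter counts
 * how many syscalls are enabled, and per-syscall state lives in the bitmap.
 */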

int reg_event_syscall_enter(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_enter)
                ret = register_trace_sys_enter(ftrace_syscall_enter);
        if (!ret) {
                set_bit(num, enabled_enter_syscalls);
                sys_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_enter--;
        clear_bit(num, enabled_enter_syscalls);
        if (!sys_refcount_enter)
                unregister_trace_sys_enter(ftrace_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return -ENOSYS;
        mutex_lock(&syscall_trace_lock);
        if (!sys_refcount_exit)
                ret = register_trace_sys_exit(ftrace_syscall_exit);
        if (!ret) {
                set_bit(num, enabled_exit_syscalls);
                sys_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;
        if (num < 0 || num >= NR_syscalls)
                return;
        mutex_lock(&syscall_trace_lock);
        sys_refcount_exit--;
        clear_bit(num, enabled_exit_syscalls);
        if (!sys_refcount_exit)
                unregister_trace_sys_exit(ftrace_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}
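
/*
 * Per-event init callback: build the print_fmt string and register the
 * event with the generic trace event code, freeing the format string
 * again if registration fails.
 */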

int init_syscall_trace(struct ftrace_event_call *call)
{
        int id;

        if (set_syscall_print_fmt(call) < 0)
                return -ENOMEM;

        id = trace_event_raw_init(call);
        if (id < 0) {
                free_syscall_print_fmt(call);
                return id;
        }

        return id;
}
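
/*
 * Translate a syscall number into the address of its handler; this relies
 * on the architecture exposing sys_call_table.
 */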

unsigned long __init arch_syscall_addr(int nr)
{
        return (unsigned long)sys_call_table[nr];
}
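
/*
 * Boot-time setup: walk the syscall table once and cache the syscall
 * number -> metadata mapping, so later lookups are a simple array index.
 */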

int __init init_ftrace_syscalls(void)
{
        struct syscall_metadata *meta;
        unsigned long addr;
        int i;

        syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
                                        NR_syscalls, GFP_KERNEL);
        if (!syscalls_metadata) {
                WARN_ON(1);
                return -ENOMEM;
        }

        for (i = 0; i < NR_syscalls; i++) {
                addr = arch_syscall_addr(i);
                meta = find_syscall_meta(addr);
                if (!meta)
                        continue;

                meta->syscall_nr = i;
                syscalls_metadata[i] = meta;
        }

        return 0;
}
core_initcall(init_ftrace_syscalls);
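
/*
 * perf integration: as in the ftrace path above, a single probe per
 * tracepoint is shared by all syscalls and gated by its own bitmap and
 * refcount, but records go to the per-cpu perf buffers instead of the
 * ftrace ring buffer.
 */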

#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
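
/*
 * perf records are prefixed by a u32 size field and must stay u64 aligned,
 * hence the ALIGN() dance below when sizing the record.  As in the ftrace
 * probes, bail out early on a negative syscall number before using it as
 * a bitmap index.
 */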

static void perf_syscall_enter(struct pt_regs *regs, long id)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_enter *rec;
        unsigned long flags;
        int syscall_nr;
        int rctx;
        int size;

        syscall_nr = syscall_get_nr(current, regs);
        /* guard against a negative syscall number, as the ftrace probes do */
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* get the size after alignment with the u32 buffer size field */
        size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
        size = ALIGN(size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
                      "perf buffer not large enough"))
                return;

        rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
                                sys_data->enter_event->id, &rctx, &flags);
        if (!rec)
                return;

        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                              (unsigned long *)&rec->args);
        perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
}

int perf_sysenter_enable(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        if (!sys_perf_refcount_enter)
                ret = register_trace_sys_enter(perf_syscall_enter);
        if (ret) {
                pr_info("event trace: Could not activate"
                                " syscall entry trace point");
        } else {
                set_bit(num, enabled_perf_enter_syscalls);
                sys_perf_refcount_enter++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void perf_sysenter_disable(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        sys_perf_refcount_enter--;
        clear_bit(num, enabled_perf_enter_syscalls);
        if (!sys_perf_refcount_enter)
                unregister_trace_sys_enter(perf_syscall_enter);
        mutex_unlock(&syscall_trace_lock);
}
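
/*
 * Exit-side counterpart of perf_syscall_enter(): a fixed-size record
 * holding the syscall number and its return value.
 */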

static void perf_syscall_exit(struct pt_regs *regs, long ret)
{
        struct syscall_metadata *sys_data;
        struct syscall_trace_exit *rec;
        unsigned long flags;
        int syscall_nr;
        int rctx;
        int size;

        syscall_nr = syscall_get_nr(current, regs);
        /* guard against a negative syscall number, as the ftrace probes do */
        if (syscall_nr < 0)
                return;
        if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
                return;

        sys_data = syscall_nr_to_meta(syscall_nr);
        if (!sys_data)
                return;

        /* We can probably do that at build time */
        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);

        /*
         * Impossible, but be paranoid with the future
         * How to put this check outside runtime?
         */
        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
                      "exit event has grown above perf buffer size"))
                return;

        rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
                                sys_data->exit_event->id, &rctx, &flags);
        if (!rec)
                return;

        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);

        perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
}

int perf_sysexit_enable(struct ftrace_event_call *call)
{
        int ret = 0;
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        if (!sys_perf_refcount_exit)
                ret = register_trace_sys_exit(perf_syscall_exit);
        if (ret) {
                pr_info("event trace: Could not activate"
                                " syscall exit trace point");
        } else {
                set_bit(num, enabled_perf_exit_syscalls);
                sys_perf_refcount_exit++;
        }
        mutex_unlock(&syscall_trace_lock);
        return ret;
}

void perf_sysexit_disable(struct ftrace_event_call *call)
{
        int num;

        num = ((struct syscall_metadata *)call->data)->syscall_nr;

        mutex_lock(&syscall_trace_lock);
        sys_perf_refcount_exit--;
        clear_bit(num, enabled_perf_exit_syscalls);
        if (!sys_perf_refcount_exit)
                unregister_trace_sys_exit(perf_syscall_exit);
        mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_PERF_EVENTS */