/*
 * trace_syscalls.c - ftrace and perf hooks for tracing system call entry
 * and exit.
 */

  1. #include <trace/syscall.h>
  2. #include <trace/events/syscalls.h>
  3. #include <linux/slab.h>
  4. #include <linux/kernel.h>
  5. #include <linux/ftrace.h>
  6. #include <linux/perf_event.h>
  7. #include <asm/syscall.h>
  8. #include "trace_output.h"
  9. #include "trace.h"
  10. static DEFINE_MUTEX(syscall_trace_lock);
  11. static int sys_refcount_enter;
  12. static int sys_refcount_exit;
  13. static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
  14. static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
  15. struct ftrace_event_class event_class_syscalls = {
  16. .system = "syscalls"
  17. };
  18. extern unsigned long __start_syscalls_metadata[];
  19. extern unsigned long __stop_syscalls_metadata[];
  20. static struct syscall_metadata **syscalls_metadata;
  21. static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
  22. {
  23. struct syscall_metadata *start;
  24. struct syscall_metadata *stop;
  25. char str[KSYM_SYMBOL_LEN];
  26. start = (struct syscall_metadata *)__start_syscalls_metadata;
  27. stop = (struct syscall_metadata *)__stop_syscalls_metadata;
  28. kallsyms_lookup(syscall, NULL, NULL, NULL, str);
  29. for ( ; start < stop; start++) {
  30. /*
  31. * Only compare after the "sys" prefix. Archs that use
  32. * syscall wrappers may have syscalls symbols aliases prefixed
  33. * with "SyS" instead of "sys", leading to an unwanted
  34. * mismatch.
  35. */
  36. if (start->name && !strcmp(start->name + 3, str + 3))
  37. return start;
  38. }
  39. return NULL;
  40. }
  41. static struct syscall_metadata *syscall_nr_to_meta(int nr)
  42. {
  43. if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
  44. return NULL;
  45. return syscalls_metadata[nr];
  46. }
  47. enum print_line_t
  48. print_syscall_enter(struct trace_iterator *iter, int flags)
  49. {
  50. struct trace_seq *s = &iter->seq;
  51. struct trace_entry *ent = iter->ent;
  52. struct syscall_trace_enter *trace;
  53. struct syscall_metadata *entry;
  54. int i, ret, syscall;
  55. trace = (typeof(trace))ent;
  56. syscall = trace->nr;
  57. entry = syscall_nr_to_meta(syscall);
  58. if (!entry)
  59. goto end;
  60. if (entry->enter_event->id != ent->type) {
  61. WARN_ON_ONCE(1);
  62. goto end;
  63. }
  64. ret = trace_seq_printf(s, "%s(", entry->name);
  65. if (!ret)
  66. return TRACE_TYPE_PARTIAL_LINE;
  67. for (i = 0; i < entry->nb_args; i++) {
  68. /* parameter types */
  69. if (trace_flags & TRACE_ITER_VERBOSE) {
  70. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  71. if (!ret)
  72. return TRACE_TYPE_PARTIAL_LINE;
  73. }
  74. /* parameter values */
  75. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  76. trace->args[i],
  77. i == entry->nb_args - 1 ? "" : ", ");
  78. if (!ret)
  79. return TRACE_TYPE_PARTIAL_LINE;
  80. }
  81. ret = trace_seq_putc(s, ')');
  82. if (!ret)
  83. return TRACE_TYPE_PARTIAL_LINE;
  84. end:
  85. ret = trace_seq_putc(s, '\n');
  86. if (!ret)
  87. return TRACE_TYPE_PARTIAL_LINE;
  88. return TRACE_TYPE_HANDLED;
  89. }
  90. enum print_line_t
  91. print_syscall_exit(struct trace_iterator *iter, int flags)
  92. {
  93. struct trace_seq *s = &iter->seq;
  94. struct trace_entry *ent = iter->ent;
  95. struct syscall_trace_exit *trace;
  96. int syscall;
  97. struct syscall_metadata *entry;
  98. int ret;
  99. trace = (typeof(trace))ent;
  100. syscall = trace->nr;
  101. entry = syscall_nr_to_meta(syscall);
  102. if (!entry) {
  103. trace_seq_printf(s, "\n");
  104. return TRACE_TYPE_HANDLED;
  105. }
  106. if (entry->exit_event->id != ent->type) {
  107. WARN_ON_ONCE(1);
  108. return TRACE_TYPE_UNHANDLED;
  109. }
  110. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  111. trace->ret);
  112. if (!ret)
  113. return TRACE_TYPE_PARTIAL_LINE;
  114. return TRACE_TYPE_HANDLED;
  115. }
  116. extern char *__bad_type_size(void);
  117. #define SYSCALL_FIELD(type, name) \
  118. sizeof(type) != sizeof(trace.name) ? \
  119. __bad_type_size() : \
  120. #type, #name, offsetof(typeof(trace), name), \
  121. sizeof(trace.name), is_signed_type(type)
  122. static
  123. int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
  124. {
  125. int i;
  126. int pos = 0;
  127. /* When len=0, we just calculate the needed length */
  128. #define LEN_OR_ZERO (len ? len - pos : 0)
  129. pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
  130. for (i = 0; i < entry->nb_args; i++) {
  131. pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
  132. entry->args[i], sizeof(unsigned long),
  133. i == entry->nb_args - 1 ? "" : ", ");
  134. }
  135. pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
  136. for (i = 0; i < entry->nb_args; i++) {
  137. pos += snprintf(buf + pos, LEN_OR_ZERO,
  138. ", ((unsigned long)(REC->%s))", entry->args[i]);
  139. }
  140. #undef LEN_OR_ZERO
  141. /* return the length of print_fmt */
  142. return pos;
  143. }
  144. static int set_syscall_print_fmt(struct ftrace_event_call *call)
  145. {
  146. char *print_fmt;
  147. int len;
  148. struct syscall_metadata *entry = call->data;
  149. if (entry->enter_event != call) {
  150. call->print_fmt = "\"0x%lx\", REC->ret";
  151. return 0;
  152. }
  153. /* First: called with 0 length to calculate the needed length */
  154. len = __set_enter_print_fmt(entry, NULL, 0);
  155. print_fmt = kmalloc(len + 1, GFP_KERNEL);
  156. if (!print_fmt)
  157. return -ENOMEM;
  158. /* Second: actually write the @print_fmt */
  159. __set_enter_print_fmt(entry, print_fmt, len + 1);
  160. call->print_fmt = print_fmt;
  161. return 0;
  162. }
  163. static void free_syscall_print_fmt(struct ftrace_event_call *call)
  164. {
  165. struct syscall_metadata *entry = call->data;
  166. if (entry->enter_event == call)
  167. kfree(call->print_fmt);
  168. }
  169. int syscall_enter_define_fields(struct ftrace_event_call *call)
  170. {
  171. struct syscall_trace_enter trace;
  172. struct syscall_metadata *meta = call->data;
  173. int ret;
  174. int i;
  175. int offset = offsetof(typeof(trace), args);
  176. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  177. if (ret)
  178. return ret;
  179. for (i = 0; i < meta->nb_args; i++) {
  180. ret = trace_define_field(call, meta->types[i],
  181. meta->args[i], offset,
  182. sizeof(unsigned long), 0,
  183. FILTER_OTHER);
  184. offset += sizeof(unsigned long);
  185. }
  186. return ret;
  187. }
  188. int syscall_exit_define_fields(struct ftrace_event_call *call)
  189. {
  190. struct syscall_trace_exit trace;
  191. int ret;
  192. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  193. if (ret)
  194. return ret;
  195. ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
  196. FILTER_OTHER);
  197. return ret;
  198. }
  199. void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
  200. {
  201. struct syscall_trace_enter *entry;
  202. struct syscall_metadata *sys_data;
  203. struct ring_buffer_event *event;
  204. struct ring_buffer *buffer;
  205. int size;
  206. int syscall_nr;
  207. syscall_nr = syscall_get_nr(current, regs);
  208. if (syscall_nr < 0)
  209. return;
  210. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  211. return;
  212. sys_data = syscall_nr_to_meta(syscall_nr);
  213. if (!sys_data)
  214. return;
  215. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  216. event = trace_current_buffer_lock_reserve(&buffer,
  217. sys_data->enter_event->id, size, 0, 0);
  218. if (!event)
  219. return;
  220. entry = ring_buffer_event_data(event);
  221. entry->nr = syscall_nr;
  222. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  223. if (!filter_current_check_discard(buffer, sys_data->enter_event,
  224. entry, event))
  225. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  226. }
  227. void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
  228. {
  229. struct syscall_trace_exit *entry;
  230. struct syscall_metadata *sys_data;
  231. struct ring_buffer_event *event;
  232. struct ring_buffer *buffer;
  233. int syscall_nr;
  234. syscall_nr = syscall_get_nr(current, regs);
  235. if (syscall_nr < 0)
  236. return;
  237. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  238. return;
  239. sys_data = syscall_nr_to_meta(syscall_nr);
  240. if (!sys_data)
  241. return;
  242. event = trace_current_buffer_lock_reserve(&buffer,
  243. sys_data->exit_event->id, sizeof(*entry), 0, 0);
  244. if (!event)
  245. return;
  246. entry = ring_buffer_event_data(event);
  247. entry->nr = syscall_nr;
  248. entry->ret = syscall_get_return_value(current, regs);
  249. if (!filter_current_check_discard(buffer, sys_data->exit_event,
  250. entry, event))
  251. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  252. }
  253. int reg_event_syscall_enter(struct ftrace_event_call *call)
  254. {
  255. int ret = 0;
  256. int num;
  257. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  258. if (num < 0 || num >= NR_syscalls)
  259. return -ENOSYS;
  260. mutex_lock(&syscall_trace_lock);
  261. if (!sys_refcount_enter)
  262. ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
  263. if (!ret) {
  264. set_bit(num, enabled_enter_syscalls);
  265. sys_refcount_enter++;
  266. }
  267. mutex_unlock(&syscall_trace_lock);
  268. return ret;
  269. }
  270. void unreg_event_syscall_enter(struct ftrace_event_call *call)
  271. {
  272. int num;
  273. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  274. if (num < 0 || num >= NR_syscalls)
  275. return;
  276. mutex_lock(&syscall_trace_lock);
  277. sys_refcount_enter--;
  278. clear_bit(num, enabled_enter_syscalls);
  279. if (!sys_refcount_enter)
  280. unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
  281. mutex_unlock(&syscall_trace_lock);
  282. }
  283. int reg_event_syscall_exit(struct ftrace_event_call *call)
  284. {
  285. int ret = 0;
  286. int num;
  287. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  288. if (num < 0 || num >= NR_syscalls)
  289. return -ENOSYS;
  290. mutex_lock(&syscall_trace_lock);
  291. if (!sys_refcount_exit)
  292. ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
  293. if (!ret) {
  294. set_bit(num, enabled_exit_syscalls);
  295. sys_refcount_exit++;
  296. }
  297. mutex_unlock(&syscall_trace_lock);
  298. return ret;
  299. }
  300. void unreg_event_syscall_exit(struct ftrace_event_call *call)
  301. {
  302. int num;
  303. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  304. if (num < 0 || num >= NR_syscalls)
  305. return;
  306. mutex_lock(&syscall_trace_lock);
  307. sys_refcount_exit--;
  308. clear_bit(num, enabled_exit_syscalls);
  309. if (!sys_refcount_exit)
  310. unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
  311. mutex_unlock(&syscall_trace_lock);
  312. }
  313. int init_syscall_trace(struct ftrace_event_call *call)
  314. {
  315. int id;
  316. if (set_syscall_print_fmt(call) < 0)
  317. return -ENOMEM;
  318. id = trace_event_raw_init(call);
  319. if (id < 0) {
  320. free_syscall_print_fmt(call);
  321. return id;
  322. }
  323. return id;
  324. }
  325. unsigned long __init arch_syscall_addr(int nr)
  326. {
  327. return (unsigned long)sys_call_table[nr];
  328. }
  329. int __init init_ftrace_syscalls(void)
  330. {
  331. struct syscall_metadata *meta;
  332. unsigned long addr;
  333. int i;
  334. syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
  335. NR_syscalls, GFP_KERNEL);
  336. if (!syscalls_metadata) {
  337. WARN_ON(1);
  338. return -ENOMEM;
  339. }
  340. for (i = 0; i < NR_syscalls; i++) {
  341. addr = arch_syscall_addr(i);
  342. meta = find_syscall_meta(addr);
  343. if (!meta)
  344. continue;
  345. meta->syscall_nr = i;
  346. syscalls_metadata[i] = meta;
  347. }
  348. return 0;
  349. }
  350. core_initcall(init_ftrace_syscalls);
  351. #ifdef CONFIG_PERF_EVENTS
  352. static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
  353. static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
  354. static int sys_perf_refcount_enter;
  355. static int sys_perf_refcount_exit;
  356. static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
  357. {
  358. struct syscall_metadata *sys_data;
  359. struct syscall_trace_enter *rec;
  360. unsigned long flags;
  361. int syscall_nr;
  362. int rctx;
  363. int size;
  364. syscall_nr = syscall_get_nr(current, regs);
  365. if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
  366. return;
  367. sys_data = syscall_nr_to_meta(syscall_nr);
  368. if (!sys_data)
  369. return;
  370. /* get the size after alignment with the u32 buffer size field */
  371. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  372. size = ALIGN(size + sizeof(u32), sizeof(u64));
  373. size -= sizeof(u32);
  374. if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
  375. "perf buffer not large enough"))
  376. return;
  377. rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
  378. sys_data->enter_event->id, &rctx, &flags);
  379. if (!rec)
  380. return;
  381. rec->nr = syscall_nr;
  382. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  383. (unsigned long *)&rec->args);
  384. perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
  385. }
  386. int perf_sysenter_enable(struct ftrace_event_call *call)
  387. {
  388. int ret = 0;
  389. int num;
  390. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  391. mutex_lock(&syscall_trace_lock);
  392. if (!sys_perf_refcount_enter)
  393. ret = register_trace_sys_enter(perf_syscall_enter, NULL);
  394. if (ret) {
  395. pr_info("event trace: Could not activate"
  396. "syscall entry trace point");
  397. } else {
  398. set_bit(num, enabled_perf_enter_syscalls);
  399. sys_perf_refcount_enter++;
  400. }
  401. mutex_unlock(&syscall_trace_lock);
  402. return ret;
  403. }
  404. void perf_sysenter_disable(struct ftrace_event_call *call)
  405. {
  406. int num;
  407. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  408. mutex_lock(&syscall_trace_lock);
  409. sys_perf_refcount_enter--;
  410. clear_bit(num, enabled_perf_enter_syscalls);
  411. if (!sys_perf_refcount_enter)
  412. unregister_trace_sys_enter(perf_syscall_enter, NULL);
  413. mutex_unlock(&syscall_trace_lock);
  414. }
  415. static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
  416. {
  417. struct syscall_metadata *sys_data;
  418. struct syscall_trace_exit *rec;
  419. unsigned long flags;
  420. int syscall_nr;
  421. int rctx;
  422. int size;
  423. syscall_nr = syscall_get_nr(current, regs);
  424. if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
  425. return;
  426. sys_data = syscall_nr_to_meta(syscall_nr);
  427. if (!sys_data)
  428. return;
  429. /* We can probably do that at build time */
  430. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  431. size -= sizeof(u32);
  432. /*
  433. * Impossible, but be paranoid with the future
  434. * How to put this check outside runtime?
  435. */
  436. if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
  437. "exit event has grown above perf buffer size"))
  438. return;
  439. rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
  440. sys_data->exit_event->id, &rctx, &flags);
  441. if (!rec)
  442. return;
  443. rec->nr = syscall_nr;
  444. rec->ret = syscall_get_return_value(current, regs);
  445. perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
  446. }
  447. int perf_sysexit_enable(struct ftrace_event_call *call)
  448. {
  449. int ret = 0;
  450. int num;
  451. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  452. mutex_lock(&syscall_trace_lock);
  453. if (!sys_perf_refcount_exit)
  454. ret = register_trace_sys_exit(perf_syscall_exit, NULL);
  455. if (ret) {
  456. pr_info("event trace: Could not activate"
  457. "syscall exit trace point");
  458. } else {
  459. set_bit(num, enabled_perf_exit_syscalls);
  460. sys_perf_refcount_exit++;
  461. }
  462. mutex_unlock(&syscall_trace_lock);
  463. return ret;
  464. }
  465. void perf_sysexit_disable(struct ftrace_event_call *call)
  466. {
  467. int num;
  468. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  469. mutex_lock(&syscall_trace_lock);
  470. sys_perf_refcount_exit--;
  471. clear_bit(num, enabled_perf_exit_syscalls);
  472. if (!sys_perf_refcount_exit)
  473. unregister_trace_sys_exit(perf_syscall_exit, NULL);
  474. mutex_unlock(&syscall_trace_lock);
  475. }
  476. #endif /* CONFIG_PERF_EVENTS */