trace_syscalls.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. #include <trace/syscall.h>
  2. #include <trace/events/syscalls.h>
  3. #include <linux/kernel.h>
  4. #include <linux/ftrace.h>
  5. #include <linux/perf_event.h>
  6. #include <asm/syscall.h>
  7. #include "trace_output.h"
  8. #include "trace.h"
  /*
   * Taken by the register/unregister helpers in this file around every
   * update of the reference counts and "enabled" bitmaps.
   */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* One bit per syscall number: set while that syscall's event is enabled. */
  static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
  static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

  /*
   * Number of enabled enter/exit events.  The shared tracepoint probe is
   * registered on the 0 -> 1 transition and removed on the way back to 0.
   */
  static int sys_refcount_enter;
  static int sys_refcount_exit;

  /* Bounds of the syscall metadata records scanned by find_syscall_meta(). */
  extern unsigned long __start_syscalls_metadata[];
  extern unsigned long __stop_syscalls_metadata[];

  /* Table indexed by syscall number; allocated by init_ftrace_syscalls(). */
  static struct syscall_metadata **syscalls_metadata;
  17. static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
  18. {
  19. struct syscall_metadata *start;
  20. struct syscall_metadata *stop;
  21. char str[KSYM_SYMBOL_LEN];
  22. start = (struct syscall_metadata *)__start_syscalls_metadata;
  23. stop = (struct syscall_metadata *)__stop_syscalls_metadata;
  24. kallsyms_lookup(syscall, NULL, NULL, NULL, str);
  25. for ( ; start < stop; start++) {
  26. /*
  27. * Only compare after the "sys" prefix. Archs that use
  28. * syscall wrappers may have syscalls symbols aliases prefixed
  29. * with "SyS" instead of "sys", leading to an unwanted
  30. * mismatch.
  31. */
  32. if (start->name && !strcmp(start->name + 3, str + 3))
  33. return start;
  34. }
  35. return NULL;
  36. }
  37. static struct syscall_metadata *syscall_nr_to_meta(int nr)
  38. {
  39. if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
  40. return NULL;
  41. return syscalls_metadata[nr];
  42. }
  43. enum print_line_t
  44. print_syscall_enter(struct trace_iterator *iter, int flags)
  45. {
  46. struct trace_seq *s = &iter->seq;
  47. struct trace_entry *ent = iter->ent;
  48. struct syscall_trace_enter *trace;
  49. struct syscall_metadata *entry;
  50. int i, ret, syscall;
  51. trace = (typeof(trace))ent;
  52. syscall = trace->nr;
  53. entry = syscall_nr_to_meta(syscall);
  54. if (!entry)
  55. goto end;
  56. if (entry->enter_event->id != ent->type) {
  57. WARN_ON_ONCE(1);
  58. goto end;
  59. }
  60. ret = trace_seq_printf(s, "%s(", entry->name);
  61. if (!ret)
  62. return TRACE_TYPE_PARTIAL_LINE;
  63. for (i = 0; i < entry->nb_args; i++) {
  64. /* parameter types */
  65. if (trace_flags & TRACE_ITER_VERBOSE) {
  66. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  67. if (!ret)
  68. return TRACE_TYPE_PARTIAL_LINE;
  69. }
  70. /* parameter values */
  71. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  72. trace->args[i],
  73. i == entry->nb_args - 1 ? "" : ", ");
  74. if (!ret)
  75. return TRACE_TYPE_PARTIAL_LINE;
  76. }
  77. ret = trace_seq_putc(s, ')');
  78. if (!ret)
  79. return TRACE_TYPE_PARTIAL_LINE;
  80. end:
  81. ret = trace_seq_putc(s, '\n');
  82. if (!ret)
  83. return TRACE_TYPE_PARTIAL_LINE;
  84. return TRACE_TYPE_HANDLED;
  85. }
  86. enum print_line_t
  87. print_syscall_exit(struct trace_iterator *iter, int flags)
  88. {
  89. struct trace_seq *s = &iter->seq;
  90. struct trace_entry *ent = iter->ent;
  91. struct syscall_trace_exit *trace;
  92. int syscall;
  93. struct syscall_metadata *entry;
  94. int ret;
  95. trace = (typeof(trace))ent;
  96. syscall = trace->nr;
  97. entry = syscall_nr_to_meta(syscall);
  98. if (!entry) {
  99. trace_seq_printf(s, "\n");
  100. return TRACE_TYPE_HANDLED;
  101. }
  102. if (entry->exit_event->id != ent->type) {
  103. WARN_ON_ONCE(1);
  104. return TRACE_TYPE_UNHANDLED;
  105. }
  106. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  107. trace->ret);
  108. if (!ret)
  109. return TRACE_TYPE_PARTIAL_LINE;
  110. return TRACE_TYPE_HANDLED;
  111. }
  /* Declared only; referenced from SYSCALL_FIELD() when the sizes disagree. */
  extern char *__bad_type_size(void);

  /*
   * Expands to the (type string, name string, offset, size, signedness)
   * argument list handed to trace_define_field() for member @fname of the
   * local variable "trace".  The caller must therefore have a variable named
   * "trace" of the record type in scope.  If sizeof(@ftype) differs from the
   * member's size, the first argument becomes a call to __bad_type_size().
   */
  #define SYSCALL_FIELD(ftype, fname)                                   \
          sizeof(ftype) != sizeof(trace.fname) ?                        \
                  __bad_type_size() :                                   \
                  #ftype, #fname, offsetof(typeof(trace), fname),       \
                  sizeof(trace.fname), is_signed_type(ftype)
  118. static
  119. int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
  120. {
  121. int i;
  122. int pos = 0;
  123. /* When len=0, we just calculate the needed length */
  124. #define LEN_OR_ZERO (len ? len - pos : 0)
  125. pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
  126. for (i = 0; i < entry->nb_args; i++) {
  127. pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
  128. entry->args[i], sizeof(unsigned long),
  129. i == entry->nb_args - 1 ? "" : ", ");
  130. }
  131. pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
  132. for (i = 0; i < entry->nb_args; i++) {
  133. pos += snprintf(buf + pos, LEN_OR_ZERO,
  134. ", ((unsigned long)(REC->%s))", entry->args[i]);
  135. }
  136. #undef LEN_OR_ZERO
  137. /* return the length of print_fmt */
  138. return pos;
  139. }
  140. static int set_syscall_print_fmt(struct ftrace_event_call *call)
  141. {
  142. char *print_fmt;
  143. int len;
  144. struct syscall_metadata *entry = call->data;
  145. if (entry->enter_event != call) {
  146. call->print_fmt = "\"0x%lx\", REC->ret";
  147. return 0;
  148. }
  149. /* First: called with 0 length to calculate the needed length */
  150. len = __set_enter_print_fmt(entry, NULL, 0);
  151. print_fmt = kmalloc(len + 1, GFP_KERNEL);
  152. if (!print_fmt)
  153. return -ENOMEM;
  154. /* Second: actually write the @print_fmt */
  155. __set_enter_print_fmt(entry, print_fmt, len + 1);
  156. call->print_fmt = print_fmt;
  157. return 0;
  158. }
  159. static void free_syscall_print_fmt(struct ftrace_event_call *call)
  160. {
  161. struct syscall_metadata *entry = call->data;
  162. if (entry->enter_event == call)
  163. kfree(call->print_fmt);
  164. }
  165. int syscall_enter_define_fields(struct ftrace_event_call *call)
  166. {
  167. struct syscall_trace_enter trace;
  168. struct syscall_metadata *meta = call->data;
  169. int ret;
  170. int i;
  171. int offset = offsetof(typeof(trace), args);
  172. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  173. if (ret)
  174. return ret;
  175. for (i = 0; i < meta->nb_args; i++) {
  176. ret = trace_define_field(call, meta->types[i],
  177. meta->args[i], offset,
  178. sizeof(unsigned long), 0,
  179. FILTER_OTHER);
  180. offset += sizeof(unsigned long);
  181. }
  182. return ret;
  183. }
  184. int syscall_exit_define_fields(struct ftrace_event_call *call)
  185. {
  186. struct syscall_trace_exit trace;
  187. int ret;
  188. ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
  189. if (ret)
  190. return ret;
  191. ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
  192. FILTER_OTHER);
  193. return ret;
  194. }
  195. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  196. {
  197. struct syscall_trace_enter *entry;
  198. struct syscall_metadata *sys_data;
  199. struct ring_buffer_event *event;
  200. struct ring_buffer *buffer;
  201. int size;
  202. int syscall_nr;
  203. syscall_nr = syscall_get_nr(current, regs);
  204. if (syscall_nr < 0)
  205. return;
  206. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  207. return;
  208. sys_data = syscall_nr_to_meta(syscall_nr);
  209. if (!sys_data)
  210. return;
  211. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  212. event = trace_current_buffer_lock_reserve(&buffer,
  213. sys_data->enter_event->id, size, 0, 0);
  214. if (!event)
  215. return;
  216. entry = ring_buffer_event_data(event);
  217. entry->nr = syscall_nr;
  218. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  219. if (!filter_current_check_discard(buffer, sys_data->enter_event,
  220. entry, event))
  221. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  222. }
  223. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  224. {
  225. struct syscall_trace_exit *entry;
  226. struct syscall_metadata *sys_data;
  227. struct ring_buffer_event *event;
  228. struct ring_buffer *buffer;
  229. int syscall_nr;
  230. syscall_nr = syscall_get_nr(current, regs);
  231. if (syscall_nr < 0)
  232. return;
  233. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  234. return;
  235. sys_data = syscall_nr_to_meta(syscall_nr);
  236. if (!sys_data)
  237. return;
  238. event = trace_current_buffer_lock_reserve(&buffer,
  239. sys_data->exit_event->id, sizeof(*entry), 0, 0);
  240. if (!event)
  241. return;
  242. entry = ring_buffer_event_data(event);
  243. entry->nr = syscall_nr;
  244. entry->ret = syscall_get_return_value(current, regs);
  245. if (!filter_current_check_discard(buffer, sys_data->exit_event,
  246. entry, event))
  247. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  248. }
  249. int reg_event_syscall_enter(struct ftrace_event_call *call)
  250. {
  251. int ret = 0;
  252. int num;
  253. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  254. if (num < 0 || num >= NR_syscalls)
  255. return -ENOSYS;
  256. mutex_lock(&syscall_trace_lock);
  257. if (!sys_refcount_enter)
  258. ret = register_trace_sys_enter(ftrace_syscall_enter);
  259. if (!ret) {
  260. set_bit(num, enabled_enter_syscalls);
  261. sys_refcount_enter++;
  262. }
  263. mutex_unlock(&syscall_trace_lock);
  264. return ret;
  265. }
  266. void unreg_event_syscall_enter(struct ftrace_event_call *call)
  267. {
  268. int num;
  269. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  270. if (num < 0 || num >= NR_syscalls)
  271. return;
  272. mutex_lock(&syscall_trace_lock);
  273. sys_refcount_enter--;
  274. clear_bit(num, enabled_enter_syscalls);
  275. if (!sys_refcount_enter)
  276. unregister_trace_sys_enter(ftrace_syscall_enter);
  277. mutex_unlock(&syscall_trace_lock);
  278. }
  279. int reg_event_syscall_exit(struct ftrace_event_call *call)
  280. {
  281. int ret = 0;
  282. int num;
  283. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  284. if (num < 0 || num >= NR_syscalls)
  285. return -ENOSYS;
  286. mutex_lock(&syscall_trace_lock);
  287. if (!sys_refcount_exit)
  288. ret = register_trace_sys_exit(ftrace_syscall_exit);
  289. if (!ret) {
  290. set_bit(num, enabled_exit_syscalls);
  291. sys_refcount_exit++;
  292. }
  293. mutex_unlock(&syscall_trace_lock);
  294. return ret;
  295. }
  296. void unreg_event_syscall_exit(struct ftrace_event_call *call)
  297. {
  298. int num;
  299. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  300. if (num < 0 || num >= NR_syscalls)
  301. return;
  302. mutex_lock(&syscall_trace_lock);
  303. sys_refcount_exit--;
  304. clear_bit(num, enabled_exit_syscalls);
  305. if (!sys_refcount_exit)
  306. unregister_trace_sys_exit(ftrace_syscall_exit);
  307. mutex_unlock(&syscall_trace_lock);
  308. }
  309. int init_syscall_trace(struct ftrace_event_call *call)
  310. {
  311. int id;
  312. if (set_syscall_print_fmt(call) < 0)
  313. return -ENOMEM;
  314. id = trace_event_raw_init(call);
  315. if (id < 0) {
  316. free_syscall_print_fmt(call);
  317. return id;
  318. }
  319. return id;
  320. }
  321. unsigned long __init arch_syscall_addr(int nr)
  322. {
  323. return (unsigned long)sys_call_table[nr];
  324. }
  325. int __init init_ftrace_syscalls(void)
  326. {
  327. struct syscall_metadata *meta;
  328. unsigned long addr;
  329. int i;
  330. syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
  331. NR_syscalls, GFP_KERNEL);
  332. if (!syscalls_metadata) {
  333. WARN_ON(1);
  334. return -ENOMEM;
  335. }
  336. for (i = 0; i < NR_syscalls; i++) {
  337. addr = arch_syscall_addr(i);
  338. meta = find_syscall_meta(addr);
  339. if (!meta)
  340. continue;
  341. meta->syscall_nr = i;
  342. syscalls_metadata[i] = meta;
  343. }
  344. return 0;
  345. }
  346. core_initcall(init_ftrace_syscalls);
  347. #ifdef CONFIG_PERF_EVENTS
  /*
   * Number of syscalls enabled for perf on each side.  The perf probe is
   * registered on the 0 -> 1 transition and removed on the way back to 0.
   */
  static int sys_prof_refcount_enter;
  static int sys_prof_refcount_exit;

  /* One bit per syscall number: set while perf profiles that syscall. */
  static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
  static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
  352. static void prof_syscall_enter(struct pt_regs *regs, long id)
  353. {
  354. struct syscall_metadata *sys_data;
  355. struct syscall_trace_enter *rec;
  356. unsigned long flags;
  357. int syscall_nr;
  358. int rctx;
  359. int size;
  360. syscall_nr = syscall_get_nr(current, regs);
  361. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  362. return;
  363. sys_data = syscall_nr_to_meta(syscall_nr);
  364. if (!sys_data)
  365. return;
  366. /* get the size after alignment with the u32 buffer size field */
  367. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  368. size = ALIGN(size + sizeof(u32), sizeof(u64));
  369. size -= sizeof(u32);
  370. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  371. "profile buffer not large enough"))
  372. return;
  373. rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
  374. sys_data->enter_event->id, &rctx, &flags);
  375. if (!rec)
  376. return;
  377. rec->nr = syscall_nr;
  378. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  379. (unsigned long *)&rec->args);
  380. ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
  381. }
  382. int prof_sysenter_enable(struct ftrace_event_call *call)
  383. {
  384. int ret = 0;
  385. int num;
  386. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  387. mutex_lock(&syscall_trace_lock);
  388. if (!sys_prof_refcount_enter)
  389. ret = register_trace_sys_enter(prof_syscall_enter);
  390. if (ret) {
  391. pr_info("event trace: Could not activate"
  392. "syscall entry trace point");
  393. } else {
  394. set_bit(num, enabled_prof_enter_syscalls);
  395. sys_prof_refcount_enter++;
  396. }
  397. mutex_unlock(&syscall_trace_lock);
  398. return ret;
  399. }
  400. void prof_sysenter_disable(struct ftrace_event_call *call)
  401. {
  402. int num;
  403. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  404. mutex_lock(&syscall_trace_lock);
  405. sys_prof_refcount_enter--;
  406. clear_bit(num, enabled_prof_enter_syscalls);
  407. if (!sys_prof_refcount_enter)
  408. unregister_trace_sys_enter(prof_syscall_enter);
  409. mutex_unlock(&syscall_trace_lock);
  410. }
  411. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  412. {
  413. struct syscall_metadata *sys_data;
  414. struct syscall_trace_exit *rec;
  415. unsigned long flags;
  416. int syscall_nr;
  417. int rctx;
  418. int size;
  419. syscall_nr = syscall_get_nr(current, regs);
  420. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  421. return;
  422. sys_data = syscall_nr_to_meta(syscall_nr);
  423. if (!sys_data)
  424. return;
  425. /* We can probably do that at build time */
  426. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  427. size -= sizeof(u32);
  428. /*
  429. * Impossible, but be paranoid with the future
  430. * How to put this check outside runtime?
  431. */
  432. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  433. "exit event has grown above profile buffer size"))
  434. return;
  435. rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
  436. sys_data->exit_event->id, &rctx, &flags);
  437. if (!rec)
  438. return;
  439. rec->nr = syscall_nr;
  440. rec->ret = syscall_get_return_value(current, regs);
  441. ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
  442. }
  443. int prof_sysexit_enable(struct ftrace_event_call *call)
  444. {
  445. int ret = 0;
  446. int num;
  447. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  448. mutex_lock(&syscall_trace_lock);
  449. if (!sys_prof_refcount_exit)
  450. ret = register_trace_sys_exit(prof_syscall_exit);
  451. if (ret) {
  452. pr_info("event trace: Could not activate"
  453. "syscall exit trace point");
  454. } else {
  455. set_bit(num, enabled_prof_exit_syscalls);
  456. sys_prof_refcount_exit++;
  457. }
  458. mutex_unlock(&syscall_trace_lock);
  459. return ret;
  460. }
  461. void prof_sysexit_disable(struct ftrace_event_call *call)
  462. {
  463. int num;
  464. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  465. mutex_lock(&syscall_trace_lock);
  466. sys_prof_refcount_exit--;
  467. clear_bit(num, enabled_prof_exit_syscalls);
  468. if (!sys_prof_refcount_exit)
  469. unregister_trace_sys_exit(prof_syscall_exit);
  470. mutex_unlock(&syscall_trace_lock);
  471. }
  472. #endif /* CONFIG_PERF_EVENTS */