trace_syscalls.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. #include <trace/syscall.h>
  2. #include <trace/events/syscalls.h>
  3. #include <linux/kernel.h>
  4. #include <linux/ftrace.h>
  5. #include <linux/perf_event.h>
  6. #include <asm/syscall.h>
  7. #include "trace_output.h"
  8. #include "trace.h"
  /* Held by the reg/unreg helpers in this file while they update the state below. */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* Syscall-entry tracing: registration count and per-syscall enable bits. */
  static int sys_refcount_enter;
  static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);

  /* Syscall-exit tracing: registration count and per-syscall enable bits. */
  static int sys_refcount_exit;
  static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
  14. enum print_line_t
  15. print_syscall_enter(struct trace_iterator *iter, int flags)
  16. {
  17. struct trace_seq *s = &iter->seq;
  18. struct trace_entry *ent = iter->ent;
  19. struct syscall_trace_enter *trace;
  20. struct syscall_metadata *entry;
  21. int i, ret, syscall;
  22. trace = (typeof(trace))ent;
  23. syscall = trace->nr;
  24. entry = syscall_nr_to_meta(syscall);
  25. if (!entry)
  26. goto end;
  27. if (entry->enter_id != ent->type) {
  28. WARN_ON_ONCE(1);
  29. goto end;
  30. }
  31. ret = trace_seq_printf(s, "%s(", entry->name);
  32. if (!ret)
  33. return TRACE_TYPE_PARTIAL_LINE;
  34. for (i = 0; i < entry->nb_args; i++) {
  35. /* parameter types */
  36. if (trace_flags & TRACE_ITER_VERBOSE) {
  37. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  38. if (!ret)
  39. return TRACE_TYPE_PARTIAL_LINE;
  40. }
  41. /* parameter values */
  42. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  43. trace->args[i],
  44. i == entry->nb_args - 1 ? "" : ", ");
  45. if (!ret)
  46. return TRACE_TYPE_PARTIAL_LINE;
  47. }
  48. ret = trace_seq_putc(s, ')');
  49. if (!ret)
  50. return TRACE_TYPE_PARTIAL_LINE;
  51. end:
  52. ret = trace_seq_putc(s, '\n');
  53. if (!ret)
  54. return TRACE_TYPE_PARTIAL_LINE;
  55. return TRACE_TYPE_HANDLED;
  56. }
  57. enum print_line_t
  58. print_syscall_exit(struct trace_iterator *iter, int flags)
  59. {
  60. struct trace_seq *s = &iter->seq;
  61. struct trace_entry *ent = iter->ent;
  62. struct syscall_trace_exit *trace;
  63. int syscall;
  64. struct syscall_metadata *entry;
  65. int ret;
  66. trace = (typeof(trace))ent;
  67. syscall = trace->nr;
  68. entry = syscall_nr_to_meta(syscall);
  69. if (!entry) {
  70. trace_seq_printf(s, "\n");
  71. return TRACE_TYPE_HANDLED;
  72. }
  73. if (entry->exit_id != ent->type) {
  74. WARN_ON_ONCE(1);
  75. return TRACE_TYPE_UNHANDLED;
  76. }
  77. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  78. trace->ret);
  79. if (!ret)
  80. return TRACE_TYPE_PARTIAL_LINE;
  81. return TRACE_TYPE_HANDLED;
  82. }
  /*
   * Only referenced from SYSCALL_FIELD() when the sizes disagree.
   * NOTE(review): presumably never defined, so that a mismatch surfaces at
   * link time -- confirm.
   */
  extern char *__bad_type_size(void);

  /*
   * SYSCALL_FIELD - expand to the five values describing one record field:
   * type string, name string, offset, size and signedness.
   *
   * Requires a local variable called 'trace' of the record type in the
   * calling scope.  The first value becomes __bad_type_size() instead of the
   * type string when sizeof(type) differs from the size of trace.<name>.
   */
  #define SYSCALL_FIELD(type, name)                                       \
      sizeof(type) != sizeof(trace.name) ? __bad_type_size() : #type,     \
      #name,                                                              \
      offsetof(typeof(trace), name),                                      \
      sizeof(trace.name),                                                 \
      is_signed_type(type)
  89. int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
  90. {
  91. int i;
  92. int nr;
  93. int ret;
  94. struct syscall_metadata *entry;
  95. struct syscall_trace_enter trace;
  96. int offset = offsetof(struct syscall_trace_enter, args);
  97. nr = syscall_name_to_nr(call->data);
  98. entry = syscall_nr_to_meta(nr);
  99. if (!entry)
  100. return 0;
  101. ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  102. "\tsigned:%u;\n",
  103. SYSCALL_FIELD(int, nr));
  104. if (!ret)
  105. return 0;
  106. for (i = 0; i < entry->nb_args; i++) {
  107. ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
  108. entry->args[i]);
  109. if (!ret)
  110. return 0;
  111. ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
  112. "\tsigned:%u;\n", offset,
  113. sizeof(unsigned long),
  114. is_signed_type(unsigned long));
  115. if (!ret)
  116. return 0;
  117. offset += sizeof(unsigned long);
  118. }
  119. trace_seq_puts(s, "\nprint fmt: \"");
  120. for (i = 0; i < entry->nb_args; i++) {
  121. ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
  122. sizeof(unsigned long),
  123. i == entry->nb_args - 1 ? "" : ", ");
  124. if (!ret)
  125. return 0;
  126. }
  127. trace_seq_putc(s, '"');
  128. for (i = 0; i < entry->nb_args; i++) {
  129. ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
  130. entry->args[i]);
  131. if (!ret)
  132. return 0;
  133. }
  134. return trace_seq_putc(s, '\n');
  135. }
  136. int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
  137. {
  138. int ret;
  139. struct syscall_trace_exit trace;
  140. ret = trace_seq_printf(s,
  141. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  142. "\tsigned:%u;\n"
  143. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
  144. "\tsigned:%u;\n",
  145. SYSCALL_FIELD(int, nr),
  146. SYSCALL_FIELD(long, ret));
  147. if (!ret)
  148. return 0;
  149. return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
  150. }
  151. int syscall_enter_define_fields(struct ftrace_event_call *call)
  152. {
  153. struct syscall_trace_enter trace;
  154. struct syscall_metadata *meta;
  155. int ret;
  156. int nr;
  157. int i;
  158. int offset = offsetof(typeof(trace), args);
  159. nr = syscall_name_to_nr(call->data);
  160. meta = syscall_nr_to_meta(nr);
  161. if (!meta)
  162. return 0;
  163. ret = trace_define_common_fields(call);
  164. if (ret)
  165. return ret;
  166. for (i = 0; i < meta->nb_args; i++) {
  167. ret = trace_define_field(call, meta->types[i],
  168. meta->args[i], offset,
  169. sizeof(unsigned long), 0,
  170. FILTER_OTHER);
  171. offset += sizeof(unsigned long);
  172. }
  173. return ret;
  174. }
  175. int syscall_exit_define_fields(struct ftrace_event_call *call)
  176. {
  177. struct syscall_trace_exit trace;
  178. int ret;
  179. ret = trace_define_common_fields(call);
  180. if (ret)
  181. return ret;
  182. ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
  183. FILTER_OTHER);
  184. return ret;
  185. }
  186. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  187. {
  188. struct syscall_trace_enter *entry;
  189. struct syscall_metadata *sys_data;
  190. struct ring_buffer_event *event;
  191. struct ring_buffer *buffer;
  192. int size;
  193. int syscall_nr;
  194. syscall_nr = syscall_get_nr(current, regs);
  195. if (syscall_nr < 0)
  196. return;
  197. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  198. return;
  199. sys_data = syscall_nr_to_meta(syscall_nr);
  200. if (!sys_data)
  201. return;
  202. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  203. event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
  204. size, 0, 0);
  205. if (!event)
  206. return;
  207. entry = ring_buffer_event_data(event);
  208. entry->nr = syscall_nr;
  209. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  210. if (!filter_current_check_discard(buffer, sys_data->enter_event,
  211. entry, event))
  212. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  213. }
  214. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  215. {
  216. struct syscall_trace_exit *entry;
  217. struct syscall_metadata *sys_data;
  218. struct ring_buffer_event *event;
  219. struct ring_buffer *buffer;
  220. int syscall_nr;
  221. syscall_nr = syscall_get_nr(current, regs);
  222. if (syscall_nr < 0)
  223. return;
  224. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  225. return;
  226. sys_data = syscall_nr_to_meta(syscall_nr);
  227. if (!sys_data)
  228. return;
  229. event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
  230. sizeof(*entry), 0, 0);
  231. if (!event)
  232. return;
  233. entry = ring_buffer_event_data(event);
  234. entry->nr = syscall_nr;
  235. entry->ret = syscall_get_return_value(current, regs);
  236. if (!filter_current_check_discard(buffer, sys_data->exit_event,
  237. entry, event))
  238. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  239. }
  240. int reg_event_syscall_enter(void *ptr)
  241. {
  242. int ret = 0;
  243. int num;
  244. char *name;
  245. name = (char *)ptr;
  246. num = syscall_name_to_nr(name);
  247. if (num < 0 || num >= NR_syscalls)
  248. return -ENOSYS;
  249. mutex_lock(&syscall_trace_lock);
  250. if (!sys_refcount_enter)
  251. ret = register_trace_sys_enter(ftrace_syscall_enter);
  252. if (ret) {
  253. pr_info("event trace: Could not activate"
  254. "syscall entry trace point");
  255. } else {
  256. set_bit(num, enabled_enter_syscalls);
  257. sys_refcount_enter++;
  258. }
  259. mutex_unlock(&syscall_trace_lock);
  260. return ret;
  261. }
  262. void unreg_event_syscall_enter(void *ptr)
  263. {
  264. int num;
  265. char *name;
  266. name = (char *)ptr;
  267. num = syscall_name_to_nr(name);
  268. if (num < 0 || num >= NR_syscalls)
  269. return;
  270. mutex_lock(&syscall_trace_lock);
  271. sys_refcount_enter--;
  272. clear_bit(num, enabled_enter_syscalls);
  273. if (!sys_refcount_enter)
  274. unregister_trace_sys_enter(ftrace_syscall_enter);
  275. mutex_unlock(&syscall_trace_lock);
  276. }
  277. int reg_event_syscall_exit(void *ptr)
  278. {
  279. int ret = 0;
  280. int num;
  281. char *name;
  282. name = (char *)ptr;
  283. num = syscall_name_to_nr(name);
  284. if (num < 0 || num >= NR_syscalls)
  285. return -ENOSYS;
  286. mutex_lock(&syscall_trace_lock);
  287. if (!sys_refcount_exit)
  288. ret = register_trace_sys_exit(ftrace_syscall_exit);
  289. if (ret) {
  290. pr_info("event trace: Could not activate"
  291. "syscall exit trace point");
  292. } else {
  293. set_bit(num, enabled_exit_syscalls);
  294. sys_refcount_exit++;
  295. }
  296. mutex_unlock(&syscall_trace_lock);
  297. return ret;
  298. }
  299. void unreg_event_syscall_exit(void *ptr)
  300. {
  301. int num;
  302. char *name;
  303. name = (char *)ptr;
  304. num = syscall_name_to_nr(name);
  305. if (num < 0 || num >= NR_syscalls)
  306. return;
  307. mutex_lock(&syscall_trace_lock);
  308. sys_refcount_exit--;
  309. clear_bit(num, enabled_exit_syscalls);
  310. if (!sys_refcount_exit)
  311. unregister_trace_sys_exit(ftrace_syscall_exit);
  312. mutex_unlock(&syscall_trace_lock);
  313. }
  314. struct trace_event event_syscall_enter = {
  315. .trace = print_syscall_enter,
  316. };
  317. struct trace_event event_syscall_exit = {
  318. .trace = print_syscall_exit,
  319. };
  320. #ifdef CONFIG_EVENT_PROFILE
  /* Profiling of syscall entry: registration count and per-syscall enable bits. */
  static int sys_prof_refcount_enter;
  static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);

  /* Profiling of syscall exit: registration count and per-syscall enable bits. */
  static int sys_prof_refcount_exit;
  static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
  325. static void prof_syscall_enter(struct pt_regs *regs, long id)
  326. {
  327. struct syscall_metadata *sys_data;
  328. struct syscall_trace_enter *rec;
  329. unsigned long flags;
  330. char *raw_data;
  331. int syscall_nr;
  332. int size;
  333. int cpu;
  334. syscall_nr = syscall_get_nr(current, regs);
  335. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  336. return;
  337. sys_data = syscall_nr_to_meta(syscall_nr);
  338. if (!sys_data)
  339. return;
  340. /* get the size after alignment with the u32 buffer size field */
  341. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  342. size = ALIGN(size + sizeof(u32), sizeof(u64));
  343. size -= sizeof(u32);
  344. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  345. "profile buffer not large enough"))
  346. return;
  347. /* Protect the per cpu buffer, begin the rcu read side */
  348. local_irq_save(flags);
  349. cpu = smp_processor_id();
  350. if (in_nmi())
  351. raw_data = rcu_dereference(trace_profile_buf_nmi);
  352. else
  353. raw_data = rcu_dereference(trace_profile_buf);
  354. if (!raw_data)
  355. goto end;
  356. raw_data = per_cpu_ptr(raw_data, cpu);
  357. /* zero the dead bytes from align to not leak stack to user */
  358. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  359. rec = (struct syscall_trace_enter *) raw_data;
  360. tracing_generic_entry_update(&rec->ent, 0, 0);
  361. rec->ent.type = sys_data->enter_id;
  362. rec->nr = syscall_nr;
  363. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  364. (unsigned long *)&rec->args);
  365. perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
  366. end:
  367. local_irq_restore(flags);
  368. }
  369. int reg_prof_syscall_enter(char *name)
  370. {
  371. int ret = 0;
  372. int num;
  373. num = syscall_name_to_nr(name);
  374. if (num < 0 || num >= NR_syscalls)
  375. return -ENOSYS;
  376. mutex_lock(&syscall_trace_lock);
  377. if (!sys_prof_refcount_enter)
  378. ret = register_trace_sys_enter(prof_syscall_enter);
  379. if (ret) {
  380. pr_info("event trace: Could not activate"
  381. "syscall entry trace point");
  382. } else {
  383. set_bit(num, enabled_prof_enter_syscalls);
  384. sys_prof_refcount_enter++;
  385. }
  386. mutex_unlock(&syscall_trace_lock);
  387. return ret;
  388. }
  389. void unreg_prof_syscall_enter(char *name)
  390. {
  391. int num;
  392. num = syscall_name_to_nr(name);
  393. if (num < 0 || num >= NR_syscalls)
  394. return;
  395. mutex_lock(&syscall_trace_lock);
  396. sys_prof_refcount_enter--;
  397. clear_bit(num, enabled_prof_enter_syscalls);
  398. if (!sys_prof_refcount_enter)
  399. unregister_trace_sys_enter(prof_syscall_enter);
  400. mutex_unlock(&syscall_trace_lock);
  401. }
  402. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  403. {
  404. struct syscall_metadata *sys_data;
  405. struct syscall_trace_exit *rec;
  406. unsigned long flags;
  407. int syscall_nr;
  408. char *raw_data;
  409. int size;
  410. int cpu;
  411. syscall_nr = syscall_get_nr(current, regs);
  412. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  413. return;
  414. sys_data = syscall_nr_to_meta(syscall_nr);
  415. if (!sys_data)
  416. return;
  417. /* We can probably do that at build time */
  418. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  419. size -= sizeof(u32);
  420. /*
  421. * Impossible, but be paranoid with the future
  422. * How to put this check outside runtime?
  423. */
  424. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  425. "exit event has grown above profile buffer size"))
  426. return;
  427. /* Protect the per cpu buffer, begin the rcu read side */
  428. local_irq_save(flags);
  429. cpu = smp_processor_id();
  430. if (in_nmi())
  431. raw_data = rcu_dereference(trace_profile_buf_nmi);
  432. else
  433. raw_data = rcu_dereference(trace_profile_buf);
  434. if (!raw_data)
  435. goto end;
  436. raw_data = per_cpu_ptr(raw_data, cpu);
  437. /* zero the dead bytes from align to not leak stack to user */
  438. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  439. rec = (struct syscall_trace_exit *)raw_data;
  440. tracing_generic_entry_update(&rec->ent, 0, 0);
  441. rec->ent.type = sys_data->exit_id;
  442. rec->nr = syscall_nr;
  443. rec->ret = syscall_get_return_value(current, regs);
  444. perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
  445. end:
  446. local_irq_restore(flags);
  447. }
  448. int reg_prof_syscall_exit(char *name)
  449. {
  450. int ret = 0;
  451. int num;
  452. num = syscall_name_to_nr(name);
  453. if (num < 0 || num >= NR_syscalls)
  454. return -ENOSYS;
  455. mutex_lock(&syscall_trace_lock);
  456. if (!sys_prof_refcount_exit)
  457. ret = register_trace_sys_exit(prof_syscall_exit);
  458. if (ret) {
  459. pr_info("event trace: Could not activate"
  460. "syscall entry trace point");
  461. } else {
  462. set_bit(num, enabled_prof_exit_syscalls);
  463. sys_prof_refcount_exit++;
  464. }
  465. mutex_unlock(&syscall_trace_lock);
  466. return ret;
  467. }
  468. void unreg_prof_syscall_exit(char *name)
  469. {
  470. int num;
  471. num = syscall_name_to_nr(name);
  472. if (num < 0 || num >= NR_syscalls)
  473. return;
  474. mutex_lock(&syscall_trace_lock);
  475. sys_prof_refcount_exit--;
  476. clear_bit(num, enabled_prof_exit_syscalls);
  477. if (!sys_prof_refcount_exit)
  478. unregister_trace_sys_exit(prof_syscall_exit);
  479. mutex_unlock(&syscall_trace_lock);
  480. }
  481. #endif