/* trace_syscalls.c: ftrace and profile probes for system call entry/exit events. */
  1. #include <trace/syscall.h>
  2. #include <trace/events/syscalls.h>
  3. #include <linux/kernel.h>
  4. #include <linux/ftrace.h>
  5. #include <linux/perf_event.h>
  6. #include <asm/syscall.h>
  7. #include "trace_output.h"
  8. #include "trace.h"
  /*
   * State for the ftrace syscall events.  The register/unregister helpers
   * in this file take syscall_trace_lock around every update of the
   * refcounts and bitmaps.
   */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* Entry side: number of enabled events, and one bit per enabled syscall. */
  static int sys_refcount_enter;
  static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);

  /* Exit side: same layout as the entry side. */
  static int sys_refcount_exit;
  static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
  14. enum print_line_t
  15. print_syscall_enter(struct trace_iterator *iter, int flags)
  16. {
  17. struct trace_seq *s = &iter->seq;
  18. struct trace_entry *ent = iter->ent;
  19. struct syscall_trace_enter *trace;
  20. struct syscall_metadata *entry;
  21. int i, ret, syscall;
  22. trace = (typeof(trace))ent;
  23. syscall = trace->nr;
  24. entry = syscall_nr_to_meta(syscall);
  25. if (!entry)
  26. goto end;
  27. if (entry->enter_id != ent->type) {
  28. WARN_ON_ONCE(1);
  29. goto end;
  30. }
  31. ret = trace_seq_printf(s, "%s(", entry->name);
  32. if (!ret)
  33. return TRACE_TYPE_PARTIAL_LINE;
  34. for (i = 0; i < entry->nb_args; i++) {
  35. /* parameter types */
  36. if (trace_flags & TRACE_ITER_VERBOSE) {
  37. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  38. if (!ret)
  39. return TRACE_TYPE_PARTIAL_LINE;
  40. }
  41. /* parameter values */
  42. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  43. trace->args[i],
  44. i == entry->nb_args - 1 ? "" : ", ");
  45. if (!ret)
  46. return TRACE_TYPE_PARTIAL_LINE;
  47. }
  48. ret = trace_seq_putc(s, ')');
  49. if (!ret)
  50. return TRACE_TYPE_PARTIAL_LINE;
  51. end:
  52. ret = trace_seq_putc(s, '\n');
  53. if (!ret)
  54. return TRACE_TYPE_PARTIAL_LINE;
  55. return TRACE_TYPE_HANDLED;
  56. }
  57. enum print_line_t
  58. print_syscall_exit(struct trace_iterator *iter, int flags)
  59. {
  60. struct trace_seq *s = &iter->seq;
  61. struct trace_entry *ent = iter->ent;
  62. struct syscall_trace_exit *trace;
  63. int syscall;
  64. struct syscall_metadata *entry;
  65. int ret;
  66. trace = (typeof(trace))ent;
  67. syscall = trace->nr;
  68. entry = syscall_nr_to_meta(syscall);
  69. if (!entry) {
  70. trace_seq_printf(s, "\n");
  71. return TRACE_TYPE_HANDLED;
  72. }
  73. if (entry->exit_id != ent->type) {
  74. WARN_ON_ONCE(1);
  75. return TRACE_TYPE_UNHANDLED;
  76. }
  77. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  78. trace->ret);
  79. if (!ret)
  80. return TRACE_TYPE_PARTIAL_LINE;
  81. return TRACE_TYPE_HANDLED;
  82. }
  /*
   * Only declared here; no definition is visible in this file.
   * NOTE(review): presumably left undefined on purpose so that a size
   * mismatch in SYSCALL_FIELD() surfaces at link time -- confirm.
   */
  extern char *__bad_type_size(void);

  /*
   * Expand to four comma-separated arguments describing member @name of the
   * local variable `trace`: the type as a string, the member name as a
   * string, its offset and its size.  A variable called `trace` must be in
   * scope wherever the macro is used.  If sizeof(@type) differs from the
   * member's size, the first argument becomes a call to __bad_type_size().
   */
  #define SYSCALL_FIELD(type, name)                                     \
      (sizeof(type) != sizeof(trace.name) ?                             \
          __bad_type_size() :                                           \
          #type),                                                       \
      #name, offsetof(typeof(trace), name), sizeof(trace.name)
  88. int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
  89. {
  90. int i;
  91. int nr;
  92. int ret;
  93. struct syscall_metadata *entry;
  94. struct syscall_trace_enter trace;
  95. int offset = offsetof(struct syscall_trace_enter, args);
  96. nr = syscall_name_to_nr(call->data);
  97. entry = syscall_nr_to_meta(nr);
  98. if (!entry)
  99. return 0;
  100. ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
  101. SYSCALL_FIELD(int, nr));
  102. if (!ret)
  103. return 0;
  104. for (i = 0; i < entry->nb_args; i++) {
  105. ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
  106. entry->args[i]);
  107. if (!ret)
  108. return 0;
  109. ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
  110. sizeof(unsigned long));
  111. if (!ret)
  112. return 0;
  113. offset += sizeof(unsigned long);
  114. }
  115. trace_seq_puts(s, "\nprint fmt: \"");
  116. for (i = 0; i < entry->nb_args; i++) {
  117. ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
  118. sizeof(unsigned long),
  119. i == entry->nb_args - 1 ? "" : ", ");
  120. if (!ret)
  121. return 0;
  122. }
  123. trace_seq_putc(s, '"');
  124. for (i = 0; i < entry->nb_args; i++) {
  125. ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
  126. entry->args[i]);
  127. if (!ret)
  128. return 0;
  129. }
  130. return trace_seq_putc(s, '\n');
  131. }
  132. int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
  133. {
  134. int ret;
  135. struct syscall_trace_exit trace;
  136. ret = trace_seq_printf(s,
  137. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
  138. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
  139. SYSCALL_FIELD(int, nr),
  140. SYSCALL_FIELD(long, ret));
  141. if (!ret)
  142. return 0;
  143. return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
  144. }
  145. int syscall_enter_define_fields(struct ftrace_event_call *call)
  146. {
  147. struct syscall_trace_enter trace;
  148. struct syscall_metadata *meta;
  149. int ret;
  150. int nr;
  151. int i;
  152. int offset = offsetof(typeof(trace), args);
  153. nr = syscall_name_to_nr(call->data);
  154. meta = syscall_nr_to_meta(nr);
  155. if (!meta)
  156. return 0;
  157. ret = trace_define_common_fields(call);
  158. if (ret)
  159. return ret;
  160. for (i = 0; i < meta->nb_args; i++) {
  161. ret = trace_define_field(call, meta->types[i],
  162. meta->args[i], offset,
  163. sizeof(unsigned long), 0,
  164. FILTER_OTHER);
  165. offset += sizeof(unsigned long);
  166. }
  167. return ret;
  168. }
  169. int syscall_exit_define_fields(struct ftrace_event_call *call)
  170. {
  171. struct syscall_trace_exit trace;
  172. int ret;
  173. ret = trace_define_common_fields(call);
  174. if (ret)
  175. return ret;
  176. ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0,
  177. FILTER_OTHER);
  178. return ret;
  179. }
  180. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  181. {
  182. struct syscall_trace_enter *entry;
  183. struct syscall_metadata *sys_data;
  184. struct ring_buffer_event *event;
  185. struct ring_buffer *buffer;
  186. int size;
  187. int syscall_nr;
  188. syscall_nr = syscall_get_nr(current, regs);
  189. if (syscall_nr < 0)
  190. return;
  191. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  192. return;
  193. sys_data = syscall_nr_to_meta(syscall_nr);
  194. if (!sys_data)
  195. return;
  196. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  197. event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
  198. size, 0, 0);
  199. if (!event)
  200. return;
  201. entry = ring_buffer_event_data(event);
  202. entry->nr = syscall_nr;
  203. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  204. if (!filter_current_check_discard(buffer, sys_data->enter_event,
  205. entry, event))
  206. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  207. }
  208. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  209. {
  210. struct syscall_trace_exit *entry;
  211. struct syscall_metadata *sys_data;
  212. struct ring_buffer_event *event;
  213. struct ring_buffer *buffer;
  214. int syscall_nr;
  215. syscall_nr = syscall_get_nr(current, regs);
  216. if (syscall_nr < 0)
  217. return;
  218. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  219. return;
  220. sys_data = syscall_nr_to_meta(syscall_nr);
  221. if (!sys_data)
  222. return;
  223. event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
  224. sizeof(*entry), 0, 0);
  225. if (!event)
  226. return;
  227. entry = ring_buffer_event_data(event);
  228. entry->nr = syscall_nr;
  229. entry->ret = syscall_get_return_value(current, regs);
  230. if (!filter_current_check_discard(buffer, sys_data->exit_event,
  231. entry, event))
  232. trace_current_buffer_unlock_commit(buffer, event, 0, 0);
  233. }
  234. int reg_event_syscall_enter(void *ptr)
  235. {
  236. int ret = 0;
  237. int num;
  238. char *name;
  239. name = (char *)ptr;
  240. num = syscall_name_to_nr(name);
  241. if (num < 0 || num >= NR_syscalls)
  242. return -ENOSYS;
  243. mutex_lock(&syscall_trace_lock);
  244. if (!sys_refcount_enter)
  245. ret = register_trace_sys_enter(ftrace_syscall_enter);
  246. if (ret) {
  247. pr_info("event trace: Could not activate"
  248. "syscall entry trace point");
  249. } else {
  250. set_bit(num, enabled_enter_syscalls);
  251. sys_refcount_enter++;
  252. }
  253. mutex_unlock(&syscall_trace_lock);
  254. return ret;
  255. }
  256. void unreg_event_syscall_enter(void *ptr)
  257. {
  258. int num;
  259. char *name;
  260. name = (char *)ptr;
  261. num = syscall_name_to_nr(name);
  262. if (num < 0 || num >= NR_syscalls)
  263. return;
  264. mutex_lock(&syscall_trace_lock);
  265. sys_refcount_enter--;
  266. clear_bit(num, enabled_enter_syscalls);
  267. if (!sys_refcount_enter)
  268. unregister_trace_sys_enter(ftrace_syscall_enter);
  269. mutex_unlock(&syscall_trace_lock);
  270. }
  271. int reg_event_syscall_exit(void *ptr)
  272. {
  273. int ret = 0;
  274. int num;
  275. char *name;
  276. name = (char *)ptr;
  277. num = syscall_name_to_nr(name);
  278. if (num < 0 || num >= NR_syscalls)
  279. return -ENOSYS;
  280. mutex_lock(&syscall_trace_lock);
  281. if (!sys_refcount_exit)
  282. ret = register_trace_sys_exit(ftrace_syscall_exit);
  283. if (ret) {
  284. pr_info("event trace: Could not activate"
  285. "syscall exit trace point");
  286. } else {
  287. set_bit(num, enabled_exit_syscalls);
  288. sys_refcount_exit++;
  289. }
  290. mutex_unlock(&syscall_trace_lock);
  291. return ret;
  292. }
  293. void unreg_event_syscall_exit(void *ptr)
  294. {
  295. int num;
  296. char *name;
  297. name = (char *)ptr;
  298. num = syscall_name_to_nr(name);
  299. if (num < 0 || num >= NR_syscalls)
  300. return;
  301. mutex_lock(&syscall_trace_lock);
  302. sys_refcount_exit--;
  303. clear_bit(num, enabled_exit_syscalls);
  304. if (!sys_refcount_exit)
  305. unregister_trace_sys_exit(ftrace_syscall_exit);
  306. mutex_unlock(&syscall_trace_lock);
  307. }
  308. struct trace_event event_syscall_enter = {
  309. .trace = print_syscall_enter,
  310. };
  311. struct trace_event event_syscall_exit = {
  312. .trace = print_syscall_exit,
  313. };
  314. #ifdef CONFIG_EVENT_PROFILE
  /*
   * State for the profiling probes, kept separate from the ftrace state:
   * for each direction, one bit per enabled syscall number and a count of
   * enabled events.
   */
  static int sys_prof_refcount_enter;
  static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);

  static int sys_prof_refcount_exit;
  static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
  319. static void prof_syscall_enter(struct pt_regs *regs, long id)
  320. {
  321. struct syscall_metadata *sys_data;
  322. struct syscall_trace_enter *rec;
  323. unsigned long flags;
  324. char *raw_data;
  325. int syscall_nr;
  326. int size;
  327. int cpu;
  328. syscall_nr = syscall_get_nr(current, regs);
  329. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  330. return;
  331. sys_data = syscall_nr_to_meta(syscall_nr);
  332. if (!sys_data)
  333. return;
  334. /* get the size after alignment with the u32 buffer size field */
  335. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  336. size = ALIGN(size + sizeof(u32), sizeof(u64));
  337. size -= sizeof(u32);
  338. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  339. "profile buffer not large enough"))
  340. return;
  341. /* Protect the per cpu buffer, begin the rcu read side */
  342. local_irq_save(flags);
  343. cpu = smp_processor_id();
  344. if (in_nmi())
  345. raw_data = rcu_dereference(trace_profile_buf_nmi);
  346. else
  347. raw_data = rcu_dereference(trace_profile_buf);
  348. if (!raw_data)
  349. goto end;
  350. raw_data = per_cpu_ptr(raw_data, cpu);
  351. /* zero the dead bytes from align to not leak stack to user */
  352. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  353. rec = (struct syscall_trace_enter *) raw_data;
  354. tracing_generic_entry_update(&rec->ent, 0, 0);
  355. rec->ent.type = sys_data->enter_id;
  356. rec->nr = syscall_nr;
  357. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  358. (unsigned long *)&rec->args);
  359. perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
  360. end:
  361. local_irq_restore(flags);
  362. }
  363. int reg_prof_syscall_enter(char *name)
  364. {
  365. int ret = 0;
  366. int num;
  367. num = syscall_name_to_nr(name);
  368. if (num < 0 || num >= NR_syscalls)
  369. return -ENOSYS;
  370. mutex_lock(&syscall_trace_lock);
  371. if (!sys_prof_refcount_enter)
  372. ret = register_trace_sys_enter(prof_syscall_enter);
  373. if (ret) {
  374. pr_info("event trace: Could not activate"
  375. "syscall entry trace point");
  376. } else {
  377. set_bit(num, enabled_prof_enter_syscalls);
  378. sys_prof_refcount_enter++;
  379. }
  380. mutex_unlock(&syscall_trace_lock);
  381. return ret;
  382. }
  383. void unreg_prof_syscall_enter(char *name)
  384. {
  385. int num;
  386. num = syscall_name_to_nr(name);
  387. if (num < 0 || num >= NR_syscalls)
  388. return;
  389. mutex_lock(&syscall_trace_lock);
  390. sys_prof_refcount_enter--;
  391. clear_bit(num, enabled_prof_enter_syscalls);
  392. if (!sys_prof_refcount_enter)
  393. unregister_trace_sys_enter(prof_syscall_enter);
  394. mutex_unlock(&syscall_trace_lock);
  395. }
  396. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  397. {
  398. struct syscall_metadata *sys_data;
  399. struct syscall_trace_exit *rec;
  400. unsigned long flags;
  401. int syscall_nr;
  402. char *raw_data;
  403. int size;
  404. int cpu;
  405. syscall_nr = syscall_get_nr(current, regs);
  406. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  407. return;
  408. sys_data = syscall_nr_to_meta(syscall_nr);
  409. if (!sys_data)
  410. return;
  411. /* We can probably do that at build time */
  412. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  413. size -= sizeof(u32);
  414. /*
  415. * Impossible, but be paranoid with the future
  416. * How to put this check outside runtime?
  417. */
  418. if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
  419. "exit event has grown above profile buffer size"))
  420. return;
  421. /* Protect the per cpu buffer, begin the rcu read side */
  422. local_irq_save(flags);
  423. cpu = smp_processor_id();
  424. if (in_nmi())
  425. raw_data = rcu_dereference(trace_profile_buf_nmi);
  426. else
  427. raw_data = rcu_dereference(trace_profile_buf);
  428. if (!raw_data)
  429. goto end;
  430. raw_data = per_cpu_ptr(raw_data, cpu);
  431. /* zero the dead bytes from align to not leak stack to user */
  432. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  433. rec = (struct syscall_trace_exit *)raw_data;
  434. tracing_generic_entry_update(&rec->ent, 0, 0);
  435. rec->ent.type = sys_data->exit_id;
  436. rec->nr = syscall_nr;
  437. rec->ret = syscall_get_return_value(current, regs);
  438. perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
  439. end:
  440. local_irq_restore(flags);
  441. }
  442. int reg_prof_syscall_exit(char *name)
  443. {
  444. int ret = 0;
  445. int num;
  446. num = syscall_name_to_nr(name);
  447. if (num < 0 || num >= NR_syscalls)
  448. return -ENOSYS;
  449. mutex_lock(&syscall_trace_lock);
  450. if (!sys_prof_refcount_exit)
  451. ret = register_trace_sys_exit(prof_syscall_exit);
  452. if (ret) {
  453. pr_info("event trace: Could not activate"
  454. "syscall entry trace point");
  455. } else {
  456. set_bit(num, enabled_prof_exit_syscalls);
  457. sys_prof_refcount_exit++;
  458. }
  459. mutex_unlock(&syscall_trace_lock);
  460. return ret;
  461. }
  462. void unreg_prof_syscall_exit(char *name)
  463. {
  464. int num;
  465. num = syscall_name_to_nr(name);
  466. if (num < 0 || num >= NR_syscalls)
  467. return;
  468. mutex_lock(&syscall_trace_lock);
  469. sys_prof_refcount_exit--;
  470. clear_bit(num, enabled_prof_exit_syscalls);
  471. if (!sys_prof_refcount_exit)
  472. unregister_trace_sys_exit(prof_syscall_exit);
  473. mutex_unlock(&syscall_trace_lock);
  474. }
  475. #endif