trace_syscalls.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. #include <trace/syscall.h>
  2. #include <linux/kernel.h>
  3. #include <linux/ftrace.h>
  4. #include <linux/perf_counter.h>
  5. #include <asm/syscall.h>
  6. #include "trace_output.h"
  7. #include "trace.h"
  /*
   * Taken by the reg_*()/unreg_*() functions in this file around every
   * update of the refcounts and enable bitmaps.
   */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* One bit per syscall number; tested by the ftrace entry/exit probes. */
  static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
  static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);

  /*
   * Count of currently enabled entry/exit events.  The matching tracepoint
   * probe is registered on the 0 -> 1 transition and unregistered on 1 -> 0.
   */
  static int sys_refcount_enter;
  static int sys_refcount_exit;
  13. enum print_line_t
  14. print_syscall_enter(struct trace_iterator *iter, int flags)
  15. {
  16. struct trace_seq *s = &iter->seq;
  17. struct trace_entry *ent = iter->ent;
  18. struct syscall_trace_enter *trace;
  19. struct syscall_metadata *entry;
  20. int i, ret, syscall;
  21. trace = (typeof(trace))ent;
  22. syscall = trace->nr;
  23. entry = syscall_nr_to_meta(syscall);
  24. if (!entry)
  25. goto end;
  26. if (entry->enter_id != ent->type) {
  27. WARN_ON_ONCE(1);
  28. goto end;
  29. }
  30. ret = trace_seq_printf(s, "%s(", entry->name);
  31. if (!ret)
  32. return TRACE_TYPE_PARTIAL_LINE;
  33. for (i = 0; i < entry->nb_args; i++) {
  34. /* parameter types */
  35. if (trace_flags & TRACE_ITER_VERBOSE) {
  36. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  37. if (!ret)
  38. return TRACE_TYPE_PARTIAL_LINE;
  39. }
  40. /* parameter values */
  41. ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  42. trace->args[i],
  43. i == entry->nb_args - 1 ? "" : ", ");
  44. if (!ret)
  45. return TRACE_TYPE_PARTIAL_LINE;
  46. }
  47. ret = trace_seq_putc(s, ')');
  48. if (!ret)
  49. return TRACE_TYPE_PARTIAL_LINE;
  50. end:
  51. ret = trace_seq_putc(s, '\n');
  52. if (!ret)
  53. return TRACE_TYPE_PARTIAL_LINE;
  54. return TRACE_TYPE_HANDLED;
  55. }
  56. enum print_line_t
  57. print_syscall_exit(struct trace_iterator *iter, int flags)
  58. {
  59. struct trace_seq *s = &iter->seq;
  60. struct trace_entry *ent = iter->ent;
  61. struct syscall_trace_exit *trace;
  62. int syscall;
  63. struct syscall_metadata *entry;
  64. int ret;
  65. trace = (typeof(trace))ent;
  66. syscall = trace->nr;
  67. entry = syscall_nr_to_meta(syscall);
  68. if (!entry) {
  69. trace_seq_printf(s, "\n");
  70. return TRACE_TYPE_HANDLED;
  71. }
  72. if (entry->exit_id != ent->type) {
  73. WARN_ON_ONCE(1);
  74. return TRACE_TYPE_UNHANDLED;
  75. }
  76. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  77. trace->ret);
  78. if (!ret)
  79. return TRACE_TYPE_PARTIAL_LINE;
  80. return TRACE_TYPE_HANDLED;
  81. }
  /*
   * Only declared here.  NOTE(review): presumably never defined, so that a
   * size mismatch caught by SYSCALL_FIELD() turns into a link error - confirm.
   */
  extern char *__bad_type_size(void);

  /*
   * SYSCALL_FIELD(type, name) - expand to the four values
   *     "type", "name", offset of the member, size of the member
   * for member @name of a local variable that must be called 'trace' at the
   * expansion site.  The first value is wrapped in a conditional that refers
   * to __bad_type_size() when sizeof(type) differs from the member's size.
   *
   * The expansion is deliberately a bare comma-separated list so it can be
   * dropped straight into an argument list; it must not be parenthesized.
   */
  #define SYSCALL_FIELD(type, name)                                       \
      sizeof(type) != sizeof(trace.name) ?                                \
          __bad_type_size() :                                             \
          #type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
  87. int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
  88. {
  89. int i;
  90. int nr;
  91. int ret;
  92. struct syscall_metadata *entry;
  93. struct syscall_trace_enter trace;
  94. int offset = offsetof(struct syscall_trace_enter, args);
  95. nr = syscall_name_to_nr(call->data);
  96. entry = syscall_nr_to_meta(nr);
  97. if (!entry)
  98. return 0;
  99. ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
  100. SYSCALL_FIELD(int, nr));
  101. if (!ret)
  102. return 0;
  103. for (i = 0; i < entry->nb_args; i++) {
  104. ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
  105. entry->args[i]);
  106. if (!ret)
  107. return 0;
  108. ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
  109. sizeof(unsigned long));
  110. if (!ret)
  111. return 0;
  112. offset += sizeof(unsigned long);
  113. }
  114. trace_seq_puts(s, "\nprint fmt: \"");
  115. for (i = 0; i < entry->nb_args; i++) {
  116. ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
  117. sizeof(unsigned long),
  118. i == entry->nb_args - 1 ? "" : ", ");
  119. if (!ret)
  120. return 0;
  121. }
  122. trace_seq_putc(s, '"');
  123. for (i = 0; i < entry->nb_args; i++) {
  124. ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
  125. entry->args[i]);
  126. if (!ret)
  127. return 0;
  128. }
  129. return trace_seq_putc(s, '\n');
  130. }
  131. int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
  132. {
  133. int ret;
  134. struct syscall_trace_exit trace;
  135. ret = trace_seq_printf(s,
  136. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
  137. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
  138. SYSCALL_FIELD(int, nr),
  139. SYSCALL_FIELD(unsigned long, ret));
  140. if (!ret)
  141. return 0;
  142. return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
  143. }
  144. int syscall_enter_define_fields(struct ftrace_event_call *call)
  145. {
  146. struct syscall_trace_enter trace;
  147. struct syscall_metadata *meta;
  148. int ret;
  149. int nr;
  150. int i;
  151. int offset = offsetof(typeof(trace), args);
  152. nr = syscall_name_to_nr(call->data);
  153. meta = syscall_nr_to_meta(nr);
  154. if (!meta)
  155. return 0;
  156. ret = trace_define_common_fields(call);
  157. if (ret)
  158. return ret;
  159. for (i = 0; i < meta->nb_args; i++) {
  160. ret = trace_define_field(call, meta->types[i],
  161. meta->args[i], offset,
  162. sizeof(unsigned long), 0,
  163. FILTER_OTHER);
  164. offset += sizeof(unsigned long);
  165. }
  166. return ret;
  167. }
  168. int syscall_exit_define_fields(struct ftrace_event_call *call)
  169. {
  170. struct syscall_trace_exit trace;
  171. int ret;
  172. ret = trace_define_common_fields(call);
  173. if (ret)
  174. return ret;
  175. ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
  176. FILTER_OTHER);
  177. return ret;
  178. }
  179. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  180. {
  181. struct syscall_trace_enter *entry;
  182. struct syscall_metadata *sys_data;
  183. struct ring_buffer_event *event;
  184. int size;
  185. int syscall_nr;
  186. syscall_nr = syscall_get_nr(current, regs);
  187. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  188. return;
  189. sys_data = syscall_nr_to_meta(syscall_nr);
  190. if (!sys_data)
  191. return;
  192. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  193. event = trace_current_buffer_lock_reserve(sys_data->enter_id, size,
  194. 0, 0);
  195. if (!event)
  196. return;
  197. entry = ring_buffer_event_data(event);
  198. entry->nr = syscall_nr;
  199. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  200. if (!filter_current_check_discard(sys_data->enter_event, entry, event))
  201. trace_current_buffer_unlock_commit(event, 0, 0);
  202. }
  203. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  204. {
  205. struct syscall_trace_exit *entry;
  206. struct syscall_metadata *sys_data;
  207. struct ring_buffer_event *event;
  208. int syscall_nr;
  209. syscall_nr = syscall_get_nr(current, regs);
  210. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  211. return;
  212. sys_data = syscall_nr_to_meta(syscall_nr);
  213. if (!sys_data)
  214. return;
  215. event = trace_current_buffer_lock_reserve(sys_data->exit_id,
  216. sizeof(*entry), 0, 0);
  217. if (!event)
  218. return;
  219. entry = ring_buffer_event_data(event);
  220. entry->nr = syscall_nr;
  221. entry->ret = syscall_get_return_value(current, regs);
  222. if (!filter_current_check_discard(sys_data->exit_event, entry, event))
  223. trace_current_buffer_unlock_commit(event, 0, 0);
  224. }
  225. int reg_event_syscall_enter(void *ptr)
  226. {
  227. int ret = 0;
  228. int num;
  229. char *name;
  230. name = (char *)ptr;
  231. num = syscall_name_to_nr(name);
  232. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  233. return -ENOSYS;
  234. mutex_lock(&syscall_trace_lock);
  235. if (!sys_refcount_enter)
  236. ret = register_trace_syscall_enter(ftrace_syscall_enter);
  237. if (ret) {
  238. pr_info("event trace: Could not activate"
  239. "syscall entry trace point");
  240. } else {
  241. set_bit(num, enabled_enter_syscalls);
  242. sys_refcount_enter++;
  243. }
  244. mutex_unlock(&syscall_trace_lock);
  245. return ret;
  246. }
  247. void unreg_event_syscall_enter(void *ptr)
  248. {
  249. int num;
  250. char *name;
  251. name = (char *)ptr;
  252. num = syscall_name_to_nr(name);
  253. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  254. return;
  255. mutex_lock(&syscall_trace_lock);
  256. sys_refcount_enter--;
  257. clear_bit(num, enabled_enter_syscalls);
  258. if (!sys_refcount_enter)
  259. unregister_trace_syscall_enter(ftrace_syscall_enter);
  260. mutex_unlock(&syscall_trace_lock);
  261. }
  262. int reg_event_syscall_exit(void *ptr)
  263. {
  264. int ret = 0;
  265. int num;
  266. char *name;
  267. name = (char *)ptr;
  268. num = syscall_name_to_nr(name);
  269. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  270. return -ENOSYS;
  271. mutex_lock(&syscall_trace_lock);
  272. if (!sys_refcount_exit)
  273. ret = register_trace_syscall_exit(ftrace_syscall_exit);
  274. if (ret) {
  275. pr_info("event trace: Could not activate"
  276. "syscall exit trace point");
  277. } else {
  278. set_bit(num, enabled_exit_syscalls);
  279. sys_refcount_exit++;
  280. }
  281. mutex_unlock(&syscall_trace_lock);
  282. return ret;
  283. }
  284. void unreg_event_syscall_exit(void *ptr)
  285. {
  286. int num;
  287. char *name;
  288. name = (char *)ptr;
  289. num = syscall_name_to_nr(name);
  290. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  291. return;
  292. mutex_lock(&syscall_trace_lock);
  293. sys_refcount_exit--;
  294. clear_bit(num, enabled_exit_syscalls);
  295. if (!sys_refcount_exit)
  296. unregister_trace_syscall_exit(ftrace_syscall_exit);
  297. mutex_unlock(&syscall_trace_lock);
  298. }
  299. struct trace_event event_syscall_enter = {
  300. .trace = print_syscall_enter,
  301. };
  302. struct trace_event event_syscall_exit = {
  303. .trace = print_syscall_exit,
  304. };
  305. #ifdef CONFIG_EVENT_PROFILE
  /*
   * Profiling counterparts of the ftrace enable state.  The reg_prof_*() and
   * unreg_prof_*() functions update them under syscall_trace_lock.
   */

  /* Count of enabled profiling events; the probe is registered while non-zero. */
  static int sys_prof_refcount_enter;
  static int sys_prof_refcount_exit;

  /* One bit per syscall number; tested by the profiling probes. */
  static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX);
  static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX);
  310. static void prof_syscall_enter(struct pt_regs *regs, long id)
  311. {
  312. struct syscall_trace_enter *rec;
  313. struct syscall_metadata *sys_data;
  314. int syscall_nr;
  315. int size;
  316. syscall_nr = syscall_get_nr(current, regs);
  317. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  318. return;
  319. sys_data = syscall_nr_to_meta(syscall_nr);
  320. if (!sys_data)
  321. return;
  322. /* get the size after alignment with the u32 buffer size field */
  323. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  324. size = ALIGN(size + sizeof(u32), sizeof(u64));
  325. size -= sizeof(u32);
  326. do {
  327. char raw_data[size];
  328. /* zero the dead bytes from align to not leak stack to user */
  329. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  330. rec = (struct syscall_trace_enter *) raw_data;
  331. tracing_generic_entry_update(&rec->ent, 0, 0);
  332. rec->ent.type = sys_data->enter_id;
  333. rec->nr = syscall_nr;
  334. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  335. (unsigned long *)&rec->args);
  336. perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
  337. } while(0);
  338. }
  339. int reg_prof_syscall_enter(char *name)
  340. {
  341. int ret = 0;
  342. int num;
  343. num = syscall_name_to_nr(name);
  344. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  345. return -ENOSYS;
  346. mutex_lock(&syscall_trace_lock);
  347. if (!sys_prof_refcount_enter)
  348. ret = register_trace_syscall_enter(prof_syscall_enter);
  349. if (ret) {
  350. pr_info("event trace: Could not activate"
  351. "syscall entry trace point");
  352. } else {
  353. set_bit(num, enabled_prof_enter_syscalls);
  354. sys_prof_refcount_enter++;
  355. }
  356. mutex_unlock(&syscall_trace_lock);
  357. return ret;
  358. }
  359. void unreg_prof_syscall_enter(char *name)
  360. {
  361. int num;
  362. num = syscall_name_to_nr(name);
  363. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  364. return;
  365. mutex_lock(&syscall_trace_lock);
  366. sys_prof_refcount_enter--;
  367. clear_bit(num, enabled_prof_enter_syscalls);
  368. if (!sys_prof_refcount_enter)
  369. unregister_trace_syscall_enter(prof_syscall_enter);
  370. mutex_unlock(&syscall_trace_lock);
  371. }
  372. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  373. {
  374. struct syscall_metadata *sys_data;
  375. struct syscall_trace_exit rec;
  376. int syscall_nr;
  377. syscall_nr = syscall_get_nr(current, regs);
  378. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  379. return;
  380. sys_data = syscall_nr_to_meta(syscall_nr);
  381. if (!sys_data)
  382. return;
  383. tracing_generic_entry_update(&rec.ent, 0, 0);
  384. rec.ent.type = sys_data->exit_id;
  385. rec.nr = syscall_nr;
  386. rec.ret = syscall_get_return_value(current, regs);
  387. perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
  388. }
  389. int reg_prof_syscall_exit(char *name)
  390. {
  391. int ret = 0;
  392. int num;
  393. num = syscall_name_to_nr(name);
  394. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  395. return -ENOSYS;
  396. mutex_lock(&syscall_trace_lock);
  397. if (!sys_prof_refcount_exit)
  398. ret = register_trace_syscall_exit(prof_syscall_exit);
  399. if (ret) {
  400. pr_info("event trace: Could not activate"
  401. "syscall entry trace point");
  402. } else {
  403. set_bit(num, enabled_prof_exit_syscalls);
  404. sys_prof_refcount_exit++;
  405. }
  406. mutex_unlock(&syscall_trace_lock);
  407. return ret;
  408. }
  409. void unreg_prof_syscall_exit(char *name)
  410. {
  411. int num;
  412. num = syscall_name_to_nr(name);
  413. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  414. return;
  415. mutex_lock(&syscall_trace_lock);
  416. sys_prof_refcount_exit--;
  417. clear_bit(num, enabled_prof_exit_syscalls);
  418. if (!sys_prof_refcount_exit)
  419. unregister_trace_syscall_exit(prof_syscall_exit);
  420. mutex_unlock(&syscall_trace_lock);
  421. }
  422. #endif