/* trace_syscalls.c */
  1. #include <trace/syscall.h>
  2. #include <linux/kernel.h>
  3. #include <linux/ftrace.h>
  4. #include <linux/perf_counter.h>
  5. #include <asm/syscall.h>
  6. #include "trace_output.h"
  7. #include "trace.h"
  /* Held by the reg/unreg helpers while they touch the counters and bitmaps. */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* Syscall-entry events: enable count and per-syscall enable bits. */
  static int sys_refcount_enter;
  static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);

  /* Syscall-exit events: enable count and per-syscall enable bits. */
  static int sys_refcount_exit;
  static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);
  13. enum print_line_t
  14. print_syscall_enter(struct trace_iterator *iter, int flags)
  15. {
  16. struct trace_seq *s = &iter->seq;
  17. struct trace_entry *ent = iter->ent;
  18. struct syscall_trace_enter *trace;
  19. struct syscall_metadata *entry;
  20. int i, ret, syscall;
  21. trace = (typeof(trace))ent;
  22. syscall = trace->nr;
  23. entry = syscall_nr_to_meta(syscall);
  24. if (!entry)
  25. goto end;
  26. if (entry->enter_id != ent->type) {
  27. WARN_ON_ONCE(1);
  28. goto end;
  29. }
  30. ret = trace_seq_printf(s, "%s(", entry->name);
  31. if (!ret)
  32. return TRACE_TYPE_PARTIAL_LINE;
  33. for (i = 0; i < entry->nb_args; i++) {
  34. /* parameter types */
  35. if (trace_flags & TRACE_ITER_VERBOSE) {
  36. ret = trace_seq_printf(s, "%s ", entry->types[i]);
  37. if (!ret)
  38. return TRACE_TYPE_PARTIAL_LINE;
  39. }
  40. /* parameter values */
  41. ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
  42. trace->args[i],
  43. i == entry->nb_args - 1 ? ")" : ",");
  44. if (!ret)
  45. return TRACE_TYPE_PARTIAL_LINE;
  46. }
  47. end:
  48. trace_seq_printf(s, "\n");
  49. return TRACE_TYPE_HANDLED;
  50. }
  51. enum print_line_t
  52. print_syscall_exit(struct trace_iterator *iter, int flags)
  53. {
  54. struct trace_seq *s = &iter->seq;
  55. struct trace_entry *ent = iter->ent;
  56. struct syscall_trace_exit *trace;
  57. int syscall;
  58. struct syscall_metadata *entry;
  59. int ret;
  60. trace = (typeof(trace))ent;
  61. syscall = trace->nr;
  62. entry = syscall_nr_to_meta(syscall);
  63. if (!entry) {
  64. trace_seq_printf(s, "\n");
  65. return TRACE_TYPE_HANDLED;
  66. }
  67. if (entry->exit_id != ent->type) {
  68. WARN_ON_ONCE(1);
  69. return TRACE_TYPE_UNHANDLED;
  70. }
  71. ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  72. trace->ret);
  73. if (!ret)
  74. return TRACE_TYPE_PARTIAL_LINE;
  75. return TRACE_TYPE_HANDLED;
  76. }
  /* Only referenced by SYSCALL_FIELD() when its size check fails. */
  extern char *__bad_type_size(void);

  /*
   * SYSCALL_FIELD - expand to the four arguments describing a record field:
   * type string, name string, offset and size.
   *
   * Requires a local variable named 'trace' of the record type in scope.
   * If sizeof(type) does not match the size of trace.name, the type string
   * is replaced by a call to __bad_type_size() (presumably left undefined
   * so that a mismatch fails the build -- confirm).
   */
  #define SYSCALL_FIELD(type, name)                                       \
      sizeof(type) == sizeof(trace.name) ?                                \
              #type :                                                     \
              __bad_type_size(),                                          \
      #name, offsetof(typeof(trace), name), sizeof(trace.name)
  82. int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
  83. {
  84. int i;
  85. int nr;
  86. int ret;
  87. struct syscall_metadata *entry;
  88. struct syscall_trace_enter trace;
  89. int offset = offsetof(struct syscall_trace_enter, args);
  90. nr = syscall_name_to_nr(call->data);
  91. entry = syscall_nr_to_meta(nr);
  92. if (!entry)
  93. return 0;
  94. ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
  95. SYSCALL_FIELD(int, nr));
  96. if (!ret)
  97. return 0;
  98. for (i = 0; i < entry->nb_args; i++) {
  99. ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
  100. entry->args[i]);
  101. if (!ret)
  102. return 0;
  103. ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
  104. sizeof(unsigned long));
  105. if (!ret)
  106. return 0;
  107. offset += sizeof(unsigned long);
  108. }
  109. trace_seq_printf(s, "\nprint fmt: \"");
  110. for (i = 0; i < entry->nb_args; i++) {
  111. ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
  112. sizeof(unsigned long),
  113. i == entry->nb_args - 1 ? "\", " : ", ");
  114. if (!ret)
  115. return 0;
  116. }
  117. for (i = 0; i < entry->nb_args; i++) {
  118. ret = trace_seq_printf(s, "((unsigned long)(REC->%s))%s",
  119. entry->args[i],
  120. i == entry->nb_args - 1 ? "\n" : ", ");
  121. if (!ret)
  122. return 0;
  123. }
  124. return ret;
  125. }
  126. int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
  127. {
  128. int ret;
  129. struct syscall_trace_exit trace;
  130. ret = trace_seq_printf(s,
  131. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
  132. "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
  133. SYSCALL_FIELD(int, nr),
  134. SYSCALL_FIELD(unsigned long, ret));
  135. if (!ret)
  136. return 0;
  137. return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
  138. }
  139. int syscall_enter_define_fields(struct ftrace_event_call *call)
  140. {
  141. struct syscall_trace_enter trace;
  142. struct syscall_metadata *meta;
  143. int ret;
  144. int nr;
  145. int i;
  146. int offset = offsetof(typeof(trace), args);
  147. nr = syscall_name_to_nr(call->data);
  148. meta = syscall_nr_to_meta(nr);
  149. if (!meta)
  150. return 0;
  151. ret = trace_define_common_fields(call);
  152. if (ret)
  153. return ret;
  154. for (i = 0; i < meta->nb_args; i++) {
  155. ret = trace_define_field(call, meta->types[i],
  156. meta->args[i], offset,
  157. sizeof(unsigned long), 0);
  158. offset += sizeof(unsigned long);
  159. }
  160. return ret;
  161. }
  162. int syscall_exit_define_fields(struct ftrace_event_call *call)
  163. {
  164. struct syscall_trace_exit trace;
  165. int ret;
  166. ret = trace_define_common_fields(call);
  167. if (ret)
  168. return ret;
  169. ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0);
  170. return ret;
  171. }
  172. void ftrace_syscall_enter(struct pt_regs *regs, long id)
  173. {
  174. struct syscall_trace_enter *entry;
  175. struct syscall_metadata *sys_data;
  176. struct ring_buffer_event *event;
  177. int size;
  178. int syscall_nr;
  179. syscall_nr = syscall_get_nr(current, regs);
  180. if (!test_bit(syscall_nr, enabled_enter_syscalls))
  181. return;
  182. sys_data = syscall_nr_to_meta(syscall_nr);
  183. if (!sys_data)
  184. return;
  185. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  186. event = trace_current_buffer_lock_reserve(sys_data->enter_id, size,
  187. 0, 0);
  188. if (!event)
  189. return;
  190. entry = ring_buffer_event_data(event);
  191. entry->nr = syscall_nr;
  192. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  193. if (!filter_current_check_discard(sys_data->enter_event, entry, event))
  194. trace_current_buffer_unlock_commit(event, 0, 0);
  195. }
  196. void ftrace_syscall_exit(struct pt_regs *regs, long ret)
  197. {
  198. struct syscall_trace_exit *entry;
  199. struct syscall_metadata *sys_data;
  200. struct ring_buffer_event *event;
  201. int syscall_nr;
  202. syscall_nr = syscall_get_nr(current, regs);
  203. if (!test_bit(syscall_nr, enabled_exit_syscalls))
  204. return;
  205. sys_data = syscall_nr_to_meta(syscall_nr);
  206. if (!sys_data)
  207. return;
  208. event = trace_current_buffer_lock_reserve(sys_data->exit_id,
  209. sizeof(*entry), 0, 0);
  210. if (!event)
  211. return;
  212. entry = ring_buffer_event_data(event);
  213. entry->nr = syscall_nr;
  214. entry->ret = syscall_get_return_value(current, regs);
  215. if (!filter_current_check_discard(sys_data->exit_event, entry, event))
  216. trace_current_buffer_unlock_commit(event, 0, 0);
  217. }
  218. int reg_event_syscall_enter(void *ptr)
  219. {
  220. int ret = 0;
  221. int num;
  222. char *name;
  223. name = (char *)ptr;
  224. num = syscall_name_to_nr(name);
  225. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  226. return -ENOSYS;
  227. mutex_lock(&syscall_trace_lock);
  228. if (!sys_refcount_enter)
  229. ret = register_trace_syscall_enter(ftrace_syscall_enter);
  230. if (ret) {
  231. pr_info("event trace: Could not activate"
  232. "syscall entry trace point");
  233. } else {
  234. set_bit(num, enabled_enter_syscalls);
  235. sys_refcount_enter++;
  236. }
  237. mutex_unlock(&syscall_trace_lock);
  238. return ret;
  239. }
  240. void unreg_event_syscall_enter(void *ptr)
  241. {
  242. int num;
  243. char *name;
  244. name = (char *)ptr;
  245. num = syscall_name_to_nr(name);
  246. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  247. return;
  248. mutex_lock(&syscall_trace_lock);
  249. sys_refcount_enter--;
  250. clear_bit(num, enabled_enter_syscalls);
  251. if (!sys_refcount_enter)
  252. unregister_trace_syscall_enter(ftrace_syscall_enter);
  253. mutex_unlock(&syscall_trace_lock);
  254. }
  255. int reg_event_syscall_exit(void *ptr)
  256. {
  257. int ret = 0;
  258. int num;
  259. char *name;
  260. name = (char *)ptr;
  261. num = syscall_name_to_nr(name);
  262. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  263. return -ENOSYS;
  264. mutex_lock(&syscall_trace_lock);
  265. if (!sys_refcount_exit)
  266. ret = register_trace_syscall_exit(ftrace_syscall_exit);
  267. if (ret) {
  268. pr_info("event trace: Could not activate"
  269. "syscall exit trace point");
  270. } else {
  271. set_bit(num, enabled_exit_syscalls);
  272. sys_refcount_exit++;
  273. }
  274. mutex_unlock(&syscall_trace_lock);
  275. return ret;
  276. }
  277. void unreg_event_syscall_exit(void *ptr)
  278. {
  279. int num;
  280. char *name;
  281. name = (char *)ptr;
  282. num = syscall_name_to_nr(name);
  283. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  284. return;
  285. mutex_lock(&syscall_trace_lock);
  286. sys_refcount_exit--;
  287. clear_bit(num, enabled_exit_syscalls);
  288. if (!sys_refcount_exit)
  289. unregister_trace_syscall_exit(ftrace_syscall_exit);
  290. mutex_unlock(&syscall_trace_lock);
  291. }
  292. struct trace_event event_syscall_enter = {
  293. .trace = print_syscall_enter,
  294. };
  295. struct trace_event event_syscall_exit = {
  296. .trace = print_syscall_exit,
  297. };
  298. #ifdef CONFIG_EVENT_PROFILE
  /* Profiling of syscall entry: enable count and per-syscall enable bits. */
  static int sys_prof_refcount_enter;
  static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX);

  /* Profiling of syscall exit: enable count and per-syscall enable bits. */
  static int sys_prof_refcount_exit;
  static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX);
  303. static void prof_syscall_enter(struct pt_regs *regs, long id)
  304. {
  305. struct syscall_trace_enter *rec;
  306. struct syscall_metadata *sys_data;
  307. int syscall_nr;
  308. int size;
  309. syscall_nr = syscall_get_nr(current, regs);
  310. if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
  311. return;
  312. sys_data = syscall_nr_to_meta(syscall_nr);
  313. if (!sys_data)
  314. return;
  315. /* get the size after alignment with the u32 buffer size field */
  316. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  317. size = ALIGN(size + sizeof(u32), sizeof(u64));
  318. size -= sizeof(u32);
  319. do {
  320. char raw_data[size];
  321. /* zero the dead bytes from align to not leak stack to user */
  322. *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
  323. rec = (struct syscall_trace_enter *) raw_data;
  324. tracing_generic_entry_update(&rec->ent, 0, 0);
  325. rec->ent.type = sys_data->enter_id;
  326. rec->nr = syscall_nr;
  327. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  328. (unsigned long *)&rec->args);
  329. perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
  330. } while(0);
  331. }
  332. int reg_prof_syscall_enter(char *name)
  333. {
  334. int ret = 0;
  335. int num;
  336. num = syscall_name_to_nr(name);
  337. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  338. return -ENOSYS;
  339. mutex_lock(&syscall_trace_lock);
  340. if (!sys_prof_refcount_enter)
  341. ret = register_trace_syscall_enter(prof_syscall_enter);
  342. if (ret) {
  343. pr_info("event trace: Could not activate"
  344. "syscall entry trace point");
  345. } else {
  346. set_bit(num, enabled_prof_enter_syscalls);
  347. sys_prof_refcount_enter++;
  348. }
  349. mutex_unlock(&syscall_trace_lock);
  350. return ret;
  351. }
  352. void unreg_prof_syscall_enter(char *name)
  353. {
  354. int num;
  355. num = syscall_name_to_nr(name);
  356. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  357. return;
  358. mutex_lock(&syscall_trace_lock);
  359. sys_prof_refcount_enter--;
  360. clear_bit(num, enabled_prof_enter_syscalls);
  361. if (!sys_prof_refcount_enter)
  362. unregister_trace_syscall_enter(prof_syscall_enter);
  363. mutex_unlock(&syscall_trace_lock);
  364. }
  365. static void prof_syscall_exit(struct pt_regs *regs, long ret)
  366. {
  367. struct syscall_metadata *sys_data;
  368. struct syscall_trace_exit rec;
  369. int syscall_nr;
  370. syscall_nr = syscall_get_nr(current, regs);
  371. if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
  372. return;
  373. sys_data = syscall_nr_to_meta(syscall_nr);
  374. if (!sys_data)
  375. return;
  376. tracing_generic_entry_update(&rec.ent, 0, 0);
  377. rec.ent.type = sys_data->exit_id;
  378. rec.nr = syscall_nr;
  379. rec.ret = syscall_get_return_value(current, regs);
  380. perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
  381. }
  382. int reg_prof_syscall_exit(char *name)
  383. {
  384. int ret = 0;
  385. int num;
  386. num = syscall_name_to_nr(name);
  387. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  388. return -ENOSYS;
  389. mutex_lock(&syscall_trace_lock);
  390. if (!sys_prof_refcount_exit)
  391. ret = register_trace_syscall_exit(prof_syscall_exit);
  392. if (ret) {
  393. pr_info("event trace: Could not activate"
  394. "syscall entry trace point");
  395. } else {
  396. set_bit(num, enabled_prof_exit_syscalls);
  397. sys_prof_refcount_exit++;
  398. }
  399. mutex_unlock(&syscall_trace_lock);
  400. return ret;
  401. }
  402. void unreg_prof_syscall_exit(char *name)
  403. {
  404. int num;
  405. num = syscall_name_to_nr(name);
  406. if (num < 0 || num >= FTRACE_SYSCALL_MAX)
  407. return;
  408. mutex_lock(&syscall_trace_lock);
  409. sys_prof_refcount_exit--;
  410. clear_bit(num, enabled_prof_exit_syscalls);
  411. if (!sys_prof_refcount_exit)
  412. unregister_trace_syscall_exit(prof_syscall_exit);
  413. mutex_unlock(&syscall_trace_lock);
  414. }
  415. #endif