ftrace.c

/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>

#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <linux/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
{
        set_kernel_text_rw();
        return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
        set_kernel_text_ro();
        return 0;
}

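/*
 * The union below overlays the MCOUNT_INSN_SIZE (5 on x86) bytes of a
 * near call instruction: one opcode byte (0xe8) followed by a 32-bit
 * signed displacement. It is packed so the offset sits directly after
 * the opcode with no padding.
 */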
union ftrace_code_union {
        char code[MCOUNT_INSN_SIZE];
        struct {
                char e8;
                int offset;
        } __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
        return (int)(addr - ip);
}

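/*
 * ftrace_call_replace() below builds the 5-byte call in a static
 * buffer: 0xe8 followed by the little-endian displacement from the end
 * of the patched instruction (ip + MCOUNT_INSN_SIZE) to addr. For
 * example, with ip == 0xc0100000 and addr == 0xc0100800 the resulting
 * bytes would be e8 fb 07 00 00.
 */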
static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
        static union ftrace_code_union calc;

        calc.e8 = 0xe8;
        calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

        /*
         * No locking needed, this must be called via kstop_machine
         * which in essence is like running on a uniprocessor machine.
         */
        return calc.code;
}

/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: An IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Set a flag that says we are modifying code.
 * 3) Wait for any running NMIs to finish.
 * 4) Write the code.
 * 5) Clear the flag.
 * 6) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code
 * it is OK to write to that code location if the contents being written
 * are the same as what exists.
 */

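/*
 * In the code below, do_ftrace_mod_code() drives the six steps above
 * from the CPU doing the update, while ftrace_nmi_enter() performs the
 * same write from NMI context whenever it sees the mod_code_write flag
 * set, so an NMI that interrupts the update completes the write itself
 * rather than executing a half-patched instruction.
 */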
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;             /* holds return value of text write */
static int mod_code_write;              /* set when NMI should do the write */
static void *mod_code_ip;               /* holds the IP to write to */
static void *mod_code_newcode;          /* holds the text to write to the IP */

static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);

int ftrace_arch_read_dyn_info(char *buf, int size)
{
        int r;

        r = snprintf(buf, size, "%u %u",
                     nmi_wait_count,
                     atomic_read(&nmi_update_count));
        return r;
}

static void ftrace_mod_code(void)
{
        /*
         * Yes, more than one CPU can be writing to mod_code_status
         * (and to the code itself).
         * But if one were to fail, then they all should, and if one were
         * to succeed, then they all should.
         */
        mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
                                             MCOUNT_INSN_SIZE);

        /* if we fail, then kill any new writers */
        if (mod_code_status)
                mod_code_write = 0;
}

void ftrace_nmi_enter(void)
{
        atomic_inc(&nmi_running);
        /* Must have nmi_running seen before reading write flag */
        smp_mb();
        if (mod_code_write) {
                ftrace_mod_code();
                atomic_inc(&nmi_update_count);
        }
}

void ftrace_nmi_exit(void)
{
        /* Finish all executions before clearing nmi_running */
        smp_wmb();
        atomic_dec(&nmi_running);
}

static void wait_for_nmi(void)
{
        if (!atomic_read(&nmi_running))
                return;

        do {
                cpu_relax();
        } while (atomic_read(&nmi_running));

        nmi_wait_count++;
}

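/*
 * do_ftrace_mod_code() runs on the CPU doing the update (under
 * kstop_machine). Its smp_mb() after setting mod_code_write pairs with
 * the smp_mb() in ftrace_nmi_enter() after incrementing nmi_running:
 * either the NMI sees the write flag and does the write itself, or we
 * see nmi_running and wait for that NMI to finish before writing.
 */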
static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
        mod_code_ip = (void *)ip;
        mod_code_newcode = new_code;

        /* The buffers need to be visible before we let NMIs write them */
        smp_wmb();

        mod_code_write = 1;

        /* Make sure write bit is visible before we wait on NMIs */
        smp_mb();

        wait_for_nmi();

        /* Make sure all running NMIs have finished before we write the code */
        smp_mb();

        ftrace_mod_code();

        /* Make sure the write happens before clearing the bit */
        smp_wmb();

        mod_code_write = 0;

        /* make sure NMIs see the cleared bit */
        smp_mb();

        wait_for_nmi();

        return mod_code_status;
}

static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

static unsigned char *ftrace_nop_replace(void)
{
        return ftrace_nop;
}

static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                   unsigned char *new_code)
{
        unsigned char replaced[MCOUNT_INSN_SIZE];

        /*
         * Note: Due to modules and __init, code can disappear and
         * change; we need to protect against faulting as well as code
         * changing. We do this by using the probe_kernel_* functions.
         *
         * No real locking needed, this code is run through
         * kstop_machine, or before SMP starts.
         */

        /* read the text we want to modify */
        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        /* Make sure it is what we expect it to be */
        if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
                return -EINVAL;

        /* replace the text with the new text */
        if (do_ftrace_mod_code(ip, new_code))
                return -EPERM;

        sync_core();

        return 0;
}

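/*
 * The two helpers below convert a traced call site back and forth:
 * ftrace_make_nop() replaces the 5-byte call at rec->ip (a call to
 * addr, typically the mcount/ftrace trampoline) with the nop chosen in
 * ftrace_dyn_arch_init(), and ftrace_make_call() puts the call back.
 * In both cases the expected old contents are passed to
 * ftrace_modify_code() so a mismatch is caught before patching.
 */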
int ftrace_make_nop(struct module *mod,
                    struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_call_replace(ip, addr);
        new = ftrace_nop_replace();

        return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_nop_replace();
        new = ftrace_call_replace(ip, addr);

        return ftrace_modify_code(rec->ip, old, new);
}

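/*
 * ftrace_update_ftrace_func() patches a different site: the call
 * instruction at the ftrace_call label (inside the arch's mcount/
 * ftrace_caller entry code) is redirected to the new tracer callback
 * func. The current bytes at that label serve as the expected old code.
 */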
int ftrace_update_ftrace_func(ftrace_func_t func)
{
        unsigned long ip = (unsigned long)(&ftrace_call);
        unsigned char old[MCOUNT_INSN_SIZE], *new;
        int ret;

        memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = ftrace_modify_code(ip, old, new);

        return ret;
}

int __init ftrace_dyn_arch_init(void *data)
{
        extern const unsigned char ftrace_test_p6nop[];
        extern const unsigned char ftrace_test_nop5[];
        extern const unsigned char ftrace_test_jmp[];
        int faulted = 0;

        /*
         * There is no good nop for all x86 archs.
         * We will default to using the P6_NOP5, but first we
         * will test to make sure that the nop will actually
         * work on this CPU. If it faults, we will then
         * fall back to a less efficient 5-byte nop. If that fails
         * we then just use a jmp as our nop. This isn't the most
         * efficient nop, but we cannot use a multi-part nop,
         * since we would then risk being preempted in the middle
         * of that nop, and if we enabled tracing then, it might
         * cause a system crash.
         *
         * TODO: check the cpuid to determine the best nop.
         */
        asm volatile (
                "ftrace_test_jmp:"
                "jmp ftrace_test_p6nop\n"
                "nop\n"
                "nop\n"
                "nop\n"  /* 2 byte jmp + 3 bytes */
                "ftrace_test_p6nop:"
                P6_NOP5
                "jmp 1f\n"
                "ftrace_test_nop5:"
                ".byte 0x66,0x66,0x66,0x66,0x90\n"
                "1:"
                ".section .fixup, \"ax\"\n"
                "2:     movl $1, %0\n"
                "       jmp ftrace_test_nop5\n"
                "3:     movl $2, %0\n"
                "       jmp 1b\n"
                ".previous\n"
                _ASM_EXTABLE(ftrace_test_p6nop, 2b)
                _ASM_EXTABLE(ftrace_test_nop5, 3b)
                : "=r"(faulted) : "0" (faulted));

        switch (faulted) {
        case 0:
                pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
                memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
                break;
        case 1:
                pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
                memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
                break;
        case 2:
                pr_info("ftrace: converting mcount calls to jmp . + 5\n");
                memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
                break;
        }

        /* The return code is returned via data */
        *(unsigned long *)data = 0;

        return 0;
}

#endif

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip,
                          int old_offset, int new_offset)
{
        unsigned char code[MCOUNT_INSN_SIZE];

        if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
                return -EINVAL;

        *(int *)(&code[1]) = new_offset;

        if (do_ftrace_mod_code(ip, &code))
                return -EPERM;

        return 0;
}

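/*
 * ftrace_graph_call is a 5-byte near jump (0xe9 plus a 32-bit
 * displacement); ftrace_mod_jmp() above rewrites only the displacement
 * after verifying the old one. Enabling the graph caller points the
 * jump at ftrace_graph_caller, disabling it points the jump back at
 * ftrace_stub.
 */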
int ftrace_enable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);
        int old_offset, new_offset;

        old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
        new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

        return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);
        int old_offset, new_offset;

        old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
        new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

        return ftrace_mod_jmp(ip, old_offset, new_offset);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it onto the return address stack
 * in the current thread_info.
 */
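/*
 * The asm below reads the caller's saved return address from *parent
 * and replaces it with return_to_handler, with exception fixups in
 * case the access faults. The original address is then recorded via
 * ftrace_push_return_trace(); if that fails with -EBUSY, or if the
 * entry hook declines to trace, the original return address is
 * restored.
 */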
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
        unsigned long old;
        unsigned long long calltime;
        int faulted;
        struct ftrace_graph_ent trace;
        unsigned long return_hooker = (unsigned long)
                                &return_to_handler;

        /* NMIs are currently unsupported */
        if (unlikely(in_nmi()))
                return;

        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                return;

        /*
         * Protect against faults, even if they shouldn't
         * happen. This tool is too intrusive to
         * ignore such a protection.
         */
        asm volatile(
                "1: " _ASM_MOV " (%[parent]), %[old]\n"
                "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
                "   movl $0, %[faulted]\n"
                "3:\n"

                ".section .fixup, \"ax\"\n"
                "4: movl $1, %[faulted]\n"
                "   jmp 3b\n"
                ".previous\n"

                _ASM_EXTABLE(1b, 4b)
                _ASM_EXTABLE(2b, 4b)

                : [old] "=r" (old), [faulted] "=r" (faulted)
                : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
                : "memory"
        );

        if (unlikely(faulted)) {
                ftrace_graph_stop();
                WARN_ON(1);
                return;
        }

        calltime = cpu_clock(raw_smp_processor_id());

        if (ftrace_push_return_trace(old, calltime,
                                     self_addr, &trace.depth) == -EBUSY) {
                *parent = old;
                return;
        }

        trace.func = self_addr;

        /* Only trace if the calling function expects to */
        if (!ftrace_graph_entry(&trace)) {
                current->curr_ret_stack--;
                *parent = old;
        }
}

#endif /* CONFIG_FUNCTION_GRAPH_TRACER */