/*
 * x86 single-step support code, common to 32-bit and 64-bit.
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/ptrace.h>

#ifdef CONFIG_X86_32
#include <linux/uaccess.h>
#include <asm/desc.h>

/*
 * Return EIP plus the CS segment base. The segment limit is also
 * adjusted, clamped to the kernel/user address space (whichever is
 * appropriate), and returned in *eip_limit.
 *
 * The segment is checked, because it might have been changed by another
 * task between the original faulting instruction and here.
 *
 * If CS is no longer a valid code segment, or if EIP is beyond the
 * limit, or if it is a kernel address when CS is not a kernel segment,
 * then the returned value will be greater than *eip_limit.
 *
 * This is slow, but is very rarely executed.
 */
unsigned long get_segment_eip(struct pt_regs *regs,
			      unsigned long *eip_limit)
{
	unsigned long ip = regs->ip;
	unsigned seg = regs->cs & 0xffff;
	u32 seg_ar, seg_limit, base, *desc;

	/* Unlikely, but must come before segment checks. */
	if (unlikely(regs->flags & VM_MASK)) {
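		/*
		 * Real-mode style addressing: the segment base is just
		 * selector * 16 and the limit is always 64K.
		 */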
		base = seg << 4;
		*eip_limit = base + 0xffff;
		return base + (ip & 0xffff);
	}

	/* The standard kernel/user address space limit. */
	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;

	/* By far the most common cases. */
	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
		return ip;

	/* Check the segment exists, is within the current LDT/GDT size,
	   that kernel/user (ring 0..3) has the appropriate privilege,
	   that it's a code segment, and get the limit. */
	__asm__("larl %3,%0; lsll %3,%1"
		: "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
	if ((~seg_ar & 0x9800) || ip > seg_limit) {
		*eip_limit = 0;
		return 1;	/* So that returned ip > *eip_limit. */
	}

	/* Get the GDT/LDT descriptor base.
	   When you look for races in this code remember that
	   LDT and other horrors are only used in user space. */
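	/* Bit 2 of the selector is the table indicator: 1 = LDT, 0 = GDT. */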
	if (seg & (1<<2)) {
		/* Must lock the LDT while reading it. */
		mutex_lock(&current->mm->context.lock);
		desc = current->mm->context.ldt;
		desc = (void *)desc + (seg & ~7);
	} else {
		/* Must disable preemption while reading the GDT. */
		desc = (u32 *)get_cpu_gdt_table(get_cpu());
		desc = (void *)desc + (seg & ~7);
	}

	/* Decode the code segment base from the descriptor */
	base = get_desc_base((struct desc_struct *)desc);

	if (seg & (1<<2))
		mutex_unlock(&current->mm->context.lock);
	else
		put_cpu();

	/* Adjust EIP and segment limit, and clamp at the kernel limit.
	   It's legitimate for segments to wrap at 0xffffffff. */
	seg_limit += base;
	if (seg_limit < *eip_limit && seg_limit >= base)
		*eip_limit = seg_limit;
	return ip + base;
}
#endif
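
/*
 * Translate the user-visible CS:IP into a linear address, taking vm86
 * mode and nonzero-based LDT code segments into account.  GDT code
 * segments are assumed to be zero-based (see below).
 */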
#ifdef CONFIG_X86_32
static
#endif
unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
	unsigned long addr, seg;

	addr = regs->ip;
	seg = regs->cs & 0xffff;
	if (v8086_mode(regs)) {
		addr = (addr & 0xffff) + (seg << 4);
		return addr;
	}

	/*
	 * We'll assume that the code segments in the GDT
	 * are all zero-based. That is largely true: the
	 * TLS segments are used for data, and the PNPBIOS
	 * and APM bios ones we just ignore here.
	 */
	if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
		u32 *desc;
		unsigned long base;

		seg &= ~7UL;

		mutex_lock(&child->mm->context.lock);
		if (unlikely((seg >> 3) >= child->mm->context.size))
			addr = -1L; /* bogus selector, access would fault */
		else {
			desc = child->mm->context.ldt + seg;
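			/*
			 * The 32-bit base is scattered across the
			 * descriptor: bits 15:0 live in desc[0][31:16],
			 * bits 23:16 in desc[1][7:0], and bits 31:24 in
			 * desc[1][31:24].
			 */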
			base = ((desc[0] >> 16) |
				((desc[1] & 0xff) << 16) |
				(desc[1] & 0xff000000));

			/* 16-bit code segment? */
			if (!((desc[1] >> 22) & 1))
				addr &= 0xffff;
			addr += base;
		}
		mutex_unlock(&child->mm->context.lock);
	}

	return addr;
}
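
/*
 * Inspect the instruction the child will resume at: skip over any
 * prefix bytes and check whether the opcode is one (popf or iret)
 * that reloads EFLAGS and can therefore set TF itself.  If so, the
 * caller must not take ownership of the trap flag.
 */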
static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
{
	int i, copied;
	unsigned char opcode[15];
	unsigned long addr = convert_rip_to_linear(child, regs);

	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
	for (i = 0; i < copied; i++) {
		switch (opcode[i]) {
		/* popf and iret */
		case 0x9d: case 0xcf:
			return 1;

		/* CHECKME: 64 65 */

		/* opcode and address size prefixes */
		case 0x66: case 0x67:
			continue;
		/* irrelevant prefixes (segment overrides and repeats) */
		case 0x26: case 0x2e:
		case 0x36: case 0x3e:
		case 0x64: case 0x65:
		case 0xf0: case 0xf2: case 0xf3:
			continue;

#ifdef CONFIG_X86_64
		case 0x40 ... 0x4f:
			if (regs->cs != __USER_CS)
				/* 32-bit mode: register increment */
				return 0;
			/* 64-bit mode: REX prefix */
			continue;
#endif

		/* CHECKME: f2, f3 */

		/*
		 * pushf: NOTE! We should probably not let
		 * the user see the TF bit being set. But
		 * it's more pain than it's worth to avoid
		 * it, and a debugger could emulate this
		 * all in user space if it _really_ cares.
		 */
		case 0x9c:
		default:
			return 0;
		}
	}
	return 0;
}

/*
 * Enable single-stepping.  Return nonzero if user mode is not using TF itself.
 */
static int enable_single_step(struct task_struct *child)
{
	struct pt_regs *regs = task_pt_regs(child);

	/*
	 * Always set TIF_SINGLESTEP - this guarantees that
	 * we single-step system calls etc..  This will also
	 * cause us to set TF when returning to user mode.
	 */
	set_tsk_thread_flag(child, TIF_SINGLESTEP);

	/*
	 * If TF was already set, don't do anything else
	 */
	if (regs->flags & X86_EFLAGS_TF)
		return 0;

	/* Set TF on the kernel stack.. */
	regs->flags |= X86_EFLAGS_TF;

	/*
	 * ..but if TF is changed by the instruction we will trace,
	 * don't mark it as being "us" that set it, so that we
	 * won't clear it by hand later.
	 */
	if (is_setting_trap_flag(child, regs))
		return 0;

	set_tsk_thread_flag(child, TIF_FORCED_TF);

	return 1;
}

/*
 * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
 */
static void write_debugctlmsr(struct task_struct *child, unsigned long val)
{
	child->thread.debugctlmsr = val;
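
	/*
	 * If the child is not running right now, the context-switch code
	 * will load the MSR from thread.debugctlmsr when the task is next
	 * scheduled in, so only touch the hardware for the current task.
	 */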
	if (child != current)
		return;

#ifdef CONFIG_X86_64
	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
#else
	wrmsr(MSR_IA32_DEBUGCTLMSR, val, 0);
#endif
}

/*
 * Enable single or block step.
 */
static void enable_step(struct task_struct *child, bool block)
{
	/*
	 * Make sure block stepping (BTF) is not enabled unless it should be.
	 * Note that we don't try to worry about any is_setting_trap_flag()
	 * instructions after the first when using block stepping.
	 * So no one should try to use debugger block stepping in a program
	 * that uses user-mode single stepping itself.
	 */
	if (enable_single_step(child) && block) {
		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
		write_debugctlmsr(child,
				  child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
	} else {
		write_debugctlmsr(child,
				  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
		if (!child->thread.debugctlmsr)
			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
	}
}
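
/*
 * These are the entry points used by ptrace: PTRACE_SINGLESTEP lands in
 * user_enable_single_step() and, on x86, PTRACE_SINGLEBLOCK lands in
 * user_enable_block_step().  A hypothetical debugger-side sequence
 * (sketch only, error handling omitted):
 *
 *	ptrace(PTRACE_SINGLESTEP, pid, 0, 0);	// trap after one instruction
 *	waitpid(pid, &status, 0);
 *	ptrace(PTRACE_SINGLEBLOCK, pid, 0, 0);	// trap at the next branch
 *	waitpid(pid, &status, 0);
 */
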
void user_enable_single_step(struct task_struct *child)
{
	enable_step(child, 0);
}

void user_enable_block_step(struct task_struct *child)
{
	enable_step(child, 1);
}

void user_disable_single_step(struct task_struct *child)
{
	/*
	 * Make sure block stepping (BTF) is disabled.
	 */
	write_debugctlmsr(child,
			  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
	if (!child->thread.debugctlmsr)
		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);

	/* Always clear TIF_SINGLESTEP... */
	clear_tsk_thread_flag(child, TIF_SINGLESTEP);

	/* But touch TF only if it was set by us.. */
	if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
		task_pt_regs(child)->flags &= ~X86_EFLAGS_TF;
}