kmmio.c

/* Support for MMIO probes.
 * Benefits from much code borrowed from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
#include <asm/debugreg.h>
#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
        struct list_head list;
        struct kmmio_fault_page *release_next;
        unsigned long page; /* location of the fault page */

        /*
         * Number of times this page has been registered as a part
         * of a probe. If zero, page is disarmed and this may be freed.
         * Used only by writers (RCU).
         */
        int count;
};

struct kmmio_delayed_release {
        struct rcu_head rcu;
        struct kmmio_fault_page *release_list;
};

struct kmmio_context {
        struct kmmio_fault_page *fpage;
        struct kmmio_probe *probe;
        unsigned long saved_flags;
        unsigned long addr;
        int active;
};

static DEFINE_SPINLOCK(kmmio_lock);

/* Protected by kmmio_lock */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long page)
{
        return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/*
 * This is basically a dynamic stabbing problem:
 * we could use the existing prio tree code, or one of these possibly
 * better implementations:
 * "The Interval Skip List: A Data Structure for Finding All Intervals That
 * Overlap a Point" (might be simple)
 * "Space Efficient Dynamic Stabbing with Fast Queries" - Mikkel Thorup
 */

/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
        struct kmmio_probe *p;

        list_for_each_entry_rcu(p, &kmmio_probes, list) {
                if (addr >= p->addr && addr <= (p->addr + p->len))
                        return p;
        }
        return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
        struct list_head *head;
        struct kmmio_fault_page *p;

        page &= PAGE_MASK;
        head = kmmio_page_list(page);
        list_for_each_entry_rcu(p, head, list) {
                if (p->page == page)
                        return p;
        }
        return NULL;
}

static void set_page_present(unsigned long addr, bool present,
                             unsigned int *pglevel)
{
        pteval_t pteval;
        pmdval_t pmdval;
        unsigned int level;
        pmd_t *pmd;
        pte_t *pte = lookup_address(addr, &level);

        if (!pte) {
                pr_err("kmmio: no pte for page 0x%08lx\n", addr);
                return;
        }

        if (pglevel)
                *pglevel = level;

        switch (level) {
        case PG_LEVEL_2M:
                pmd = (pmd_t *)pte;
                pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
                if (present)
                        pmdval |= _PAGE_PRESENT;
                set_pmd(pmd, __pmd(pmdval));
                break;

        case PG_LEVEL_4K:
                pteval = pte_val(*pte) & ~_PAGE_PRESENT;
                if (present)
                        pteval |= _PAGE_PRESENT;
                set_pte_atomic(pte, __pte(pteval));
                break;

        default:
                pr_err("kmmio: unexpected page level 0x%x.\n", level);
                return;
        }

        __flush_tlb_one(addr);
}

/** Mark the given page as not present. Access to it will trigger a fault. */
static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
{
        set_page_present(page & PAGE_MASK, false, pglevel);
}

/** Mark the given page as present. */
static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
{
        set_page_present(page & PAGE_MASK, true, pglevel);
}

/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could already be inside a kmmio
 * critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
        struct kmmio_context *ctx;
        struct kmmio_fault_page *faultpage;
        int ret = 0; /* default to fault not handled */

        /*
         * Preemption is now disabled to prevent process switch during
         * single stepping. We can only handle one active kmmio trace
         * per cpu, so ensure that we finish it before something else
         * gets to run. We also hold the RCU read lock over single
         * stepping to avoid looking up the probe and kmmio_fault_page
         * again.
         */
        preempt_disable();
        rcu_read_lock();

        faultpage = get_kmmio_fault_page(addr);
        if (!faultpage) {
                /*
                 * Either this page fault is not caused by kmmio, or
                 * another CPU just pulled the kmmio probe from under
                 * our feet. The latter case should not be possible.
                 */
                goto no_kmmio;
        }

        ctx = &get_cpu_var(kmmio_ctx);
        if (ctx->active) {
                disarm_kmmio_fault_page(faultpage->page, NULL);
                if (addr == ctx->addr) {
                        /*
                         * On SMP we sometimes get recursive probe hits on the
                         * same address. Context is already saved, fall out.
                         */
                        pr_debug("kmmio: duplicate probe hit on CPU %d, for "
                                 "address 0x%08lx.\n",
                                 smp_processor_id(), addr);
                        ret = 1;
                        goto no_kmmio_ctx;
                }
                /*
                 * Prevent overwriting already in-flight context.
                 * This should not happen, let's hope disarming at least
                 * prevents a panic.
                 */
                pr_emerg("kmmio: recursive probe hit on CPU %d, "
                         "for address 0x%08lx. Ignoring.\n",
                         smp_processor_id(), addr);
                pr_emerg("kmmio: previous hit was at 0x%08lx.\n", ctx->addr);
                goto no_kmmio_ctx;
        }
        ctx->active++;

        ctx->fpage = faultpage;
        ctx->probe = get_kmmio_probe(addr);
        ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
        ctx->addr = addr;

        if (ctx->probe && ctx->probe->pre_handler)
                ctx->probe->pre_handler(ctx->probe, regs, addr);

        /*
         * Enable single-stepping and disable interrupts for the faulting
         * context. Local interrupts must not get enabled during stepping.
         */
        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;

        /* Now we set present bit in PTE and single step. */
        disarm_kmmio_fault_page(ctx->fpage->page, NULL);

        /*
         * If another cpu accesses the same page while we are stepping,
         * the access will not be caught. It will simply succeed and the
         * only downside is we lose the event. If this becomes a problem,
         * the user should drop to single cpu before tracing.
         */

        put_cpu_var(kmmio_ctx);
        return 1; /* fault handled */

no_kmmio_ctx:
        put_cpu_var(kmmio_ctx);
no_kmmio:
        rcu_read_unlock();
        preempt_enable_no_resched();
        return ret;
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
        int ret = 0;
        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

        if (!ctx->active) {
                pr_debug("kmmio: spurious debug trap on CPU %d.\n",
                         smp_processor_id());
                goto out;
        }

        if (ctx->probe && ctx->probe->post_handler)
                ctx->probe->post_handler(ctx->probe, condition, regs);

        arm_kmmio_fault_page(ctx->fpage->page, NULL);

        regs->flags &= ~X86_EFLAGS_TF;
        regs->flags |= ctx->saved_flags;

        /* These were acquired in kmmio_handler(). */
        ctx->active--;
        BUG_ON(ctx->active);
        rcu_read_unlock();
        preempt_enable_no_resched();

        /*
         * if somebody else is singlestepping across a probe point, flags
         * will have TF set, in which case, continue the remaining processing
         * of do_debug, as if this is not a probe hit.
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                ret = 1;
out:
        put_cpu_var(kmmio_ctx);
        return ret;
}

/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (f) {
                if (!f->count)
                        arm_kmmio_fault_page(f->page, NULL);
                f->count++;
                return 0;
        }

        f = kmalloc(sizeof(*f), GFP_ATOMIC);
        if (!f)
                return -1;

        f->count = 1;
        f->page = page;
        list_add_rcu(&f->list, kmmio_page_list(f->page));
        arm_kmmio_fault_page(f->page, NULL);

        return 0;
}

/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long page,
                                struct kmmio_fault_page **release_list)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (!f)
                return;

        f->count--;
        BUG_ON(f->count < 0);
        if (!f->count) {
                disarm_kmmio_fault_page(f->page, NULL);
                f->release_next = *release_list;
                *release_list = f;
        }
}

/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped. Such events can only result from
 * programming mistakes: accesses before the beginning or past the end of
 * a mapping.
 */
int register_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        int ret = 0;
        unsigned long size = 0;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);

        spin_lock_irqsave(&kmmio_lock, flags);
        if (get_kmmio_probe(p->addr)) {
                ret = -EEXIST;
                goto out;
        }
        kmmio_count++;
        list_add_rcu(&p->list, &kmmio_probes);
        while (size < size_lim) {
                if (add_kmmio_fault_page(p->addr + size))
                        pr_err("kmmio: Unable to set page fault.\n");
                size += PAGE_SIZE;
        }
out:
        spin_unlock_irqrestore(&kmmio_lock, flags);
        /*
         * XXX: What should I do here?
         * Here was a call to global_flush_tlb(), but it does not exist
         * anymore. It seems it's not needed after all.
         */
        return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
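
/*
 * Illustrative sketch only, not part of kmmio.c (kept compiled out below):
 * how a caller might register a probe over an ioremapped window. It relies
 * only on the interface seen in this file and declared in
 * <linux/mmiotrace.h>: struct kmmio_probe with addr/len and
 * pre_handler/post_handler, plus register_kmmio_probe(). The sample_*
 * names, the handler bodies and the mapping passed in are hypothetical.
 */
#if 0
/* Called from kmmio_handler() before the faulting instruction is
 * single-stepped; interrupts are off, so it must not sleep. */
static void sample_pre(struct kmmio_probe *p, struct pt_regs *regs,
                       unsigned long addr)
{
        pr_info("kmmio sample: access to 0x%08lx, ip 0x%08lx\n",
                addr, regs->ip);
}

/* Called from post_kmmio_handler() once the access has completed. */
static void sample_post(struct kmmio_probe *p, unsigned long condition,
                        struct pt_regs *regs)
{
        pr_info("kmmio sample: access completed\n");
}

static struct kmmio_probe sample_probe;

/* Arm every page spanned by [io, io + len); returns -EEXIST if a probe
 * already covers the start address. */
static int sample_attach(void __iomem *io, unsigned long len)
{
        sample_probe.addr = (unsigned long)io;
        sample_probe.len = len;
        sample_probe.pre_handler = sample_pre;
        sample_probe.post_handler = sample_post;
        return register_kmmio_probe(&sample_probe);
}
#endif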

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *p = dr->release_list;

        while (p) {
                struct kmmio_fault_page *next = p->release_next;
                BUG_ON(p->count);
                kfree(p);
                p = next;
        }
        kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *p = dr->release_list;
        struct kmmio_fault_page **prevp = &dr->release_list;
        unsigned long flags;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (p) {
                if (!p->count) {
                        /* Still unused: unlink from the page table, free later. */
                        list_del_rcu(&p->list);
                        prevp = &p->release_next;
                } else {
                        /*
                         * The page was re-registered in the meantime: drop it
                         * from the release list so it is not freed. Do not
                         * advance prevp, so the next removal unlinks from the
                         * correct link and not from this dropped entry.
                         */
                        *prevp = p->release_next;
                }
                p = p->release_next;
        }
        spin_unlock_irqrestore(&kmmio_lock, flags);

        /* This is the real RCU destroy call. */
        call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore. Only after that
 * you may actually release your struct kmmio_probe (see the sketch after
 * unregister_kmmio_probe() below).
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs from within the RCU callback.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        unsigned long size = 0;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
        struct kmmio_fault_page *release_list = NULL;
        struct kmmio_delayed_release *drelease;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (size < size_lim) {
                release_kmmio_fault_page(p->addr + size, &release_list);
                size += PAGE_SIZE;
        }
        list_del_rcu(&p->list);
        kmmio_count--;
        spin_unlock_irqrestore(&kmmio_lock, flags);

        drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
        if (!drelease) {
                pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
                return;
        }
        drelease->release_list = release_list;

        /*
         * This is not really RCU here. We have just disarmed a set of
         * pages so that they cannot trigger page faults anymore. However,
         * we cannot remove the pages from kmmio_page_table,
         * because a probe hit might be in flight on another CPU. The
         * pages are collected into a list, and they will be removed from
         * kmmio_page_table when it is certain that no probe hit related to
         * these pages can be in flight. RCU grace period sounds like a
         * good choice.
         *
         * If we removed the pages too early, kmmio page fault handler might
         * not find the respective kmmio_fault_page and determine it's not
         * a kmmio fault, when it actually is. This would lead to madness.
         */
        call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
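
/*
 * Illustrative sketch only, not part of kmmio.c (kept compiled out below):
 * the tear-down order required by the comment above unregister_kmmio_probe().
 * The probe object may only be reused or freed after synchronize_rcu(),
 * because a handler on another CPU may still reference it until a grace
 * period has elapsed. sample_detach and its arguments are hypothetical.
 */
#if 0
static void sample_detach(struct kmmio_probe *probe, void __iomem *io)
{
        unregister_kmmio_probe(probe);  /* disarm pages, unlink the probe */
        synchronize_rcu();              /* wait out in-flight handlers */
        /* Only now is it safe to free or reuse *probe and unmap the region. */
        iounmap(io);
}
#endif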

static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
                              void *args)
{
        struct die_args *arg = args;

        if (val == DIE_DEBUG && (arg->err & DR_STEP))
                if (post_kmmio_handler(arg->err, arg->regs) == 1)
                        return NOTIFY_STOP;

        return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
};

static int __init init_kmmio(void)
{
        int i;

        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
                INIT_LIST_HEAD(&kmmio_page_table[i]);

        return register_die_notifier(&nb_die);
}
fs_initcall(init_kmmio); /* should be before device_initcall() */