kmmio.c

/*
 * Support for MMIO probes.
 * Much of this code is borrowed from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
#include <asm/debugreg.h>
#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
        struct list_head list;
        struct kmmio_fault_page *release_next;
        unsigned long page; /* location of the fault page */
        bool old_presence; /* page presence prior to arming */
        bool armed;

        /*
         * Number of times this page has been registered as a part
         * of a probe. If zero, page is disarmed and this may be freed.
         * Used only by writers (RCU).
         */
        int count;
};

struct kmmio_delayed_release {
        struct rcu_head rcu;
        struct kmmio_fault_page *release_list;
};

struct kmmio_context {
        struct kmmio_fault_page *fpage;
        struct kmmio_probe *probe;
        unsigned long saved_flags;
        unsigned long addr;
        int active;
};

static DEFINE_SPINLOCK(kmmio_lock);

/* Protected by kmmio_lock */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long page)
{
        return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/*
 * This is basically a dynamic stabbing problem:
 * could use the existing prio tree code, or one of the possibly better
 * implementations:
 * The Interval Skip List: A Data Structure for Finding All Intervals That
 * Overlap a Point (might be simple)
 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
        struct kmmio_probe *p;

        list_for_each_entry_rcu(p, &kmmio_probes, list) {
                if (addr >= p->addr && addr <= (p->addr + p->len))
                        return p;
        }
        return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
        struct list_head *head;
        struct kmmio_fault_page *p;

        page &= PAGE_MASK;
        head = kmmio_page_list(page);
        list_for_each_entry_rcu(p, head, list) {
                if (p->page == page)
                        return p;
        }
        return NULL;
}

static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
{
        pmdval_t v = pmd_val(*pmd);

        *old = !!(v & _PAGE_PRESENT);
        v &= ~_PAGE_PRESENT;
        if (present)
                v |= _PAGE_PRESENT;
        set_pmd(pmd, __pmd(v));
}

static void set_pte_presence(pte_t *pte, bool present, bool *old)
{
        pteval_t v = pte_val(*pte);

        *old = !!(v & _PAGE_PRESENT);
        v &= ~_PAGE_PRESENT;
        if (present)
                v |= _PAGE_PRESENT;
        set_pte_atomic(pte, __pte(v));
}

static int set_page_presence(unsigned long addr, bool present, bool *old)
{
        unsigned int level;
        pte_t *pte = lookup_address(addr, &level);

        if (!pte) {
                pr_err("kmmio: no pte for page 0x%08lx\n", addr);
                return -1;
        }

        switch (level) {
        case PG_LEVEL_2M:
                set_pmd_presence((pmd_t *)pte, present, old);
                break;
        case PG_LEVEL_4K:
                set_pte_presence(pte, present, old);
                break;
        default:
                pr_err("kmmio: unexpected page level 0x%x.\n", level);
                return -1;
        }

        __flush_tlb_one(addr);
        return 0;
}

/*
 * Mark the given page as not present. Access to it will trigger a fault.
 *
 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
 * protection is ignored here. RCU read lock is assumed held, so the struct
 * will not disappear unexpectedly. Furthermore, the caller must guarantee
 * that double arming the same virtual address (page) cannot occur.
 *
 * Double disarming, on the other hand, is allowed and may occur when a fault
 * and an mmiotrace shutdown happen simultaneously.
 */
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
        int ret;

        WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
        if (f->armed) {
                pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
                           f->page, f->count, f->old_presence);
        }
        ret = set_page_presence(f->page, false, &f->old_presence);
        WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
        f->armed = true;
        return ret;
}

/** Restore the given page to saved presence state. */
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
        bool tmp;
        int ret = set_page_presence(f->page, f->old_presence, &tmp);

        WARN_ONCE(ret < 0,
                  KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
        f->armed = false;
}

/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could already be executing within
 * a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
        struct kmmio_context *ctx;
        struct kmmio_fault_page *faultpage;
        int ret = 0; /* default to fault not handled */

        /*
         * Preemption is now disabled to prevent process switch during
         * single stepping. We can only handle one active kmmio trace
         * per cpu, so ensure that we finish it before something else
         * gets to run. We also hold the RCU read lock over single
         * stepping to avoid looking up the probe and kmmio_fault_page
         * again.
         */
        preempt_disable();
        rcu_read_lock();

        faultpage = get_kmmio_fault_page(addr);
        if (!faultpage) {
                /*
                 * Either this page fault is not caused by kmmio, or
                 * another CPU just pulled the kmmio probe from under
                 * our feet. The latter case should not be possible.
                 */
                goto no_kmmio;
        }

        ctx = &get_cpu_var(kmmio_ctx);
        if (ctx->active) {
                disarm_kmmio_fault_page(faultpage);
                if (addr == ctx->addr) {
                        /*
                         * On SMP we sometimes get recursive probe hits on the
                         * same address. Context is already saved, fall out.
                         */
                        pr_debug("kmmio: duplicate probe hit on CPU %d, for "
                                 "address 0x%08lx.\n",
                                 smp_processor_id(), addr);
                        ret = 1;
                        goto no_kmmio_ctx;
                }
                /*
                 * Prevent overwriting an already in-flight context.
                 * This should not happen, let's hope disarming at least
                 * prevents a panic.
                 */
                pr_emerg("kmmio: recursive probe hit on CPU %d, "
                         "for address 0x%08lx. Ignoring.\n",
                         smp_processor_id(), addr);
                pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
                         ctx->addr);
                goto no_kmmio_ctx;
        }
        ctx->active++;

        ctx->fpage = faultpage;
        ctx->probe = get_kmmio_probe(addr);
        ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
        ctx->addr = addr;

        if (ctx->probe && ctx->probe->pre_handler)
                ctx->probe->pre_handler(ctx->probe, regs, addr);

        /*
         * Enable single-stepping and disable interrupts for the faulting
         * context. Local interrupts must not get enabled during stepping.
         */
        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;

        /* Now we set present bit in PTE and single step. */
        disarm_kmmio_fault_page(ctx->fpage);

        /*
         * If another cpu accesses the same page while we are stepping,
         * the access will not be caught. It will simply succeed and the
         * only downside is we lose the event. If this becomes a problem,
         * the user should drop to single cpu before tracing.
         */

        put_cpu_var(kmmio_ctx);
        return 1; /* fault handled */

no_kmmio_ctx:
        put_cpu_var(kmmio_ctx);
no_kmmio:
        rcu_read_unlock();
        preempt_enable_no_resched();
        return ret;
}
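
/*
 * Call-site sketch (hypothetical, for illustration only): the page fault
 * path is expected to hand MMIO faults to kmmio_handler() before doing its
 * normal work, e.g. via a helper along these lines, where is_kmmio_active()
 * is assumed to be the mmiotrace helper that tests kmmio_count:
 *
 *      static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
 *      {
 *              if (unlikely(is_kmmio_active()))
 *                      if (kmmio_handler(regs, addr) == 1)
 *                              return -1;
 *              return 0;
 *      }
 *
 * A return of -1 tells the caller that the fault was consumed here and that
 * normal page fault handling must be skipped.
 */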

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
        int ret = 0;
        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

        if (!ctx->active) {
                pr_debug("kmmio: spurious debug trap on CPU %d.\n",
                         smp_processor_id());
                goto out;
        }

        if (ctx->probe && ctx->probe->post_handler)
                ctx->probe->post_handler(ctx->probe, condition, regs);

        arm_kmmio_fault_page(ctx->fpage);

        regs->flags &= ~X86_EFLAGS_TF;
        regs->flags |= ctx->saved_flags;

        /* These were acquired in kmmio_handler(). */
        ctx->active--;
        BUG_ON(ctx->active);
        rcu_read_unlock();
        preempt_enable_no_resched();

        /*
         * If somebody else is single-stepping across a probe point, flags
         * will have TF set, in which case, continue the remaining processing
         * of do_debug, as if this is not a probe hit.
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                ret = 1;
out:
        put_cpu_var(kmmio_ctx);
        return ret;
}

/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (f) {
                if (!f->count)
                        arm_kmmio_fault_page(f);
                f->count++;
                return 0;
        }

        f = kzalloc(sizeof(*f), GFP_ATOMIC);
        if (!f)
                return -1;

        f->count = 1;
        f->page = page;

        if (arm_kmmio_fault_page(f)) {
                kfree(f);
                return -1;
        }

        list_add_rcu(&f->list, kmmio_page_list(f->page));

        return 0;
}

/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long page,
                                     struct kmmio_fault_page **release_list)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (!f)
                return;

        f->count--;
        BUG_ON(f->count < 0);
        if (!f->count) {
                disarm_kmmio_fault_page(f);
                f->release_next = *release_list;
                *release_list = f;
        }
}

/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped. Such events can only result from
 * programming mistakes that access addresses before the beginning or past
 * the end of a mapping.
 */
int register_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        int ret = 0;
        unsigned long size = 0;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);

        spin_lock_irqsave(&kmmio_lock, flags);
        if (get_kmmio_probe(p->addr)) {
                ret = -EEXIST;
                goto out;
        }
        kmmio_count++;
        list_add_rcu(&p->list, &kmmio_probes);
        while (size < size_lim) {
                if (add_kmmio_fault_page(p->addr + size))
                        pr_err("kmmio: Unable to set page fault.\n");
                size += PAGE_SIZE;
        }
out:
        spin_unlock_irqrestore(&kmmio_lock, flags);
        /*
         * XXX: What should I do here?
         * Here was a call to global_flush_tlb(), but it does not exist
         * anymore. It seems it's not needed after all.
         */
        return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
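
/*
 * Usage sketch (hypothetical, for illustration only). The field names
 * addr/len/pre_handler/post_handler and the handler signatures below are
 * inferred from how ctx->probe is used in kmmio_handler() and
 * post_kmmio_handler(); a real caller (such as mmiotrace) registers a probe
 * covering an ioremap()'d region:
 *
 *      static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
 *                         unsigned long addr)
 *      {
 *              pr_info("kmmio pre-hit at 0x%08lx\n", addr);
 *      }
 *
 *      static void my_post(struct kmmio_probe *p, unsigned long condition,
 *                          struct pt_regs *regs)
 *      {
 *              pr_info("kmmio post-hit\n");
 *      }
 *
 *      static struct kmmio_probe my_probe = {
 *              .addr = (unsigned long)io_base,  (hypothetical ioremap address)
 *              .len = io_len,                   (length of the traced region)
 *              .pre_handler = my_pre,
 *              .post_handler = my_post,
 *      };
 *
 *      if (register_kmmio_probe(&my_probe))
 *              pr_err("probe registration failed\n");
 */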

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *p = dr->release_list;

        while (p) {
                struct kmmio_fault_page *next = p->release_next;
                BUG_ON(p->count);
                kfree(p);
                p = next;
        }
        kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *p = dr->release_list;
        struct kmmio_fault_page **prevp = &dr->release_list;
        unsigned long flags;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (p) {
                if (!p->count)
                        list_del_rcu(&p->list);
                else
                        *prevp = p->release_next;
                prevp = &p->release_next;
                p = p->release_next;
        }
        spin_unlock_irqrestore(&kmmio_lock, flags);

        /* This is the real RCU destroy call. */
        call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore. Only after that
 * you may actually release your struct kmmio_probe.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs, deferred via RCU.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        unsigned long size = 0;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
        struct kmmio_fault_page *release_list = NULL;
        struct kmmio_delayed_release *drelease;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (size < size_lim) {
                release_kmmio_fault_page(p->addr + size, &release_list);
                size += PAGE_SIZE;
        }
        list_del_rcu(&p->list);
        kmmio_count--;
        spin_unlock_irqrestore(&kmmio_lock, flags);

        drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
        if (!drelease) {
                pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
                return;
        }
        drelease->release_list = release_list;

        /*
         * This is not really RCU here. We have just disarmed a set of
         * pages so that they cannot trigger page faults anymore. However,
         * we cannot remove the pages from kmmio_page_table,
         * because a probe hit might be in flight on another CPU. The
         * pages are collected into a list, and they will be removed from
         * kmmio_page_table when it is certain that no probe hit related to
         * these pages can be in flight. An RCU grace period sounds like a
         * good choice.
         *
         * If we removed the pages too early, the kmmio page fault handler
         * might not find the respective kmmio_fault_page and conclude it is
         * not a kmmio fault, when it actually is. This would lead to madness.
         */
        call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
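
/*
 * Teardown sketch (hypothetical, for illustration only), following the rule
 * stated above: after unregister_kmmio_probe() the caller must wait an RCU
 * grace period before the struct kmmio_probe (and whatever backs the traced
 * mapping) may be reused or freed, because handlers may still be running on
 * other CPUs:
 *
 *      unregister_kmmio_probe(&my_probe);
 *      synchronize_rcu();
 *      iounmap(io_base);
 */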

static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
                              void *args)
{
        struct die_args *arg = args;

        if (val == DIE_DEBUG && (arg->err & DR_STEP))
                if (post_kmmio_handler(arg->err, arg->regs) == 1)
                        return NOTIFY_STOP;

        return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
};

static int __init init_kmmio(void)
{
        int i;

        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
                INIT_LIST_HEAD(&kmmio_page_table[i]);

        return register_die_notifier(&nb_die);
}
fs_initcall(init_kmmio); /* should be before device_initcall() */