kmmio.c

/* Support for MMIO probes.
 * Benefits from much of the kprobes code.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
#include <asm/debugreg.h>
#include <linux/mmiotrace.h>
#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
        struct list_head list;
        struct kmmio_fault_page *release_next;
        unsigned long page; /* location of the fault page */
        pteval_t old_presence; /* page presence prior to arming */
        bool armed;

        /*
         * Number of times this page has been registered as a part
         * of a probe. If zero, page is disarmed and this may be freed.
         * Used only by writers (RCU) and post_kmmio_handler().
         * Protected by kmmio_lock, when linked into kmmio_page_table.
         */
        int count;
};
struct kmmio_delayed_release {
        struct rcu_head rcu;
        struct kmmio_fault_page *release_list;
};

struct kmmio_context {
        struct kmmio_fault_page *fpage;
        struct kmmio_probe *probe;
        unsigned long saved_flags;
        unsigned long addr;
        int active;
};
static DEFINE_SPINLOCK(kmmio_lock);

/* Protected by kmmio_lock */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long page)
{
        return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
/*
 * This is basically a dynamic stabbing problem. We could use the existing
 * prio tree code, or one of the possibly better alternatives:
 * "The Interval Skip List: A Data Structure for Finding All Intervals That
 * Overlap a Point" (might be simple)
 * "Space Efficient Dynamic Stabbing with Fast Queries" - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
        struct kmmio_probe *p;

        list_for_each_entry_rcu(p, &kmmio_probes, list) {
                if (addr >= p->addr && addr < (p->addr + p->len))
                        return p;
        }
        return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
        struct list_head *head;
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        head = kmmio_page_list(page);
        list_for_each_entry_rcu(f, head, list) {
                if (f->page == page)
                        return f;
        }
        return NULL;
}
static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
{
        pmdval_t v = pmd_val(*pmd);

        if (clear) {
                *old = v & _PAGE_PRESENT;
                v &= ~_PAGE_PRESENT;
        } else  /* presume this has been called with clear==true previously */
                v |= *old;
        set_pmd(pmd, __pmd(v));
}

static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
{
        pteval_t v = pte_val(*pte);

        if (clear) {
                *old = v & _PAGE_PRESENT;
                v &= ~_PAGE_PRESENT;
        } else  /* presume this has been called with clear==true previously */
                v |= *old;
        set_pte_atomic(pte, __pte(v));
}
static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
        unsigned int level;
        pte_t *pte = lookup_address(f->page, &level);

        if (!pte) {
                pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
                return -1;
        }

        switch (level) {
        case PG_LEVEL_2M:
                clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
                break;
        case PG_LEVEL_4K:
                clear_pte_presence(pte, clear, &f->old_presence);
                break;
        default:
                pr_err("kmmio: unexpected page level 0x%x.\n", level);
                return -1;
        }

        __flush_tlb_one(f->page);
        return 0;
}
/*
 * Mark the given page as not present. Access to it will trigger a fault.
 *
 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
 * protection is ignored here. RCU read lock is assumed held, so the struct
 * will not disappear unexpectedly. Furthermore, the caller must guarantee
 * that double arming the same virtual address (page) cannot occur.
 *
 * Double disarming on the other hand is allowed, and may occur when a fault
 * and mmiotrace shutdown happen simultaneously.
 */
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
        int ret;

        WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
        if (f->armed) {
                pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
                           f->page, f->count, !!f->old_presence);
        }
        ret = clear_page_presence(f, true);
        WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
        f->armed = true;
        return ret;
}

/** Restore the given page to saved presence state. */
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
        int ret = clear_page_presence(f, false);

        WARN_ONCE(ret < 0,
                  KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
        f->armed = false;
}
/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could be executing especially
 * within a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */

/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
        struct kmmio_context *ctx;
        struct kmmio_fault_page *faultpage;
        int ret = 0; /* default to fault not handled */

        /*
         * Preemption is now disabled to prevent process switch during
         * single stepping. We can only handle one active kmmio trace
         * per cpu, so ensure that we finish it before something else
         * gets to run. We also hold the RCU read lock over single
         * stepping to avoid looking up the probe and kmmio_fault_page
         * again.
         */
        preempt_disable();
        rcu_read_lock();

        faultpage = get_kmmio_fault_page(addr);
        if (!faultpage) {
                /*
                 * Either this page fault is not caused by kmmio, or
                 * another CPU just pulled the kmmio probe from under
                 * our feet. The latter case should not be possible.
                 */
                goto no_kmmio;
        }

        ctx = &get_cpu_var(kmmio_ctx);
        if (ctx->active) {
                if (addr == ctx->addr) {
                        /*
                         * A second fault on the same page means some other
                         * condition needs handling by do_page_fault(), the
                         * page really not being present is the most common.
                         */
                        pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
                                 addr, smp_processor_id());

                        if (!faultpage->old_presence)
                                pr_info("kmmio: unexpected secondary hit for "
                                        "address 0x%08lx on CPU %d.\n", addr,
                                        smp_processor_id());
                } else {
                        /*
                         * Prevent overwriting already in-flight context.
                         * This should not happen, let's hope disarming at
                         * least prevents a panic.
                         */
                        pr_emerg("kmmio: recursive probe hit on CPU %d, "
                                 "for address 0x%08lx. Ignoring.\n",
                                 smp_processor_id(), addr);
                        pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
                                 ctx->addr);
                        disarm_kmmio_fault_page(faultpage);
                }
                goto no_kmmio_ctx;
        }
        ctx->active++;

        ctx->fpage = faultpage;
        ctx->probe = get_kmmio_probe(addr);
        ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
        ctx->addr = addr;

        if (ctx->probe && ctx->probe->pre_handler)
                ctx->probe->pre_handler(ctx->probe, regs, addr);

        /*
         * Enable single-stepping and disable interrupts for the faulting
         * context. Local interrupts must not get enabled during stepping.
         */
        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;

        /* Now we set present bit in PTE and single step. */
        disarm_kmmio_fault_page(ctx->fpage);

        /*
         * If another cpu accesses the same page while we are stepping,
         * the access will not be caught. It will simply succeed and the
         * only downside is we lose the event. If this becomes a problem,
         * the user should drop to single cpu before tracing.
         */

        put_cpu_var(kmmio_ctx);
        return 1; /* fault handled */

no_kmmio_ctx:
        put_cpu_var(kmmio_ctx);
no_kmmio:
        rcu_read_unlock();
        preempt_enable_no_resched();
        return ret;
}
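
/*
 * For reference, a minimal sketch of the caller side: the x86 page-fault
 * path (arch/x86/mm/fault.c) checks is_kmmio_active() -- which simply tests
 * kmmio_count -- and hands the fault to kmmio_handler(). This is an
 * approximation of that hook for illustration, not a verbatim copy of
 * fault.c.
 */
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
        if (unlikely(is_kmmio_active()))
                if (kmmio_handler(regs, addr) == 1)
                        return -1; /* kmmio handled it; skip the normal path */
        return 0;
}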

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
        int ret = 0;
        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

        if (!ctx->active) {
                /*
                 * Debug traps without an active context are due to either
                 * something external causing them (e.g. using a debugger
                 * while mmio tracing is enabled), or erroneous behaviour.
                 */
                pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
                           smp_processor_id());
                goto out;
        }

        if (ctx->probe && ctx->probe->post_handler)
                ctx->probe->post_handler(ctx->probe, condition, regs);

        /* Prevent racing against release_kmmio_fault_page(). */
        spin_lock(&kmmio_lock);
        if (ctx->fpage->count)
                arm_kmmio_fault_page(ctx->fpage);
        spin_unlock(&kmmio_lock);

        regs->flags &= ~X86_EFLAGS_TF;
        regs->flags |= ctx->saved_flags;

        /* These were acquired in kmmio_handler(). */
        ctx->active--;
        BUG_ON(ctx->active);
        rcu_read_unlock();
        preempt_enable_no_resched();

        /*
         * If somebody else is single-stepping across a probe point, flags
         * will have TF set, in which case, continue the remaining processing
         * of do_debug, as if this is not a probe hit.
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                ret = 1;
out:
        put_cpu_var(kmmio_ctx);
        return ret;
}
/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (f) {
                if (!f->count)
                        arm_kmmio_fault_page(f);
                f->count++;
                return 0;
        }

        f = kzalloc(sizeof(*f), GFP_ATOMIC);
        if (!f)
                return -1;

        f->count = 1;
        f->page = page;

        if (arm_kmmio_fault_page(f)) {
                kfree(f);
                return -1;
        }

        list_add_rcu(&f->list, kmmio_page_list(f->page));

        return 0;
}
/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long page,
                                struct kmmio_fault_page **release_list)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (!f)
                return;

        f->count--;
        BUG_ON(f->count < 0);
        if (!f->count) {
                disarm_kmmio_fault_page(f);
                f->release_next = *release_list;
                *release_list = f;
        }
}
/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped. Such events can only result from a
 * programming mistake that accesses addresses before the beginning or past
 * the end of a mapping.
 */
int register_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        int ret = 0;
        unsigned long size = 0;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);

        spin_lock_irqsave(&kmmio_lock, flags);
        if (get_kmmio_probe(p->addr)) {
                ret = -EEXIST;
                goto out;
        }
        kmmio_count++;
        list_add_rcu(&p->list, &kmmio_probes);
        while (size < size_lim) {
                if (add_kmmio_fault_page(p->addr + size))
                        pr_err("kmmio: Unable to set page fault.\n");
                size += PAGE_SIZE;
        }
out:
        spin_unlock_irqrestore(&kmmio_lock, flags);
        /*
         * XXX: What should I do here?
         * Here was a call to global_flush_tlb(), but it does not exist
         * anymore. It seems it's not needed after all.
         */
        return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
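
/*
 * A minimal usage sketch for register_kmmio_probe(), assuming the
 * struct kmmio_probe layout and the pre/post handler typedefs declared in
 * <linux/mmiotrace.h>. The handlers run in fault and debug-trap context
 * with interrupts disabled, so they must not sleep or take sleeping locks.
 * The names example_pre, example_post, example_probe and
 * example_trace_start are hypothetical, for illustration only.
 */
static void example_pre(struct kmmio_probe *p, struct pt_regs *regs,
                        unsigned long addr)
{
        /* Called after the fault, before the access is single-stepped. */
        pr_info("kmmio example: MMIO access at 0x%08lx\n", addr);
}

static void example_post(struct kmmio_probe *p, unsigned long condition,
                         struct pt_regs *regs)
{
        /* Called from the debug trap, after the access has executed. */
        pr_info("kmmio example: access completed\n");
}

static struct kmmio_probe example_probe = {
        .pre_handler    = example_pre,
        .post_handler   = example_post,
};

/* Arm one page of an already ioremap()'d region: */
static int example_trace_start(void __iomem *mmio_base)
{
        example_probe.addr = (unsigned long)mmio_base;
        example_probe.len = PAGE_SIZE;
        return register_kmmio_probe(&example_probe); /* -EEXIST if probed */
}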

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *f = dr->release_list;

        while (f) {
                struct kmmio_fault_page *next = f->release_next;
                BUG_ON(f->count);
                kfree(f);
                f = next;
        }
        kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr =
                container_of(head, struct kmmio_delayed_release, rcu);
        struct kmmio_fault_page *f = dr->release_list;
        struct kmmio_fault_page **prevp = &dr->release_list;
        unsigned long flags;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (f) {
                if (!f->count) {
                        list_del_rcu(&f->list);
                        prevp = &f->release_next;
                } else {
                        *prevp = f->release_next;
                }
                f = f->release_next;
        }
        spin_unlock_irqrestore(&kmmio_lock, flags);

        /* This is the real RCU destroy call. */
        call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}
/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore. Only after that
 * you may actually release your struct kmmio_probe.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs, via RCU.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        unsigned long size = 0;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
        struct kmmio_fault_page *release_list = NULL;
        struct kmmio_delayed_release *drelease;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (size < size_lim) {
                release_kmmio_fault_page(p->addr + size, &release_list);
                size += PAGE_SIZE;
        }
        list_del_rcu(&p->list);
        kmmio_count--;
        spin_unlock_irqrestore(&kmmio_lock, flags);

        drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
        if (!drelease) {
                pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
                return;
        }
        drelease->release_list = release_list;

        /*
         * This is not really RCU here. We have just disarmed a set of
         * pages so that they cannot trigger page faults anymore. However,
         * we cannot remove the pages from kmmio_page_table,
         * because a probe hit might be in flight on another CPU. The
         * pages are collected into a list, and they will be removed from
         * kmmio_page_table when it is certain that no probe hit related to
         * these pages can be in flight. RCU grace period sounds like a
         * good choice.
         *
         * If we removed the pages too early, kmmio page fault handler might
         * not find the respective kmmio_fault_page and determine it's not
         * a kmmio fault, when it actually is. This would lead to madness.
         */
        call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
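
/*
 * The matching teardown sketch: after unregister_kmmio_probe() returns,
 * handlers may still be running on other CPUs, so wait for an RCU grace
 * period before freeing the probe or unloading the module that owns the
 * handlers. example_probe and example_trace_stop continue the hypothetical
 * sketch from register_kmmio_probe() above.
 */
static void example_trace_stop(void)
{
        unregister_kmmio_probe(&example_probe);
        synchronize_rcu();      /* no more pre/post handler callbacks */
        /* Now example_probe may be freed or the module unloaded. */
}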

static int
kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
{
        struct die_args *arg = args;

        if (val == DIE_DEBUG && (arg->err & DR_STEP))
                if (post_kmmio_handler(arg->err, arg->regs) == 1)
                        return NOTIFY_STOP;

        return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
};

int kmmio_init(void)
{
        int i;

        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
                INIT_LIST_HEAD(&kmmio_page_table[i]);

        return register_die_notifier(&nb_die);
}

void kmmio_cleanup(void)
{
        int i;

        unregister_die_notifier(&nb_die);
        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
                WARN_ONCE(!list_empty(&kmmio_page_table[i]),
                        KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
        }
}