/* Support for MMIO probes.
 * Benefits from much code in kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */
#include <linux/version.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <asm/io.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
struct kmmio_fault_page {
	struct list_head list;
	struct kmmio_fault_page *release_next;
	unsigned long page; /* location of the fault page */

	/*
	 * Number of times this page has been registered as a part
	 * of a probe. If zero, page is disarmed and this may be freed.
	 * Used only by writers (RCU).
	 */
	int count;
};

struct kmmio_delayed_release {
	struct rcu_head rcu;
	struct kmmio_fault_page *release_list;
};

struct kmmio_context {
	struct kmmio_fault_page *fpage;
	struct kmmio_probe *probe;
	unsigned long saved_flags;
	unsigned long addr;
	int active;
};
static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
							void *args);

static DECLARE_MUTEX(kmmio_init_mutex);
static DEFINE_SPINLOCK(kmmio_lock);

/* These are protected by kmmio_lock */
static int kmmio_initialized;
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long page)
{
	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/* protected by kmmio_init_mutex */
static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};
/**
 * Makes sure kmmio is initialized and usable.
 * This must be called before any other kmmio function defined here.
 * May sleep.
 */
void reference_kmmio(void)
{
	down(&kmmio_init_mutex);
	spin_lock_irq(&kmmio_lock);
	if (!kmmio_initialized) {
		int i;
		for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
			INIT_LIST_HEAD(&kmmio_page_table[i]);
		if (register_die_notifier(&nb_die))
			BUG();
	}
	kmmio_initialized++;
	spin_unlock_irq(&kmmio_lock);
	up(&kmmio_init_mutex);
}
EXPORT_SYMBOL_GPL(reference_kmmio);
/**
 * Clean up kmmio after use. This must be called for every call to
 * reference_kmmio(). All probes registered after the corresponding
 * reference_kmmio() must have been unregistered when calling this.
 * May sleep.
 */
void unreference_kmmio(void)
{
	bool unreg = false;

	down(&kmmio_init_mutex);
	spin_lock_irq(&kmmio_lock);

	if (kmmio_initialized == 1) {
		BUG_ON(is_kmmio_active());
		unreg = true;
	}

	kmmio_initialized--;
	BUG_ON(kmmio_initialized < 0);
	spin_unlock_irq(&kmmio_lock);

	if (unreg)
		unregister_die_notifier(&nb_die); /* calls sync_rcu() */
	up(&kmmio_init_mutex);
}
EXPORT_SYMBOL(unreference_kmmio);
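/*
 * Editorial sketch, not part of the original file: how a client (for
 * example an MMIO tracer) might bracket its probe usage with
 * reference_kmmio()/unreference_kmmio(), per the comments above. The
 * example_* names are hypothetical.
 */
#if 0	/* illustration only */
static int example_client_start(void)
{
	reference_kmmio();	/* initialize kmmio if needed; may sleep */
	/* ... register_kmmio_probe() calls go here ... */
	return 0;
}

static void example_client_stop(void)
{
	/* ... all probes must be unregistered first ... */
	unreference_kmmio();	/* drop the reference; may sleep */
}
#endif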
/*
 * This is basically a dynamic stabbing problem:
 * we could use the existing prio tree code, or one of these possibly
 * better implementations:
 * - The Interval Skip List: A Data Structure for Finding All Intervals
 *   That Overlap a Point (might be simple)
 * - Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
	struct kmmio_probe *p;

	list_for_each_entry_rcu(p, &kmmio_probes, list) {
		if (addr >= p->addr && addr <= (p->addr + p->len))
			return p;
	}
	return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
	struct list_head *head;
	struct kmmio_fault_page *p;

	page &= PAGE_MASK;
	head = kmmio_page_list(page);
	list_for_each_entry_rcu(p, head, list) {
		if (p->page == page)
			return p;
	}
	return NULL;
}
/** Mark the given page as not present. Access to it will trigger a fault. */
static void arm_kmmio_fault_page(unsigned long page, int *page_level)
{
	unsigned long address = page & PAGE_MASK;
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte) {
		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
							__func__, page);
		return;
	}

	if (level == PG_LEVEL_2M) {
		pmd_t *pmd = (pmd_t *)pte;
		set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT));
	} else {
		/* PG_LEVEL_4K */
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	}

	if (page_level)
		*page_level = level;

	__flush_tlb_one(page);
}
/** Mark the given page as present. */
static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
{
	unsigned long address = page & PAGE_MASK;
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte) {
		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
							__func__, page);
		return;
	}

	if (level == PG_LEVEL_2M) {
		pmd_t *pmd = (pmd_t *)pte;
		set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT));
	} else {
		/* PG_LEVEL_4K */
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	}

	if (page_level)
		*page_level = level;

	__flush_tlb_one(page);
}
/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could already be executing within
 * a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx;
	struct kmmio_fault_page *faultpage;

	/*
	 * Preemption is now disabled to prevent process switch during
	 * single stepping. We can only handle one active kmmio trace
	 * per cpu, so ensure that we finish it before something else
	 * gets to run.
	 *
	 * XXX what if an interrupt occurs between returning from
	 * do_page_fault() and entering the single-step exception handler?
	 * And that interrupt triggers a kmmio trap?
	 * XXX If we are tracing an interrupt service routine or whatever, is
	 * this enough to keep it on the current cpu?
	 */
	preempt_disable();
	rcu_read_lock();

	faultpage = get_kmmio_fault_page(addr);
	if (!faultpage) {
		/*
		 * Either this page fault is not caused by kmmio, or
		 * another CPU just pulled the kmmio probe from under
		 * our feet. In the latter case all hell breaks loose.
		 */
		goto no_kmmio;
	}

	ctx = &get_cpu_var(kmmio_ctx);
	if (ctx->active) {
		/*
		 * Prevent overwriting already in-flight context.
		 * If this page fault really was due to kmmio trap,
		 * all hell breaks loose.
		 */
		pr_emerg("kmmio: recursive probe hit on CPU %d, "
					"for address 0x%08lx. Ignoring.\n",
					smp_processor_id(), addr);
		goto no_kmmio_ctx;
	}
	ctx->active++;

	ctx->fpage = faultpage;
	ctx->probe = get_kmmio_probe(addr);
	ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
	ctx->addr = addr;

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	regs->flags |= TF_MASK;
	regs->flags &= ~IF_MASK;

	/* Now we set present bit in PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage->page, NULL);

	put_cpu_var(kmmio_ctx);
	rcu_read_unlock();
	return 1;

no_kmmio_ctx:
	put_cpu_var(kmmio_ctx);
no_kmmio:
	rcu_read_unlock();
	preempt_enable_no_resched();
	return 0;	/* page fault not handled by kmmio */
}
/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_probe *probe;
	struct kmmio_fault_page *faultpage;
	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

	if (!ctx->active)
		goto out;

	rcu_read_lock();

	faultpage = get_kmmio_fault_page(ctx->addr);
	probe = get_kmmio_probe(ctx->addr);
	if (faultpage != ctx->fpage || probe != ctx->probe) {
		/*
		 * The trace setup changed after kmmio_handler() and before
		 * running this respective post handler. User does not want
		 * the result anymore.
		 */
		ctx->probe = NULL;
		ctx->fpage = NULL;
	}

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	if (ctx->fpage)
		arm_kmmio_fault_page(ctx->fpage->page, NULL);

	regs->flags &= ~TF_MASK;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	BUG_ON(ctx->active);
	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, flags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (!(regs->flags & TF_MASK))
		ret = 1;

	rcu_read_unlock();
out:
	put_cpu_var(kmmio_ctx);
	return ret;
}
/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (f) {
		if (!f->count)
			arm_kmmio_fault_page(f->page, NULL);
		f->count++;
		return 0;
	}

	f = kmalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return -1;

	f->count = 1;
	f->page = page;
	list_add_rcu(&f->list, kmmio_page_list(f->page));

	arm_kmmio_fault_page(f->page, NULL);

	return 0;
}
/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long page,
				struct kmmio_fault_page **release_list)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (!f)
		return;

	f->count--;
	BUG_ON(f->count < 0);
	if (!f->count) {
		disarm_kmmio_fault_page(f->page, NULL);
		f->release_next = *release_list;
		*release_list = f;
	}
}
int register_kmmio_probe(struct kmmio_probe *p)
{
	int ret = 0;
	unsigned long size = 0;

	spin_lock_irq(&kmmio_lock);
	kmmio_count++;
	if (get_kmmio_probe(p->addr)) {
		ret = -EEXIST;
		goto out;
	}
	list_add_rcu(&p->list, &kmmio_probes);
	while (size < p->len) {
		if (add_kmmio_fault_page(p->addr + size))
			pr_err("kmmio: Unable to set page fault.\n");
		size += PAGE_SIZE;
	}
out:
	spin_unlock_irq(&kmmio_lock);
	/*
	 * XXX: What should I do here?
	 * Here was a call to global_flush_tlb(), but it does not exist
	 * anymore. It seems it's not needed after all.
	 */
	return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
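/*
 * Editorial sketch, not part of the original file: setting up and
 * registering a probe over one page of an ioremapped region. The handler
 * signatures are inferred from the call sites in kmmio_handler() and
 * post_kmmio_handler(); struct kmmio_probe itself comes from
 * <linux/mmiotrace.h>. The example_* names are hypothetical.
 */
#if 0	/* illustration only */
static void example_pre(struct kmmio_probe *p, struct pt_regs *regs,
						unsigned long addr)
{
	/* Runs before the faulting MMIO access is single-stepped. */
}

static void example_post(struct kmmio_probe *p, unsigned long condition,
						struct pt_regs *regs)
{
	/* Runs after the single step over the access has completed. */
}

static struct kmmio_probe example_probe = {
	.len = PAGE_SIZE,		/* trap accesses to a single page */
	.pre_handler = example_pre,
	.post_handler = example_post,
};

static int example_attach(void __iomem *io)
{
	example_probe.addr = (unsigned long)io;	/* MMIO address to watch */
	return register_kmmio_probe(&example_probe);	/* 0 or -EEXIST */
}
#endif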
static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *p = dr->release_list;
	while (p) {
		struct kmmio_fault_page *next = p->release_next;
		BUG_ON(p->count);
		kfree(p);
		p = next;
	}
	kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *p = dr->release_list;
	struct kmmio_fault_page **prevp = &dr->release_list;
	unsigned long flags;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (p) {
		if (!p->count)
			list_del_rcu(&p->list);
		else
			*prevp = p->release_next;
		prevp = &p->release_next;
		p = p->release_next;
	}
	spin_unlock_irqrestore(&kmmio_lock, flags);

	/* This is the real RCU destroy call. */
	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}
/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs, from an RCU callback.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long size = 0;
	struct kmmio_fault_page *release_list = NULL;
	struct kmmio_delayed_release *drelease;

	spin_lock_irq(&kmmio_lock);
	while (size < p->len) {
		release_kmmio_fault_page(p->addr + size, &release_list);
		size += PAGE_SIZE;
	}
	list_del_rcu(&p->list);
	kmmio_count--;
	spin_unlock_irq(&kmmio_lock);

	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
	if (!drelease) {
		pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
		return;
	}
	drelease->release_list = release_list;

	/*
	 * This is not really RCU here. We have just disarmed a set of
	 * pages so that they cannot trigger page faults anymore. However,
	 * we cannot remove the pages from kmmio_page_table,
	 * because a probe hit might be in flight on another CPU. The
	 * pages are collected into a list, and they will be removed from
	 * kmmio_page_table when it is certain that no probe hit related to
	 * these pages can be in flight. RCU grace period sounds like a
	 * good choice.
	 *
	 * If we removed the pages too early, the kmmio page fault handler
	 * might not find the respective kmmio_fault_page and decide this
	 * is not a kmmio fault, when it actually is. This would lead to
	 * madness.
	 */
	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
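/*
 * Editorial sketch, not part of the original file: tearing down the probe
 * from the registration example above. As noted in the comment before
 * unregister_kmmio_probe(), the caller must synchronize_rcu() before it
 * may assume the handlers will no longer be invoked.
 */
#if 0	/* illustration only */
static void example_detach(void)
{
	unregister_kmmio_probe(&example_probe);
	synchronize_rcu();	/* wait out in-flight probe hits on other CPUs */
	/* example_pre()/example_post() are guaranteed not to run anymore. */
}
#endif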
static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
								void *args)
{
	struct die_args *arg = args;

	if (val == DIE_DEBUG)
		if (post_kmmio_handler(arg->err, arg->regs) == 1)
			return NOTIFY_STOP;

	return NOTIFY_DONE;
}