nmi.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. /*
  2. * Copyright (C) 1991, 1992 Linus Torvalds
  3. * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
  4. * Copyright (C) 2011 Don Zickus Red Hat, Inc.
  5. *
  6. * Pentium III FXSR, SSE support
  7. * Gareth Hughes <gareth@valinux.com>, May 2000
  8. */
  9. /*
  10. * Handle hardware traps and faults.
  11. */
  12. #include <linux/spinlock.h>
  13. #include <linux/kprobes.h>
  14. #include <linux/kdebug.h>
  15. #include <linux/nmi.h>
  16. #include <linux/delay.h>
  17. #include <linux/hardirq.h>
  18. #include <linux/slab.h>
  19. #include <linux/export.h>
  20. #if defined(CONFIG_EDAC)
  21. #include <linux/edac.h>
  22. #endif
  23. #include <linux/atomic.h>
  24. #include <asm/traps.h>
  25. #include <asm/mach_traps.h>
  26. #include <asm/nmi.h>
  27. #include <asm/x86_init.h>
  28. #define NMI_MAX_NAMELEN 16
  29. struct nmiaction {
  30. struct list_head list;
  31. nmi_handler_t handler;
  32. unsigned int flags;
  33. char *name;
  34. };
  35. struct nmi_desc {
  36. spinlock_t lock;
  37. struct list_head head;
  38. };
  39. static struct nmi_desc nmi_desc[NMI_MAX] =
  40. {
  41. {
  42. .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
  43. .head = LIST_HEAD_INIT(nmi_desc[0].head),
  44. },
  45. {
  46. .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
  47. .head = LIST_HEAD_INIT(nmi_desc[1].head),
  48. },
  49. };
  /* Per-CPU NMI event counters, updated from the NMI path in this file. */
  struct nmi_stats {
      unsigned int normal;      /* events claimed by NMI_LOCAL handlers */
      unsigned int unknown;     /* events that reached unknown_nmi_error() */
      unsigned int external;    /* NMIs with a reason bit set in the reason port */
      unsigned int swallow;     /* back-to-back NMIs that were swallowed */
  };
  56. static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
  57. static int ignore_nmis;
  58. int unknown_nmi_panic;
  59. /*
  60. * Prevent NMI reason port (0x61) being accessed simultaneously, can
  61. * only be used in NMI handler.
  62. */
  63. static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
  64. static int __init setup_unknown_nmi_panic(char *str)
  65. {
  66. unknown_nmi_panic = 1;
  67. return 1;
  68. }
  69. __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
  /* Map an NMI type to the address of its slot in nmi_desc[]. */
  #define nmi_to_desc(type) (&nmi_desc[(type)])
  71. static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
  72. {
  73. struct nmi_desc *desc = nmi_to_desc(type);
  74. struct nmiaction *a;
  75. int handled=0;
  76. rcu_read_lock();
  77. /*
  78. * NMIs are edge-triggered, which means if you have enough
  79. * of them concurrently, you can lose some because only one
  80. * can be latched at any given time. Walk the whole list
  81. * to handle those situations.
  82. */
  83. list_for_each_entry_rcu(a, &desc->head, list)
  84. handled += a->handler(type, regs);
  85. rcu_read_unlock();
  86. /* return total number of NMI events handled */
  87. return handled;
  88. }
  89. static int __setup_nmi(unsigned int type, struct nmiaction *action)
  90. {
  91. struct nmi_desc *desc = nmi_to_desc(type);
  92. unsigned long flags;
  93. spin_lock_irqsave(&desc->lock, flags);
  94. /*
  95. * most handlers of type NMI_UNKNOWN never return because
  96. * they just assume the NMI is theirs. Just a sanity check
  97. * to manage expectations
  98. */
  99. WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
  100. /*
  101. * some handlers need to be executed first otherwise a fake
  102. * event confuses some handlers (kdump uses this flag)
  103. */
  104. if (action->flags & NMI_FLAG_FIRST)
  105. list_add_rcu(&action->list, &desc->head);
  106. else
  107. list_add_tail_rcu(&action->list, &desc->head);
  108. spin_unlock_irqrestore(&desc->lock, flags);
  109. return 0;
  110. }
  111. static struct nmiaction *__free_nmi(unsigned int type, const char *name)
  112. {
  113. struct nmi_desc *desc = nmi_to_desc(type);
  114. struct nmiaction *n;
  115. unsigned long flags;
  116. spin_lock_irqsave(&desc->lock, flags);
  117. list_for_each_entry_rcu(n, &desc->head, list) {
  118. /*
  119. * the name passed in to describe the nmi handler
  120. * is used as the lookup key
  121. */
  122. if (!strcmp(n->name, name)) {
  123. WARN(in_nmi(),
  124. "Trying to free NMI (%s) from NMI context!\n", n->name);
  125. list_del_rcu(&n->list);
  126. break;
  127. }
  128. }
  129. spin_unlock_irqrestore(&desc->lock, flags);
  130. synchronize_rcu();
  131. return (n);
  132. }
  133. int register_nmi_handler(unsigned int type, nmi_handler_t handler,
  134. unsigned long nmiflags, const char *devname)
  135. {
  136. struct nmiaction *action;
  137. int retval = -ENOMEM;
  138. if (!handler)
  139. return -EINVAL;
  140. action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
  141. if (!action)
  142. goto fail_action;
  143. action->handler = handler;
  144. action->flags = nmiflags;
  145. action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
  146. if (!action->name)
  147. goto fail_action_name;
  148. retval = __setup_nmi(type, action);
  149. if (retval)
  150. goto fail_setup_nmi;
  151. return retval;
  152. fail_setup_nmi:
  153. kfree(action->name);
  154. fail_action_name:
  155. kfree(action);
  156. fail_action:
  157. return retval;
  158. }
  159. EXPORT_SYMBOL_GPL(register_nmi_handler);
  160. void unregister_nmi_handler(unsigned int type, const char *name)
  161. {
  162. struct nmiaction *a;
  163. a = __free_nmi(type, name);
  164. if (a) {
  165. kfree(a->name);
  166. kfree(a);
  167. }
  168. }
  169. EXPORT_SYMBOL_GPL(unregister_nmi_handler);
  170. static notrace __kprobes void
  171. pci_serr_error(unsigned char reason, struct pt_regs *regs)
  172. {
  173. pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
  174. reason, smp_processor_id());
  175. /*
  176. * On some machines, PCI SERR line is used to report memory
  177. * errors. EDAC makes use of it.
  178. */
  179. #if defined(CONFIG_EDAC)
  180. if (edac_handler_set()) {
  181. edac_atomic_assert_error();
  182. return;
  183. }
  184. #endif
  185. if (panic_on_unrecovered_nmi)
  186. panic("NMI: Not continuing");
  187. pr_emerg("Dazed and confused, but trying to continue\n");
  188. /* Clear and disable the PCI SERR error line. */
  189. reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
  190. outb(reason, NMI_REASON_PORT);
  191. }
  192. static notrace __kprobes void
  193. io_check_error(unsigned char reason, struct pt_regs *regs)
  194. {
  195. unsigned long i;
  196. pr_emerg(
  197. "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
  198. reason, smp_processor_id());
  199. show_registers(regs);
  200. if (panic_on_io_nmi)
  201. panic("NMI IOCK error: Not continuing");
  202. /* Re-enable the IOCK line, wait for a few seconds */
  203. reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
  204. outb(reason, NMI_REASON_PORT);
  205. i = 20000;
  206. while (--i) {
  207. touch_nmi_watchdog();
  208. udelay(100);
  209. }
  210. reason &= ~NMI_REASON_CLEAR_IOCHK;
  211. outb(reason, NMI_REASON_PORT);
  212. }
  213. static notrace __kprobes void
  214. unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
  215. {
  216. int handled;
  217. /*
  218. * Use 'false' as back-to-back NMIs are dealt with one level up.
  219. * Of course this makes having multiple 'unknown' handlers useless
  220. * as only the first one is ever run (unless it can actually determine
  221. * if it caused the NMI)
  222. */
  223. handled = nmi_handle(NMI_UNKNOWN, regs, false);
  224. if (handled) {
  225. __this_cpu_add(nmi_stats.unknown, handled);
  226. return;
  227. }
  228. __this_cpu_add(nmi_stats.unknown, 1);
  229. pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
  230. reason, smp_processor_id());
  231. pr_emerg("Do you have a strange power saving mode enabled?\n");
  232. if (unknown_nmi_panic || panic_on_unrecovered_nmi)
  233. panic("NMI: Not continuing");
  234. pr_emerg("Dazed and confused, but trying to continue\n");
  235. }
  236. static DEFINE_PER_CPU(bool, swallow_nmi);
  237. static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
  238. static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
  239. {
  240. unsigned char reason = 0;
  241. int handled;
  242. bool b2b = false;
  243. /*
  244. * CPU-specific NMI must be processed before non-CPU-specific
  245. * NMI, otherwise we may lose it, because the CPU-specific
  246. * NMI can not be detected/processed on other CPUs.
  247. */
  248. /*
  249. * Back-to-back NMIs are interesting because they can either
  250. * be two NMI or more than two NMIs (any thing over two is dropped
  251. * due to NMI being edge-triggered). If this is the second half
  252. * of the back-to-back NMI, assume we dropped things and process
  253. * more handlers. Otherwise reset the 'swallow' NMI behaviour
  254. */
  255. if (regs->ip == __this_cpu_read(last_nmi_rip))
  256. b2b = true;
  257. else
  258. __this_cpu_write(swallow_nmi, false);
  259. __this_cpu_write(last_nmi_rip, regs->ip);
  260. handled = nmi_handle(NMI_LOCAL, regs, b2b);
  261. __this_cpu_add(nmi_stats.normal, handled);
  262. if (handled) {
  263. /*
  264. * There are cases when a NMI handler handles multiple
  265. * events in the current NMI. One of these events may
  266. * be queued for in the next NMI. Because the event is
  267. * already handled, the next NMI will result in an unknown
  268. * NMI. Instead lets flag this for a potential NMI to
  269. * swallow.
  270. */
  271. if (handled > 1)
  272. __this_cpu_write(swallow_nmi, true);
  273. return;
  274. }
  275. /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
  276. raw_spin_lock(&nmi_reason_lock);
  277. reason = x86_platform.get_nmi_reason();
  278. if (reason & NMI_REASON_MASK) {
  279. if (reason & NMI_REASON_SERR)
  280. pci_serr_error(reason, regs);
  281. else if (reason & NMI_REASON_IOCHK)
  282. io_check_error(reason, regs);
  283. #ifdef CONFIG_X86_32
  284. /*
  285. * Reassert NMI in case it became active
  286. * meanwhile as it's edge-triggered:
  287. */
  288. reassert_nmi();
  289. #endif
  290. __this_cpu_add(nmi_stats.external, 1);
  291. raw_spin_unlock(&nmi_reason_lock);
  292. return;
  293. }
  294. raw_spin_unlock(&nmi_reason_lock);
  295. /*
  296. * Only one NMI can be latched at a time. To handle
  297. * this we may process multiple nmi handlers at once to
  298. * cover the case where an NMI is dropped. The downside
  299. * to this approach is we may process an NMI prematurely,
  300. * while its real NMI is sitting latched. This will cause
  301. * an unknown NMI on the next run of the NMI processing.
  302. *
  303. * We tried to flag that condition above, by setting the
  304. * swallow_nmi flag when we process more than one event.
  305. * This condition is also only present on the second half
  306. * of a back-to-back NMI, so we flag that condition too.
  307. *
  308. * If both are true, we assume we already processed this
  309. * NMI previously and we swallow it. Otherwise we reset
  310. * the logic.
  311. *
  312. * There are scenarios where we may accidentally swallow
  313. * a 'real' unknown NMI. For example, while processing
  314. * a perf NMI another perf NMI comes in along with a
  315. * 'real' unknown NMI. These two NMIs get combined into
  316. * one (as descibed above). When the next NMI gets
  317. * processed, it will be flagged by perf as handled, but
  318. * noone will know that there was a 'real' unknown NMI sent
  319. * also. As a result it gets swallowed. Or if the first
  320. * perf NMI returns two events handled then the second
  321. * NMI will get eaten by the logic below, again losing a
  322. * 'real' unknown NMI. But this is the best we can do
  323. * for now.
  324. */
  325. if (b2b && __this_cpu_read(swallow_nmi))
  326. __this_cpu_add(nmi_stats.swallow, 1);
  327. else
  328. unknown_nmi_error(reason, regs);
  329. }
  330. /*
  331. * NMIs can hit breakpoints which will cause it to lose its
  332. * NMI context with the CPU when the breakpoint does an iret.
  333. */
  334. #ifdef CONFIG_X86_32
  335. /*
  336. * For i386, NMIs use the same stack as the kernel, and we can
  337. * add a workaround to the iret problem in C. Simply have 3 states
  338. * the NMI can be in.
  339. *
  340. * 1) not running
  341. * 2) executing
  342. * 3) latched
  343. *
  344. * When no NMI is in progress, it is in the "not running" state.
  345. * When an NMI comes in, it goes into the "executing" state.
  346. * Normally, if another NMI is triggered, it does not interrupt
  347. * the running NMI and the HW will simply latch it so that when
  348. * the first NMI finishes, it will restart the second NMI.
  349. * (Note, the latch is binary, thus multiple NMIs triggering,
  350. * when one is running, are ignored. Only one NMI is restarted.)
  351. *
  352. * If an NMI hits a breakpoint that executes an iret, another
  353. * NMI can preempt it. We do not want to allow this new NMI
  354. * to run, but we want to execute it when the first one finishes.
  355. * We set the state to "latched", and the first NMI will perform
  356. * a cmpxchg on the state, and if it doesn't successfully
  357. * reset the state to "not running" it will restart the next
  358. * NMI.
  359. */
  360. enum nmi_states {
  361. NMI_NOT_RUNNING,
  362. NMI_EXECUTING,
  363. NMI_LATCHED,
  364. };
  365. static DEFINE_PER_CPU(enum nmi_states, nmi_state);
  /*
   * Entry half of the i386 nesting workaround.  This is a macro because
   * it returns from the enclosing function and defines the nmi_restart
   * label that nmi_nesting_postprocess() jumps back to.
   *
   * If an NMI is already in progress on this CPU, mark the state as
   * latched and leave the enclosing function; otherwise mark the state
   * as executing and continue.
   */
  #define nmi_nesting_preprocess(regs)                                \
      do {                                                            \
          if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {          \
              __get_cpu_var(nmi_state) = NMI_LATCHED;                 \
              return;                                                 \
          }                                                           \
      nmi_restart:                                                    \
          __get_cpu_var(nmi_state) = NMI_EXECUTING;                   \
      } while (0)
  /*
   * Exit half of the i386 nesting workaround.  Tries to move the state
   * from "executing" back to "not running" with a cmpxchg.  If the state
   * was changed in the meantime, jump to the nmi_restart label defined
   * by nmi_nesting_preprocess() to run the handler again.
   */
  #define nmi_nesting_postprocess()                                   \
      do {                                                            \
          if (cmpxchg(&__get_cpu_var(nmi_state),                      \
                      NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
              goto nmi_restart;                                       \
      } while (0)
  381. #else /* x86_64 */
  382. /*
  383. * In x86_64 things are a bit more difficult. This has the same problem
  384. * where an NMI hitting a breakpoint that calls iret will remove the
  385. * NMI context, allowing a nested NMI to enter. What makes this more
  386. * difficult is that both NMIs and breakpoints have their own stack.
  387. * When a new NMI or breakpoint is executed, the stack is set to a fixed
  388. * point. If an NMI is nested, it will have its stack set at that same
  389. * fixed address that the first NMI had, and will start corrupting the
  390. * stack. This is handled in entry_64.S, but the same problem exists with
  391. * the breakpoint stack.
  392. *
  393. * If a breakpoint is being processed, and the debug stack is being used,
  394. * if an NMI comes in and also hits a breakpoint, the stack pointer
  395. * will be set to the same fixed address as the breakpoint that was
  396. * interrupted, causing that stack to be corrupted. To handle this case,
  397. * check if the stack that was interrupted is the debug stack, and if
  398. * so, change the IDT so that new breakpoints will use the current stack
  399. * and not switch to the fixed address. On return of the NMI, switch back
  400. * to the original IDT.
  401. */
  402. static DEFINE_PER_CPU(int, update_debug_stack);
  403. static inline void nmi_nesting_preprocess(struct pt_regs *regs)
  404. {
  405. /*
  406. * If we interrupted a breakpoint, it is possible that
  407. * the nmi handler will have breakpoints too. We need to
  408. * change the IDT such that breakpoints that happen here
  409. * continue to use the NMI stack.
  410. */
  411. if (unlikely(is_debug_stack(regs->sp))) {
  412. debug_stack_set_zero();
  413. __get_cpu_var(update_debug_stack) = 1;
  414. }
  415. }
  416. static inline void nmi_nesting_postprocess(void)
  417. {
  418. if (unlikely(__get_cpu_var(update_debug_stack)))
  419. debug_stack_reset();
  420. }
  421. #endif
  422. dotraplinkage notrace __kprobes void
  423. do_nmi(struct pt_regs *regs, long error_code)
  424. {
  425. nmi_nesting_preprocess(regs);
  426. nmi_enter();
  427. inc_irq_stat(__nmi_count);
  428. if (!ignore_nmis)
  429. default_do_nmi(regs);
  430. nmi_exit();
  431. /* On i386, may loop back to preprocess */
  432. nmi_nesting_postprocess();
  433. }
  434. void stop_nmi(void)
  435. {
  436. ignore_nmis++;
  437. }
  438. void restart_nmi(void)
  439. {
  440. ignore_nmis--;
  441. }
  442. /* reset the back-to-back NMI logic */
  443. void local_touch_nmi(void)
  444. {
  445. __this_cpu_write(last_nmi_rip, 0);
  446. }