nmi.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. /*
  2. * Copyright (C) 1991, 1992 Linus Torvalds
  3. * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
  4. * Copyright (C) 2011 Don Zickus Red Hat, Inc.
  5. *
  6. * Pentium III FXSR, SSE support
  7. * Gareth Hughes <gareth@valinux.com>, May 2000
  8. */
  9. /*
  10. * Handle hardware traps and faults.
  11. */
  12. #include <linux/spinlock.h>
  13. #include <linux/kprobes.h>
  14. #include <linux/kdebug.h>
  15. #include <linux/nmi.h>
  16. #include <linux/delay.h>
  17. #include <linux/hardirq.h>
  18. #include <linux/slab.h>
  19. #include <linux/mca.h>
  20. #if defined(CONFIG_EDAC)
  21. #include <linux/edac.h>
  22. #endif
  23. #include <linux/atomic.h>
  24. #include <asm/traps.h>
  25. #include <asm/mach_traps.h>
  26. #include <asm/nmi.h>
  27. #define NMI_MAX_NAMELEN 16
  28. struct nmiaction {
  29. struct list_head list;
  30. nmi_handler_t handler;
  31. unsigned int flags;
  32. char *name;
  33. };
  34. struct nmi_desc {
  35. spinlock_t lock;
  36. struct list_head head;
  37. };
  38. static struct nmi_desc nmi_desc[NMI_MAX] =
  39. {
  40. {
  41. .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
  42. .head = LIST_HEAD_INIT(nmi_desc[0].head),
  43. },
  44. {
  45. .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
  46. .head = LIST_HEAD_INIT(nmi_desc[1].head),
  47. },
  48. };
  /* Per-CPU counters describing how NMIs were disposed of. */
  struct nmi_stats {
      unsigned int normal;     /* events claimed by NMI_LOCAL handlers */
      unsigned int unknown;    /* NMIs that reached the unknown-NMI path */
      unsigned int external;   /* NMIs attributed to the NMI reason port */
      unsigned int swallow;    /* back-to-back NMIs silently dropped */
  };
  55. static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
  56. static int ignore_nmis;
  57. int unknown_nmi_panic;
  /*
   * Serializes access to the NMI reason port (0x61) so that it is never
   * touched by more than one CPU at a time. Only to be taken from the NMI
   * handler.
   */
  static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
  63. static int __init setup_unknown_nmi_panic(char *str)
  64. {
  65. unknown_nmi_panic = 1;
  66. return 1;
  67. }
  68. __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
  /* Map an NMI type to its slot in nmi_desc[]; performs no range check. */
  #define nmi_to_desc(type) (&nmi_desc[(type)])
  70. static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
  71. {
  72. struct nmi_desc *desc = nmi_to_desc(type);
  73. struct nmiaction *a;
  74. int handled=0;
  75. rcu_read_lock();
  76. /*
  77. * NMIs are edge-triggered, which means if you have enough
  78. * of them concurrently, you can lose some because only one
  79. * can be latched at any given time. Walk the whole list
  80. * to handle those situations.
  81. */
  82. list_for_each_entry_rcu(a, &desc->head, list)
  83. handled += a->handler(type, regs);
  84. rcu_read_unlock();
  85. /* return total number of NMI events handled */
  86. return handled;
  87. }
  88. static int __setup_nmi(unsigned int type, struct nmiaction *action)
  89. {
  90. struct nmi_desc *desc = nmi_to_desc(type);
  91. unsigned long flags;
  92. spin_lock_irqsave(&desc->lock, flags);
  93. /*
  94. * most handlers of type NMI_UNKNOWN never return because
  95. * they just assume the NMI is theirs. Just a sanity check
  96. * to manage expectations
  97. */
  98. WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
  99. /*
  100. * some handlers need to be executed first otherwise a fake
  101. * event confuses some handlers (kdump uses this flag)
  102. */
  103. if (action->flags & NMI_FLAG_FIRST)
  104. list_add_rcu(&action->list, &desc->head);
  105. else
  106. list_add_tail_rcu(&action->list, &desc->head);
  107. spin_unlock_irqrestore(&desc->lock, flags);
  108. return 0;
  109. }
  110. static struct nmiaction *__free_nmi(unsigned int type, const char *name)
  111. {
  112. struct nmi_desc *desc = nmi_to_desc(type);
  113. struct nmiaction *n;
  114. unsigned long flags;
  115. spin_lock_irqsave(&desc->lock, flags);
  116. list_for_each_entry_rcu(n, &desc->head, list) {
  117. /*
  118. * the name passed in to describe the nmi handler
  119. * is used as the lookup key
  120. */
  121. if (!strcmp(n->name, name)) {
  122. WARN(in_nmi(),
  123. "Trying to free NMI (%s) from NMI context!\n", n->name);
  124. list_del_rcu(&n->list);
  125. break;
  126. }
  127. }
  128. spin_unlock_irqrestore(&desc->lock, flags);
  129. synchronize_rcu();
  130. return (n);
  131. }
  132. int register_nmi_handler(unsigned int type, nmi_handler_t handler,
  133. unsigned long nmiflags, const char *devname)
  134. {
  135. struct nmiaction *action;
  136. int retval = -ENOMEM;
  137. if (!handler)
  138. return -EINVAL;
  139. action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
  140. if (!action)
  141. goto fail_action;
  142. action->handler = handler;
  143. action->flags = nmiflags;
  144. action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
  145. if (!action->name)
  146. goto fail_action_name;
  147. retval = __setup_nmi(type, action);
  148. if (retval)
  149. goto fail_setup_nmi;
  150. return retval;
  151. fail_setup_nmi:
  152. kfree(action->name);
  153. fail_action_name:
  154. kfree(action);
  155. fail_action:
  156. return retval;
  157. }
  158. EXPORT_SYMBOL_GPL(register_nmi_handler);
  159. void unregister_nmi_handler(unsigned int type, const char *name)
  160. {
  161. struct nmiaction *a;
  162. a = __free_nmi(type, name);
  163. if (a) {
  164. kfree(a->name);
  165. kfree(a);
  166. }
  167. }
  168. EXPORT_SYMBOL_GPL(unregister_nmi_handler);
  169. static notrace __kprobes void
  170. pci_serr_error(unsigned char reason, struct pt_regs *regs)
  171. {
  172. pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
  173. reason, smp_processor_id());
  174. /*
  175. * On some machines, PCI SERR line is used to report memory
  176. * errors. EDAC makes use of it.
  177. */
  178. #if defined(CONFIG_EDAC)
  179. if (edac_handler_set()) {
  180. edac_atomic_assert_error();
  181. return;
  182. }
  183. #endif
  184. if (panic_on_unrecovered_nmi)
  185. panic("NMI: Not continuing");
  186. pr_emerg("Dazed and confused, but trying to continue\n");
  187. /* Clear and disable the PCI SERR error line. */
  188. reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
  189. outb(reason, NMI_REASON_PORT);
  190. }
  191. static notrace __kprobes void
  192. io_check_error(unsigned char reason, struct pt_regs *regs)
  193. {
  194. unsigned long i;
  195. pr_emerg(
  196. "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
  197. reason, smp_processor_id());
  198. show_registers(regs);
  199. if (panic_on_io_nmi)
  200. panic("NMI IOCK error: Not continuing");
  201. /* Re-enable the IOCK line, wait for a few seconds */
  202. reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
  203. outb(reason, NMI_REASON_PORT);
  204. i = 20000;
  205. while (--i) {
  206. touch_nmi_watchdog();
  207. udelay(100);
  208. }
  209. reason &= ~NMI_REASON_CLEAR_IOCHK;
  210. outb(reason, NMI_REASON_PORT);
  211. }
  212. static notrace __kprobes void
  213. unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
  214. {
  215. int handled;
  216. /*
  217. * Use 'false' as back-to-back NMIs are dealt with one level up.
  218. * Of course this makes having multiple 'unknown' handlers useless
  219. * as only the first one is ever run (unless it can actually determine
  220. * if it caused the NMI)
  221. */
  222. handled = nmi_handle(NMI_UNKNOWN, regs, false);
  223. if (handled) {
  224. __this_cpu_add(nmi_stats.unknown, handled);
  225. return;
  226. }
  227. __this_cpu_add(nmi_stats.unknown, 1);
  228. #ifdef CONFIG_MCA
  229. /*
  230. * Might actually be able to figure out what the guilty party
  231. * is:
  232. */
  233. if (MCA_bus) {
  234. mca_handle_nmi();
  235. return;
  236. }
  237. #endif
  238. pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
  239. reason, smp_processor_id());
  240. pr_emerg("Do you have a strange power saving mode enabled?\n");
  241. if (unknown_nmi_panic || panic_on_unrecovered_nmi)
  242. panic("NMI: Not continuing");
  243. pr_emerg("Dazed and confused, but trying to continue\n");
  244. }
  245. static DEFINE_PER_CPU(bool, swallow_nmi);
  246. static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
  247. static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
  248. {
  249. unsigned char reason = 0;
  250. int handled;
  251. bool b2b = false;
  252. /*
  253. * CPU-specific NMI must be processed before non-CPU-specific
  254. * NMI, otherwise we may lose it, because the CPU-specific
  255. * NMI can not be detected/processed on other CPUs.
  256. */
  257. /*
  258. * Back-to-back NMIs are interesting because they can either
  259. * be two NMI or more than two NMIs (any thing over two is dropped
  260. * due to NMI being edge-triggered). If this is the second half
  261. * of the back-to-back NMI, assume we dropped things and process
  262. * more handlers. Otherwise reset the 'swallow' NMI behaviour
  263. */
  264. if (regs->ip == __this_cpu_read(last_nmi_rip))
  265. b2b = true;
  266. else
  267. __this_cpu_write(swallow_nmi, false);
  268. __this_cpu_write(last_nmi_rip, regs->ip);
  269. handled = nmi_handle(NMI_LOCAL, regs, b2b);
  270. __this_cpu_add(nmi_stats.normal, handled);
  271. if (handled) {
  272. /*
  273. * There are cases when a NMI handler handles multiple
  274. * events in the current NMI. One of these events may
  275. * be queued for in the next NMI. Because the event is
  276. * already handled, the next NMI will result in an unknown
  277. * NMI. Instead lets flag this for a potential NMI to
  278. * swallow.
  279. */
  280. if (handled > 1)
  281. __this_cpu_write(swallow_nmi, true);
  282. return;
  283. }
  284. /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
  285. raw_spin_lock(&nmi_reason_lock);
  286. reason = get_nmi_reason();
  287. if (reason & NMI_REASON_MASK) {
  288. if (reason & NMI_REASON_SERR)
  289. pci_serr_error(reason, regs);
  290. else if (reason & NMI_REASON_IOCHK)
  291. io_check_error(reason, regs);
  292. #ifdef CONFIG_X86_32
  293. /*
  294. * Reassert NMI in case it became active
  295. * meanwhile as it's edge-triggered:
  296. */
  297. reassert_nmi();
  298. #endif
  299. __this_cpu_add(nmi_stats.external, 1);
  300. raw_spin_unlock(&nmi_reason_lock);
  301. return;
  302. }
  303. raw_spin_unlock(&nmi_reason_lock);
  304. /*
  305. * Only one NMI can be latched at a time. To handle
  306. * this we may process multiple nmi handlers at once to
  307. * cover the case where an NMI is dropped. The downside
  308. * to this approach is we may process an NMI prematurely,
  309. * while its real NMI is sitting latched. This will cause
  310. * an unknown NMI on the next run of the NMI processing.
  311. *
  312. * We tried to flag that condition above, by setting the
  313. * swallow_nmi flag when we process more than one event.
  314. * This condition is also only present on the second half
  315. * of a back-to-back NMI, so we flag that condition too.
  316. *
  317. * If both are true, we assume we already processed this
  318. * NMI previously and we swallow it. Otherwise we reset
  319. * the logic.
  320. *
  321. * There are scenarios where we may accidentally swallow
  322. * a 'real' unknown NMI. For example, while processing
  323. * a perf NMI another perf NMI comes in along with a
  324. * 'real' unknown NMI. These two NMIs get combined into
  325. * one (as descibed above). When the next NMI gets
  326. * processed, it will be flagged by perf as handled, but
  327. * noone will know that there was a 'real' unknown NMI sent
  328. * also. As a result it gets swallowed. Or if the first
  329. * perf NMI returns two events handled then the second
  330. * NMI will get eaten by the logic below, again losing a
  331. * 'real' unknown NMI. But this is the best we can do
  332. * for now.
  333. */
  334. if (b2b && __this_cpu_read(swallow_nmi))
  335. __this_cpu_add(nmi_stats.swallow, 1);
  336. else
  337. unknown_nmi_error(reason, regs);
  338. }
  339. dotraplinkage notrace __kprobes void
  340. do_nmi(struct pt_regs *regs, long error_code)
  341. {
  342. nmi_enter();
  343. inc_irq_stat(__nmi_count);
  344. if (!ignore_nmis)
  345. default_do_nmi(regs);
  346. nmi_exit();
  347. }
  348. void stop_nmi(void)
  349. {
  350. ignore_nmis++;
  351. }
  352. void restart_nmi(void)
  353. {
  354. ignore_nmis--;
  355. }
  356. /* reset the back-to-back NMI logic */
  357. void local_touch_nmi(void)
  358. {
  359. __this_cpu_write(last_nmi_rip, 0);
  360. }