mce-inject.c 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. /*
  2. * Machine check injection support.
  3. * Copyright 2008 Intel Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License
  7. * as published by the Free Software Foundation; version 2
  8. * of the License.
  9. *
  10. * Authors:
  11. * Andi Kleen
  12. * Ying Huang
  13. */
  14. #include <linux/uaccess.h>
  15. #include <linux/module.h>
  16. #include <linux/timer.h>
  17. #include <linux/kernel.h>
  18. #include <linux/string.h>
  19. #include <linux/fs.h>
  20. #include <linux/smp.h>
  21. #include <linux/notifier.h>
  22. #include <linux/kdebug.h>
  23. #include <linux/cpu.h>
  24. #include <linux/sched.h>
  25. #include <asm/mce.h>
  26. #include <asm/apic.h>
  /*
   * Stage a fake machine check record for the CPU named by m->extcpu.
   *
   * *m is copied into that CPU's per-CPU injectm slot. The slot's finished
   * flag is cleared before the copy and set again only after it, with mb()
   * barriers in between, so finished == 1 is stored only once the rest of
   * the record has been written.
   *
   * Side effect: m->finished is cleared in the caller's copy as well.
   */
  static void inject_mce(struct mce *m)
  {
      struct mce *i = &per_cpu(injectm, m->extcpu);

      /* Make sure no one reads a partially written injectm */
      i->finished = 0;
      mb();
      /* Clear it in the source too, so the memcpy() below copies finished == 0 */
      m->finished = 0;
      /* First set the fields after finished */
      i->extcpu = m->extcpu;
      mb();
      /* Now write record in order, finished last (except above) */
      memcpy(i, m, sizeof(struct mce));
      /* Finally activate it */
      mb();
      i->finished = 1;
  }
  44. static void raise_poll(struct mce *m)
  45. {
  46. unsigned long flags;
  47. mce_banks_t b;
  48. memset(&b, 0xff, sizeof(mce_banks_t));
  49. local_irq_save(flags);
  50. machine_check_poll(0, &b);
  51. local_irq_restore(flags);
  52. m->finished = 0;
  53. }
  54. static void raise_exception(struct mce *m, struct pt_regs *pregs)
  55. {
  56. struct pt_regs regs;
  57. unsigned long flags;
  58. if (!pregs) {
  59. memset(&regs, 0, sizeof(struct pt_regs));
  60. regs.ip = m->ip;
  61. regs.cs = m->cs;
  62. pregs = &regs;
  63. }
  64. /* in mcheck exeception handler, irq will be disabled */
  65. local_irq_save(flags);
  66. do_machine_check(pregs, 0);
  67. local_irq_restore(flags);
  68. m->finished = 0;
  69. }
  70. static cpumask_t mce_inject_cpumask;
  71. static int mce_raise_notify(struct notifier_block *self,
  72. unsigned long val, void *data)
  73. {
  74. struct die_args *args = (struct die_args *)data;
  75. int cpu = smp_processor_id();
  76. struct mce *m = &__get_cpu_var(injectm);
  77. if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
  78. return NOTIFY_DONE;
  79. cpu_clear(cpu, mce_inject_cpumask);
  80. if (m->inject_flags & MCJ_EXCEPTION)
  81. raise_exception(m, args->regs);
  82. else if (m->status)
  83. raise_poll(m);
  84. return NOTIFY_STOP;
  85. }
  86. static struct notifier_block mce_raise_nb = {
  87. .notifier_call = mce_raise_notify,
  88. .priority = 1000,
  89. };
  90. /* Inject mce on current CPU */
  91. static int raise_local(void)
  92. {
  93. struct mce *m = &__get_cpu_var(injectm);
  94. int context = MCJ_CTX(m->inject_flags);
  95. int ret = 0;
  96. int cpu = m->extcpu;
  97. if (m->inject_flags & MCJ_EXCEPTION) {
  98. printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
  99. switch (context) {
  100. case MCJ_CTX_IRQ:
  101. /*
  102. * Could do more to fake interrupts like
  103. * calling irq_enter, but the necessary
  104. * machinery isn't exported currently.
  105. */
  106. /*FALL THROUGH*/
  107. case MCJ_CTX_PROCESS:
  108. raise_exception(m, NULL);
  109. break;
  110. default:
  111. printk(KERN_INFO "Invalid MCE context\n");
  112. ret = -EINVAL;
  113. }
  114. printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
  115. } else if (m->status) {
  116. printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
  117. raise_poll(m);
  118. mce_notify_irq();
  119. printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
  120. } else
  121. m->finished = 0;
  122. return ret;
  123. }
  124. static void raise_mce(struct mce *m)
  125. {
  126. int context = MCJ_CTX(m->inject_flags);
  127. inject_mce(m);
  128. if (context == MCJ_CTX_RANDOM)
  129. return;
  130. #ifdef CONFIG_X86_LOCAL_APIC
  131. if (m->inject_flags & MCJ_NMI_BROADCAST) {
  132. unsigned long start;
  133. int cpu;
  134. get_online_cpus();
  135. mce_inject_cpumask = cpu_online_map;
  136. cpu_clear(get_cpu(), mce_inject_cpumask);
  137. for_each_online_cpu(cpu) {
  138. struct mce *mcpu = &per_cpu(injectm, cpu);
  139. if (!mcpu->finished ||
  140. MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
  141. cpu_clear(cpu, mce_inject_cpumask);
  142. }
  143. if (!cpus_empty(mce_inject_cpumask))
  144. apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
  145. start = jiffies;
  146. while (!cpus_empty(mce_inject_cpumask)) {
  147. if (!time_before(jiffies, start + 2*HZ)) {
  148. printk(KERN_ERR
  149. "Timeout waiting for mce inject NMI %lx\n",
  150. *cpus_addr(mce_inject_cpumask));
  151. break;
  152. }
  153. cpu_relax();
  154. }
  155. raise_local();
  156. put_cpu();
  157. put_online_cpus();
  158. } else
  159. #endif
  160. raise_local();
  161. }
  162. /* Error injection interface */
  163. static ssize_t mce_write(struct file *filp, const char __user *ubuf,
  164. size_t usize, loff_t *off)
  165. {
  166. struct mce m;
  167. if (!capable(CAP_SYS_ADMIN))
  168. return -EPERM;
  169. /*
  170. * There are some cases where real MSR reads could slip
  171. * through.
  172. */
  173. if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
  174. return -EIO;
  175. if ((unsigned long)usize > sizeof(struct mce))
  176. usize = sizeof(struct mce);
  177. if (copy_from_user(&m, ubuf, usize))
  178. return -EFAULT;
  179. if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
  180. return -EINVAL;
  181. /*
  182. * Need to give user space some time to set everything up,
  183. * so do it a jiffie or two later everywhere.
  184. */
  185. schedule_timeout(2);
  186. raise_mce(&m);
  187. return usize;
  188. }
  189. static int inject_init(void)
  190. {
  191. printk(KERN_INFO "Machine check injector initialized\n");
  192. mce_chrdev_ops.write = mce_write;
  193. register_die_notifier(&mce_raise_nb);
  194. return 0;
  195. }
  module_init(inject_init);
  /*
   * No exit handler is registered here: unloading cannot be tolerated
   * currently because we cannot guarantee that all openers of mce_chrdev
   * will get a reference to us.
   */
  MODULE_LICENSE("GPL");