|
@@ -1475,11 +1475,166 @@ ENTRY(error_exit)
|
|
|
CFI_ENDPROC
|
|
|
END(error_exit)
|
|
|
|
|
|
+/*
|
|
|
+ * Test if a given stack is an NMI stack or not.
|
|
|
+ */
|
|
|
+ .macro test_in_nmi reg stack nmi_ret normal_ret
|
|
|
+ cmpq %\reg, \stack	/* caller passes the NMI stack top in %\reg */
|
|
|
+ ja \normal_ret	/* \stack above the top -> not the NMI stack */
|
|
|
+ subq $EXCEPTION_STKSZ, %\reg	/* %\reg = bottom of the NMI stack */
|
|
|
+ cmpq %\reg, \stack
|
|
|
+ jb \normal_ret	/* \stack below the bottom -> not the NMI stack */
|
|
|
+ jmp \nmi_ret		/* bottom <= \stack <= top -> on the NMI stack */
|
|
|
+ .endm
|
|
|
|
|
|
/* runs on exception stack */
|
|
|
ENTRY(nmi)
|
|
|
INTR_FRAME
|
|
|
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
|
+ /*
|
|
|
+ * We allow breakpoints in NMIs. If a breakpoint occurs, then
|
|
|
+ * the iretq it performs will take us out of NMI context.
|
|
|
+ * This means that we can have nested NMIs where the next
|
|
|
+ * NMI is using the top of the stack of the previous NMI. We
|
|
|
+ * can't let it execute because the nested NMI will corrupt the
|
|
|
+ * stack of the previous NMI. NMI handlers are not re-entrant
|
|
|
+ * anyway.
|
|
|
+ *
|
|
|
+ * To handle this case we do the following:
|
|
|
+ * Check a special location on the stack that contains
|
|
|
+ * a variable that is set when NMIs are executing.
|
|
|
+ * The interrupted task's stack is also checked to see if it
|
|
|
+ * is an NMI stack.
|
|
|
+ * If the variable is not set and the stack is not the NMI
|
|
|
+ * stack then:
|
|
|
+ * o Set the special variable on the stack
|
|
|
+ * o Copy the interrupt frame into a "saved" location on the stack
|
|
|
+ * o Copy the interrupt frame into a "copy" location on the stack
|
|
|
+ * o Continue processing the NMI
|
|
|
+ * If the variable is set or the previous stack is the NMI stack:
|
|
|
+ * o Modify the "copy" location to jump to the repeat_nmi
|
|
|
+ * o return back to the first NMI
|
|
|
+ *
|
|
|
+ * Now on exit of the first NMI, we first clear the stack variable.
|
|
|
+ * The NMI stack will tell any nested NMIs at that point that it is
|
|
|
+ * nested. Then we pop the stack normally with iret, and if there was
|
|
|
+ * a nested NMI that updated the copy interrupt stack frame, a
|
|
|
+ * jump will be made to the repeat_nmi code that will handle the second
|
|
|
+ * NMI.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Use %rdx as our temp variable throughout */
|
|
|
+ pushq_cfi %rdx
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Check the special variable on the stack to see if NMIs are
|
|
|
+ * executing.
|
|
|
+ */
|
|
|
+ cmp $1, -8(%rsp)
|
|
|
+ je nested_nmi
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Now test if the previous stack was an NMI stack.
|
|
|
+ * We need the double check. We check the NMI stack to satisfy the
|
|
|
+ * race when the first NMI clears the variable before returning.
|
|
|
+ * We check the variable because the first NMI could be in a
|
|
|
+ * breakpoint routine using a breakpoint stack.
|
|
|
+ */
|
|
|
+ lea 6*8(%rsp), %rdx
|
|
|
+ test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
|
|
|
+
|
|
|
+nested_nmi:
|
|
|
+ /*
|
|
|
+ * Do nothing if we interrupted the fixup in repeat_nmi.
|
|
|
+ * It's about to repeat the NMI handler, so we are fine
|
|
|
+ * with ignoring this one.
|
|
|
+ */
|
|
|
+ movq $repeat_nmi, %rdx
|
|
|
+ cmpq 8(%rsp), %rdx
|
|
|
+ ja 1f
|
|
|
+ movq $end_repeat_nmi, %rdx
|
|
|
+ cmpq 8(%rsp), %rdx
|
|
|
+ ja nested_nmi_out
|
|
|
+
|
|
|
+1:
|
|
|
+ /* Set up the interrupted NMI's stack to jump to repeat_nmi */
|
|
|
+ leaq -6*8(%rsp), %rdx
|
|
|
+ movq %rdx, %rsp
|
|
|
+ CFI_ADJUST_CFA_OFFSET 6*8
|
|
|
+ pushq_cfi $__KERNEL_DS
|
|
|
+ pushq_cfi %rdx
|
|
|
+ pushfq_cfi
|
|
|
+ pushq_cfi $__KERNEL_CS
|
|
|
+ pushq_cfi $repeat_nmi
|
|
|
+
|
|
|
+ /* Put stack back */
|
|
|
+ addq $(11*8), %rsp
|
|
|
+ CFI_ADJUST_CFA_OFFSET -11*8
|
|
|
+
|
|
|
+nested_nmi_out:
|
|
|
+ popq_cfi %rdx
|
|
|
+
|
|
|
+ /* No need to check faults here */
|
|
|
+ INTERRUPT_RETURN
|
|
|
+
|
|
|
+first_nmi:
|
|
|
+ /*
|
|
|
+ * Because nested NMIs will use the pushed location that we
|
|
|
+ * stored in rdx, we must keep that space available.
|
|
|
+ * Here's what our stack frame will look like:
|
|
|
+ * +-------------------------+
|
|
|
+ * | original SS |
|
|
|
+ * | original Return RSP |
|
|
|
+ * | original RFLAGS |
|
|
|
+ * | original CS |
|
|
|
+ * | original RIP |
|
|
|
+ * +-------------------------+
|
|
|
+ * | temp storage for rdx |
|
|
|
+ * +-------------------------+
|
|
|
+ * | NMI executing variable |
|
|
|
+ * +-------------------------+
|
|
|
+ * | Saved SS |
|
|
|
+ * | Saved Return RSP |
|
|
|
+ * | Saved RFLAGS |
|
|
|
+ * | Saved CS |
|
|
|
+ * | Saved RIP |
|
|
|
+ * +-------------------------+
|
|
|
+ * | copied SS |
|
|
|
+ * | copied Return RSP |
|
|
|
+ * | copied RFLAGS |
|
|
|
+ * | copied CS |
|
|
|
+ * | copied RIP |
|
|
|
+ * +-------------------------+
|
|
|
+ * | pt_regs |
|
|
|
+ * +-------------------------+
|
|
|
+ *
|
|
|
+ * The saved RIP is used to fix up the copied RIP that a nested
|
|
|
+ * NMI may zero out. The original stack frame and the temp storage
|
|
|
+ * is also used by nested NMIs and can not be trusted on exit.
|
|
|
+ */
|
|
|
+ /* Set the NMI executing variable on the stack. */
|
|
|
+ pushq_cfi $1
|
|
|
+
|
|
|
+ /* Copy the stack frame to the Saved frame */
|
|
|
+ .rept 5
|
|
|
+ pushq_cfi 6*8(%rsp)
|
|
|
+ .endr
|
|
|
+
|
|
|
+ /* Make another copy, this one may be modified by nested NMIs */
|
|
|
+ .rept 5
|
|
|
+ pushq_cfi 4*8(%rsp)
|
|
|
+ .endr
|
|
|
+
|
|
|
+ /* Do not pop rdx, nested NMIs will corrupt it */
|
|
|
+ movq 11*8(%rsp), %rdx
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Everything below this point can be preempted by a nested
|
|
|
+ * NMI if the first NMI took an exception. Repeated NMIs
|
|
|
+ * caused by an exception and nested NMI will start here, and
|
|
|
+ * can still be preempted by another NMI.
|
|
|
+ */
|
|
|
+restart_nmi:
|
|
|
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
|
|
|
subq $ORIG_RAX-R15, %rsp
|
|
|
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
|
|
@@ -1502,10 +1657,32 @@ nmi_swapgs:
|
|
|
SWAPGS_UNSAFE_STACK
|
|
|
nmi_restore:
|
|
|
RESTORE_ALL 8
|
|
|
+ /* Clear the NMI executing stack variable */
|
|
|
+ movq $0, 10*8(%rsp)
|
|
|
jmp irq_return
|
|
|
CFI_ENDPROC
|
|
|
END(nmi)
|
|
|
|
|
|
+ /*
|
|
|
+ * If an NMI hit an iret because of an exception or breakpoint,
|
|
|
+ * it can lose its NMI context, and a nested NMI may come in.
|
|
|
+ * In that case, the nested NMI will change the preempted NMI's
|
|
|
+ * stack to jump to here when it does the final iret.
|
|
|
+ */
|
|
|
+repeat_nmi:
|
|
|
+ INTR_FRAME
|
|
|
+ /* Update the stack variable to say we are still in NMI */
|
|
|
+ movq $1, 5*8(%rsp)
|
|
|
+
|
|
|
+ /* copy the saved stack back to copy stack */
|
|
|
+ .rept 5
|
|
|
+ pushq_cfi 4*8(%rsp)
|
|
|
+ .endr
|
|
|
+
|
|
|
+ jmp restart_nmi
|
|
|
+ CFI_ENDPROC
|
|
|
+end_repeat_nmi:
|
|
|
+
|
|
|
ENTRY(ignore_sysret)
|
|
|
CFI_STARTPROC
|
|
|
mov $-ENOSYS,%eax
|