|
@@ -1475,11 +1475,166 @@ ENTRY(error_exit)
|
|
|
CFI_ENDPROC
|
|
|
END(error_exit)
|
|
|
|
|
|
+/*
|
|
|
+ * Test if a given stack is an NMI stack or not.
|
|
|
+ */
|
|
|
+ .macro test_in_nmi reg stack nmi_ret normal_ret
|
|
|
+ cmpq %\reg, \stack	/* caller passes the NMI stack top in %\reg */
|
|
|
+ ja \normal_ret	/* \stack above the top -> not the NMI stack */
|
|
|
+ subq $EXCEPTION_STKSZ, %\reg	/* %\reg = bottom of the NMI stack */
|
|
|
+ cmpq %\reg, \stack
|
|
|
+ jb \normal_ret	/* \stack below the bottom -> not the NMI stack */
|
|
|
+ jmp \nmi_ret		/* bottom <= \stack <= top -> on the NMI stack */
|
|
|
+ .endm
|
|
|
|
|
|
/* runs on exception stack */
|
|
|
ENTRY(nmi)
|
|
|
INTR_FRAME
|
|
|
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
|
+ /*
|
|
|
+ * We allow breakpoints in NMIs. If a breakpoint occurs, then
|
|
|
+ * the iretq it performs will take us out of NMI context.
|
|
|
+ * This means that we can have nested NMIs where the next
|
|
|
+ * NMI is using the top of the stack of the previous NMI. We
|
|
|
+ * can't let it execute because the nested NMI will corrupt the
|
|
|
+ * stack of the previous NMI. NMI handlers are not re-entrant
|
|
|
+ * anyway.
|
|
|
+ *
|
|
|
+ * To handle this case we do the following:
|
|
|
+ * Check a special location on the stack that contains
|
|
|
+ * a variable that is set when NMIs are executing.
|
|
|
+ * The interrupted task's stack is also checked to see if it
|
|
|
+ * is an NMI stack.
|
|
|
+ * If the variable is not set and the stack is not the NMI
|
|
|
+ * stack then:
|
|
|
+ * o Set the special variable on the stack
|
|
|
+ * o Copy the interrupt frame into a "saved" location on the stack
|
|
|
+ * o Copy the interrupt frame into a "copy" location on the stack
|
|
|
+ * o Continue processing the NMI
|
|
|
+ * If the variable is set or the previous stack is the NMI stack:
|
|
|
+ * o Modify the "copy" location to jump to the repeat_nmi
|
|
|
+ * o return back to the first NMI
|
|
|
+ *
|
|
|
+ * Now on exit of the first NMI, we first clear the stack variable.
|
|
|
+ * The NMI stack will tell any nested NMIs at that point that it is
|
|
|
+ * nested. Then we pop the stack normally with iret, and if there was
|
|
|
+ * a nested NMI that updated the copy interrupt stack frame, a
|
|
|
+ * jump will be made to the repeat_nmi code that will handle the second
|
|
|
+ * NMI.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Use %rdx as our temp variable throughout */
|
|
|
+ pushq_cfi %rdx
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Check the special variable on the stack to see if NMIs are
|
|
|
+ * executing.
|
|
|
+ */
|
|
|
+ cmp $1, -8(%rsp)
|
|
|
+ je nested_nmi
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Now test if the previous stack was an NMI stack.
|
|
|
+ * We need the double check. We check the NMI stack to satisfy the
|
|
|
+ * race when the first NMI clears the variable before returning.
|
|
|
+ * We check the variable because the first NMI could be in a
|
|
|
+ * breakpoint routine using a breakpoint stack.
|
|
|
+ */
|
|
|
+ lea 6*8(%rsp), %rdx
|
|
|
+ test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
|
|
|
+
|
|
|
+nested_nmi:
|
|
|
+ /*
|
|
|
+ * Do nothing if we interrupted the fixup in repeat_nmi.
|
|
|
+ * It's about to repeat the NMI handler, so we are fine
|
|
|
+ * with ignoring this one.
|
|
|
+ */
|
|
|
+ movq $repeat_nmi, %rdx
|
|
|
+ cmpq 8(%rsp), %rdx
|
|
|
+ ja 1f
|
|
|
+ movq $end_repeat_nmi, %rdx
|
|
|
+ cmpq 8(%rsp), %rdx
|
|
|
+ ja nested_nmi_out
|
|
|
+
|
|
|
+1:
|
|
|
+ /* Set up the interrupted NMI's stack to jump to repeat_nmi */
|
|
|
+ leaq -6*8(%rsp), %rdx
|
|
|
+ movq %rdx, %rsp
|
|
|
+ CFI_ADJUST_CFA_OFFSET 6*8
|
|
|
+ pushq_cfi $__KERNEL_DS
|
|
|
+ pushq_cfi %rdx
|
|
|
+ pushfq_cfi
|
|
|
+ pushq_cfi $__KERNEL_CS
|
|
|
+ pushq_cfi $repeat_nmi
|
|
|
+
|
|
|
+ /* Put stack back */
|
|
|
+ addq $(11*8), %rsp
|
|
|
+ CFI_ADJUST_CFA_OFFSET -11*8
|
|
|
+
|
|
|
+nested_nmi_out:
|
|
|
+ popq_cfi %rdx
|
|
|
+
|
|
|
+ /* No need to check faults here */
|
|
|
+ INTERRUPT_RETURN
|
|
|
+
|
|
|
+first_nmi:
|
|
|
+ /*
|
|
|
+ * Because nested NMIs will use the pushed location that we
|
|
|
+ * stored in rdx, we must keep that space available.
|
|
|
+ * Here's what our stack frame will look like:
|
|
|
+ * +-------------------------+
|
|
|
+ * | original SS |
|
|
|
+ * | original Return RSP |
|
|
|
+ * | original RFLAGS |
|
|
|
+ * | original CS |
|
|
|
+ * | original RIP |
|
|
|
+ * +-------------------------+
|
|
|
+ * | temp storage for rdx |
|
|
|
+ * +-------------------------+
|
|
|
+ * | NMI executing variable |
|
|
|
+ * +-------------------------+
|
|
|
+ * | Saved SS |
|
|
|
+ * | Saved Return RSP |
|
|
|
+ * | Saved RFLAGS |
|
|
|
+ * | Saved CS |
|
|
|
+ * | Saved RIP |
|
|
|
+ * +-------------------------+
|
|
|
+ * | copied SS |
|
|
|
+ * | copied Return RSP |
|
|
|
+ * | copied RFLAGS |
|
|
|
+ * | copied CS |
|
|
|
+ * | copied RIP |
|
|
|
+ * +-------------------------+
|
|
|
+ * | pt_regs |
|
|
|
+ * +-------------------------+
|
|
|
+ *
|
|
|
+ * The saved RIP is used to fix up the copied RIP that a nested
|
|
|
+ * NMI may zero out. The original stack frame and the temp storage
|
|
|
+ * is also used by nested NMIs and can not be trusted on exit.
|
|
|
+ */
|
|
|
+ /* Set the NMI executing variable on the stack. */
|
|
|
+ pushq_cfi $1
|
|
|
+
|
|
|
+ /* Copy the stack frame to the Saved frame */
|
|
|
+ .rept 5
|
|
|
+ pushq_cfi 6*8(%rsp)
|
|
|
+ .endr
|
|
|
+
|
|
|
+ /* Make another copy, this one may be modified by nested NMIs */
|
|
|
+ .rept 5
|
|
|
+ pushq_cfi 4*8(%rsp)
|
|
|
+ .endr
|
|
|
+
|
|
|
+ /* Do not pop rdx, nested NMIs will corrupt it */
|
|
|
+ movq 11*8(%rsp), %rdx
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Everything below this point can be preempted by a nested
|
|
|
+ * NMI if the first NMI took an exception. Repeated NMIs
|
|
|
+ * caused by an exception and nested NMI will start here, and
|
|
|
+ * can still be preempted by another NMI.
|
|
|
+ */
|
|
|
+restart_nmi:
|
|
|
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
|
|
|
subq $ORIG_RAX-R15, %rsp
|
|
|
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
|
|
@@ -1502,10 +1657,32 @@ nmi_swapgs:
|
|
|
SWAPGS_UNSAFE_STACK
|
|
|
nmi_restore:
|
|
|
RESTORE_ALL 8
|
|
|
+ /* Clear the NMI executing stack variable */
|
|
|
+ movq $0, 10*8(%rsp)
|
|
|
jmp irq_return
|
|
|
CFI_ENDPROC
|
|
|
END(nmi)
|
|
|
|
|
|
+ /*
|
|
|
+ * If an NMI hit an iret because of an exception or breakpoint,
|
|
|
+ * it can lose its NMI context, and a nested NMI may come in.
|
|
|
+ * In that case, the nested NMI will change the preempted NMI's
|
|
|
+ * stack to jump to here when it does the final iret.
|
|
|
+ */
|
|
|
+repeat_nmi:
|
|
|
+ INTR_FRAME
|
|
|
+ /* Update the stack variable to say we are still in NMI */
|
|
|
+ movq $1, 5*8(%rsp)
|
|
|
+
|
|
|
+ /* copy the saved stack back to copy stack */
|
|
|
+ .rept 5
|
|
|
+ pushq_cfi 4*8(%rsp)
|
|
|
+ .endr
|
|
|
+
|
|
|
+ jmp restart_nmi
|
|
|
+ CFI_ENDPROC
|
|
|
+end_repeat_nmi:
|
|
|
+
|
|
|
ENTRY(ignore_sysret)
|
|
|
CFI_STARTPROC
|
|
|
mov $-ENOSYS,%eax
|