
[SPARC64]: Fix race in LOAD_PER_CPU_BASE()

Since we use %g5 itself as a temporary, it can get clobbered if we
take an interrupt mid-stream, and we thus end up with the final %g5
value too early as a result of rtrap processing.

Set %g5 at the very end, atomically, to avoid this problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller, 19 years ago
Parent commit: 86b818687d
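
To make the race concrete, here is an illustrative expansion of the old
two-register macro at the etrap_irq call site (REG1 = %g4, REG2 = %g3).
This sketch is not part of the commit, and the interrupt point shown is
just one example:

	ldub	[%g6 + TI_CPU], %g4			! cpu number
	sethi	%hi(__per_cpu_shift), %g5		! %g5 used as a temporary
	sethi	%hi(__per_cpu_base), %g3
	ldx	[%g5 + %lo(__per_cpu_shift)], %g5	! %g5 = __per_cpu_shift
	! <-- interrupt taken here: when it returns, rtrap has already
	!     reloaded %g5 with the final per-cpu base, overwriting the
	!     shift count the interrupted code expects to find there
	ldx	[%g3 + %lo(__per_cpu_base)], %g3
	sllx	%g4, %g5, %g5				! shifts by the base, not the shift count
	add	%g5, %g3, %g5				! bogus per-cpu base in %g5

Because %g5 holds an intermediate value when the interrupt hits, the value
rtrap writes back is consumed as if it were the shift count, and the final
%g5 ends up corrupted.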

arch/sparc64/kernel/etrap.S  +2 -2

@@ -100,7 +100,7 @@ etrap_irq:
 		stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
 		wrpr	%g0, ETRAP_PSTATE2, %pstate
 		mov	%l6, %g6
-		LOAD_PER_CPU_BASE(%g4, %g3)
+		LOAD_PER_CPU_BASE(%g4, %g3, %l1)
 		jmpl	%l2 + 0x4, %g0
 		 ldx	[%g6 + TI_TASK], %g4
 
@@ -250,7 +250,7 @@ scetrap:
 		stx	%i6, [%sp + PTREGS_OFF + PT_V9_I6]
 		mov	%l6, %g6
 		stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
-		LOAD_PER_CPU_BASE(%g4, %g3)
+		LOAD_PER_CPU_BASE(%g4, %g3, %l1)
 		ldx	[%g6 + TI_TASK], %g4
 		done
 

arch/sparc64/kernel/rtrap.S  +1 -1

@@ -226,7 +226,7 @@ rt_continue:	ldx			[%sp + PTREGS_OFF + PT_V9_G1], %g1
 		brz,pt			%l3, 1f
 		 nop
 		/* Must do this before thread reg is clobbered below.  */
-		LOAD_PER_CPU_BASE(%g6, %g7)
+		LOAD_PER_CPU_BASE(%i0, %i1, %i2)
 1:
 		ldx			[%sp + PTREGS_OFF + PT_V9_G6], %g6
 		ldx			[%sp + PTREGS_OFF + PT_V9_G7], %g7

arch/sparc64/kernel/winfixup.S  +3 -3

@@ -86,7 +86,7 @@ fill_fixup:
 	wrpr		%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov		%o7, %g6
 	ldx		[%g6 + TI_TASK], %g4
-	LOAD_PER_CPU_BASE(%g1, %g2)
+	LOAD_PER_CPU_BASE(%g1, %g2, %g3)
 
 	/* This is the same as below, except we handle this a bit special
 	 * since we must preserve %l5 and %l6, see comment above.
@@ -209,7 +209,7 @@ fill_fixup_mna:
 	wrpr		%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov		%o7, %g6			! Get current back.
 	ldx		[%g6 + TI_TASK], %g4		! Finish it.
-	LOAD_PER_CPU_BASE(%g1, %g2)
+	LOAD_PER_CPU_BASE(%g1, %g2, %g3)
 	call		mem_address_unaligned
 	 add		%sp, PTREGS_OFF, %o0
 
@@ -312,7 +312,7 @@ fill_fixup_dax:
 	wrpr		%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov		%o7, %g6			! Get current back.
 	ldx		[%g6 + TI_TASK], %g4		! Finish it.
-	LOAD_PER_CPU_BASE(%g1, %g2)
+	LOAD_PER_CPU_BASE(%g1, %g2, %g3)
 	call		spitfire_data_access_exception
 	 add		%sp, PTREGS_OFF, %o0
 

include/asm-sparc64/cpudata.h  +12 -7

@@ -101,20 +101,25 @@ extern void setup_tba(void);
 	ldx	[%g1 + %g6], %g6;
 
 /* Given the current thread info pointer in %g6, load the per-cpu
- * area base of the current processor into %g5.  REG1 and REG2 are
+ * area base of the current processor into %g5.  REG1, REG2, and REG3 are
  * clobbered.
+ *
+ * You absolutely cannot use %g5 as a temporary in this code.  The
+ * reason is that traps can happen during execution, and return from
+ * trap will load the fully resolved %g5 per-cpu base.  This can corrupt
+ * the calculations done by the macro mid-stream.
  */
 #ifdef CONFIG_SMP
-#define LOAD_PER_CPU_BASE(REG1, REG2)			\
+#define LOAD_PER_CPU_BASE(REG1, REG2, REG3)		\
 	ldub	[%g6 + TI_CPU], REG1;			\
-	sethi	%hi(__per_cpu_shift), %g5;		\
+	sethi	%hi(__per_cpu_shift), REG3;		\
 	sethi	%hi(__per_cpu_base), REG2;		\
-	ldx	[%g5 + %lo(__per_cpu_shift)], %g5;	\
+	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\
 	ldx	[REG2 + %lo(__per_cpu_base)], REG2;	\
-	sllx	REG1, %g5, %g5;				\
-	add	%g5, REG2, %g5;
+	sllx	REG1, REG3, REG3;			\
+	add	REG3, REG2, %g5;
 #else
-#define LOAD_PER_CPU_BASE(REG1, REG2)
+#define LOAD_PER_CPU_BASE(REG1, REG2, REG3)
 #endif
 
 #endif /* _SPARC64_CPUDATA_H */
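
For comparison, the new three-register form expands at the same etrap_irq
call site (%g4, %g3, %l1) roughly as follows. Again, this is an illustrative
sketch derived from the macro definition above, not additional commit content:

	ldub	[%g6 + TI_CPU], %g4			! cpu number
	sethi	%hi(__per_cpu_shift), %l1
	sethi	%hi(__per_cpu_base), %g3
	ldx	[%l1 + %lo(__per_cpu_shift)], %l1	! %l1 = __per_cpu_shift
	ldx	[%g3 + %lo(__per_cpu_base)], %g3	! %g3 = __per_cpu_base
	sllx	%g4, %l1, %l1				! cpu << shift
	add	%l1, %g3, %g5				! %g5 written exactly once, at the end

If a trap is taken anywhere inside this sequence, rtrap can only store into
%g5 the same fully resolved per-cpu base that the final add will compute, and
the temporaries live in registers that are saved and restored across the
nested trap, so %g5 never exposes a partial result.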