
[MIPS] IP28: added cache barrier to assembly routines

IP28 needs special treatment to avoid speculative accesses. gcc
takes care of .c code, but for assembly code we need to do it
manually.

This is taken from Peter Fuerst's IP28 patches.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Thomas Bogendoerfer committed 17 years ago
parent commit 930bff8822
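
For context: R10KCBARRIER expands to a single Cache_Barrier cacheop only when
CONFIG_SGI_IP28 is set (see the asm.h hunk at the end of this commit) and to
nothing on all other platforms, so the insertions below cost nothing elsewhere.
A minimal sketch of the usage pattern, with a hypothetical routine name and the
usual <asm/asm.h>/<asm/regdef.h> includes assumed:

	#include <asm/asm.h>
	#include <asm/regdef.h>

	LEAF(ip28_copy_byte)		/* hypothetical: a0 = dst, a1 = src */
		R10KCBARRIER(0(ra))	/* no-op unless CONFIG_SGI_IP28; otherwise
					   inhibits speculative accesses before the
					   load/store below */
		lb	t0, (a1)
		jr	ra
		 sb	t0, (a0)	/* store in the branch delay slot */
		END(ip28_copy_byte)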

+ 10 - 0
arch/mips/lib/memcpy.S

@@ -199,6 +199,7 @@ FEXPORT(__copy_user)
 	 */
 #define rem t8
 
+	R10KCBARRIER(0(ra))
 	/*
 	 * The "issue break"s below are very approximate.
 	 * Issue delays for dcache fills will perturb the schedule, as will
@@ -231,6 +232,7 @@ both_aligned:
 	PREF(	1, 3*32(dst) )
 	.align	4
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
 EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
 EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
@@ -272,6 +274,7 @@ EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
 EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
+	R10KCBARRIER(0(ra))
 EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
 EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
@@ -287,6 +290,7 @@ less_than_4units:
 	beq	rem, len, copy_bytes
 	 nop
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LOAD	t0, 0(src),		l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
@@ -334,6 +338,7 @@ EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
 EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
+	R10KCBARRIER(0(ra))
 EXC(	STFIRST t3, FIRST(0)(dst),	s_exc)
 	beq	len, t2, done
 	 SUB	len, len, t2
@@ -354,6 +359,7 @@ src_unaligned_dst_aligned:
  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
  * are to the same unit (unless src is aligned, but it's not).
  */
+	R10KCBARRIER(0(ra))
 EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
 EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
 	SUB     len, len, 4*NBYTES
@@ -384,6 +390,7 @@ cleanup_src_unaligned:
 	beq	rem, len, copy_bytes
 	 nop
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
 EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
 	ADD	src, src, NBYTES
@@ -399,6 +406,7 @@ copy_bytes_checklen:
 	 nop
 copy_bytes:
 	/* 0 < len < NBYTES  */
+	R10KCBARRIER(0(ra))
 #define COPY_BYTE(N)			\
 EXC(	lb	t0, N(src), l_exc);	\
 	SUB	len, len, 1;		\
@@ -528,6 +536,7 @@ LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
 	ADD	a1, a2				# src = src + len
 
 r_end_bytes:
+	R10KCBARRIER(0(ra))
 	lb	t0, -1(a1)
 	SUB	a2, a2, 0x1
 	sb	t0, -1(a0)
@@ -542,6 +551,7 @@ r_out:
 	 move	a2, zero
 
 r_end_bytes_up:
+	R10KCBARRIER(0(ra))
 	lb	t0, (a1)
 	SUB	a2, a2, 0x1
 	sb	t0, (a0)

+ 5 - 0
arch/mips/lib/memset.S

@@ -86,6 +86,7 @@ FEXPORT(__bzero)
 	.set		at
 #endif
 
+	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
 	EX(LONG_S_L, a1, (a0), first_fixup)	/* make word/dword aligned */
 #endif
@@ -103,11 +104,13 @@ FEXPORT(__bzero)
 	PTR_ADDU	t1, a0			/* end address */
 	.set		reorder
 1:	PTR_ADDIU	a0, 64
+	R10KCBARRIER(0(ra))
 	f_fill64 a0, -64, a1, fwd_fixup
 	bne		t1, a0, 1b
 	.set		noreorder
 
 memset_partial:
+	R10KCBARRIER(0(ra))
 	PTR_LA		t1, 2f			/* where to start */
 #if LONGSIZE == 4
 	PTR_SUBU	t1, t0
@@ -129,6 +132,7 @@ memset_partial:
 
 	beqz		a2, 1f
 	 PTR_ADDU	a0, a2			/* What's left */
+	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
 	EX(LONG_S_R, a1, -1(a0), last_fixup)
 #endif
@@ -143,6 +147,7 @@ small_memset:
 	 PTR_ADDU	t1, a0, a2
 
 1:	PTR_ADDIU	a0, 1			/* fill bytewise */
+	R10KCBARRIER(0(ra))
 	bne		t1, a0, 1b
 	 sb		a1, -1(a0)
 

+ 1 - 0
arch/mips/lib/strncpy_user.S

@@ -38,6 +38,7 @@ FEXPORT(__strncpy_from_user_nocheck_asm)
 	.set		noreorder
 1:	EX(lbu, t0, (v1), fault)
 	PTR_ADDIU	v1, 1
+	R10KCBARRIER(0(ra))
 	beqz		t0, 2f
 	 sb		t0, (a0)
 	PTR_ADDIU	v0, 1

+ 8 - 0
include/asm-mips/asm.h

@@ -398,4 +398,12 @@ symbol		=	value
 
 #define SSNOP		sll zero, zero, 1
 
+#ifdef CONFIG_SGI_IP28
+/* Inhibit speculative stores to volatile (e.g. DMA) or invalid addresses. */
+#include <asm/cacheops.h>
+#define R10KCBARRIER(addr)  cache   Cache_Barrier, addr;
+#else
+#define R10KCBARRIER(addr)
+#endif
+
 #endif /* __ASM_ASM_H */