
[ARM] cache align destination pointer when copying memory for some processors

The implementation for memory copy functions on ARM had a (disabled)
provision for aligning the source pointer before loading registers with
data.  Turns out that aligning the _destination_ pointer is much more
useful, as the read side is already sufficiently helped with the use of
preload.

So this changes the definition of the CALGN() macro to target the
destination pointer instead, and turns it on for Feroceon processors
where the gain is very noticeable.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
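
For illustration only, here is a minimal C sketch of the idea described in the commit message (the kernel routine itself is ARM assembly; the function and macro names below are made up): split the copy so the destination pointer reaches a 32-byte cache-line boundary first, then move the bulk in whole lines.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define CACHE_LINE 32	/* line size implied by the patch's "& 31" masks */

    /* Hypothetical C illustration: copy a short head so the destination
     * reaches a cache-line boundary, then move the bulk in whole 32-byte
     * chunks so each destination line is written in full, and finish with
     * a short tail. */
    static void *copy_dst_aligned(void *dst, const void *src, size_t n)
    {
    	uint8_t *d = dst;
    	const uint8_t *s = src;
    	size_t head = (CACHE_LINE - ((uintptr_t)d & (CACHE_LINE - 1)))
    			& (CACHE_LINE - 1);

    	if (head > n)
    		head = n;
    	memcpy(d, s, head);		/* align the write pointer first */
    	d += head;
    	s += head;
    	n -= head;

    	while (n >= CACHE_LINE) {	/* bulk copy, one full line at a time */
    		memcpy(d, s, CACHE_LINE);
    		d += CACHE_LINE;
    		s += CACHE_LINE;
    		n -= CACHE_LINE;
    	}
    	memcpy(d, s, n);		/* leftover tail */
    	return dst;
    }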
Nicolas Pitre, 17 years ago
commit 2239aff6ab
3 files changed, 19 insertions(+), 20 deletions(-)
  1. arch/arm/lib/copy_template.S (+2, -10)
  2. arch/arm/lib/memmove.S (+2, -10)
  3. include/asm-arm/assembler.h (+15, -0)

+ 2 - 10
arch/arm/lib/copy_template.S

@@ -12,14 +12,6 @@
  *  published by the Free Software Foundation.
  */
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do.  That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
 /*
  * Theory of operation
  * -------------------
@@ -82,7 +74,7 @@
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	r3, ip, #32		)
 	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
@@ -168,7 +160,7 @@
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
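
Roughly, the CALGN prologue above computes how many bytes are needed to bring the destination (r0) up to the next 32-byte boundary, and skips the adjustment when the pointer is already aligned or too few bytes remain. A hypothetical C rendering of that arithmetic (the helper name is made up):

    #include <stddef.h>
    #include <stdint.h>

    /* "ands ip, r0, #31" takes the destination's offset within its 32-byte
     * line, "rsb r3, ip, #32" gives the bytes needed to reach the next
     * boundary, and the sbcnes/bcs pair skips the adjustment when the
     * pointer is already aligned or the remaining length is too small. */
    static size_t dst_align_prologue(uintptr_t dst, size_t remaining)
    {
    	size_t offset = dst & 31;	/* offset within the 32-byte line */

    	if (offset == 0)
    		return 0;		/* already line-aligned: nothing to do */
    	if (32 - offset >= remaining)
    		return 0;		/* too short to reach the boundary */
    	return 32 - offset;		/* bytes to copy before the bulk loop */
    }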

+ 2 - 10
arch/arm/lib/memmove.S

@@ -13,14 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do.  That might be different in the future.
- */
-//#define CALGN(code...)        code
-#define CALGN(code...)
-
 		.text
 
 /*
@@ -55,7 +47,7 @@ ENTRY(memmove)
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
@@ -139,7 +131,7 @@ ENTRY(memmove)
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)

+ 15 - 0
include/asm-arm/assembler.h

@@ -55,6 +55,21 @@
 #define PLD(code...)
 #endif
 
+/*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory.  Experiments on StrongARM and
+ * XScale didn't show this a worthwhile thing to do when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
 /*
  * Enable and disable interrupts
  */
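
As a usage note, the new CALGN() wrapper works like the existing PLD() wrapper just above it: each wrapped instruction is emitted only when CONFIG_CPU_FEROCEON is defined and disappears at preprocessing time otherwise, so other CPUs pay no cost. A stand-alone sketch of the same compile-away pattern in C (only the macro shape comes from the patch; the wrapped statement is made up):

    #ifdef CONFIG_CPU_FEROCEON
    #define CALGN(code...) code		/* emit the wrapped code */
    #else
    #define CALGN(code...)			/* drop it at preprocessing time */
    #endif

    static void demo(int *counter)
    {
    	CALGN(*counter += 1;)		/* present only in Feroceon builds */
    }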