@@ -19,6 +19,12 @@
  */

 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+	.section	".toc","aw"
+PPC64_CACHES:
+	.tc		ppc64_caches[TC],ppc64_caches
+	.section	".text"

 /**
  * __clear_user: - Zero a block of memory in user space, with less checking.
@@ -94,9 +100,14 @@ err1;	stw	r0,0(r3)
 	addi	r3,r3,4

 3:	sub	r4,r4,r6
-	srdi	r6,r4,5
+
 	cmpdi	r4,32
+	cmpdi	cr1,r4,512
 	blt	.Lshort_clear
+	bgt	cr1,.Llong_clear
+
+.Lmedium_clear:
+	srdi	r6,r4,5
 	mtctr	r6

 	/* Do 32 byte chunks */
@@ -139,3 +150,53 @@ err1;	stb	r0,0(r3)

 10:	li	r3,0
 	blr
+
+.Llong_clear:
+	ld	r5,PPC64_CACHES@toc(r2)
+
+	bf	cr7*4+0,11f
+err2;	std	r0,0(r3)
+	addi	r3,r3,8
+	addi	r4,r4,-8
+
+	/* Destination is 16 byte aligned, need to get it cacheline aligned */
+11:	lwz	r7,DCACHEL1LOGLINESIZE(r5)
+	lwz	r9,DCACHEL1LINESIZE(r5)
+
+	/*
+	 * With worst case alignment the long clear loop takes a minimum
+	 * of 1 byte less than 2 cachelines.
+	 */
+	sldi	r10,r9,2
+	cmpd	r4,r10
+	blt	.Lmedium_clear
+
+	neg	r6,r3
+	addi	r10,r9,-1
+	and.	r5,r6,r10
+	beq	13f
+
+	srdi	r6,r5,4
+	mtctr	r6
+	mr	r8,r3
+12:
+err1;	std	r0,0(r3)
+err1;	std	r0,8(r3)
+	addi	r3,r3,16
+	bdnz	12b
+
+	sub	r4,r4,r5
+
+13:	srd	r6,r4,r7
+	mtctr	r6
+	mr	r8,r3
+14:
+err1;	dcbz	r0,r3
+	add	r3,r3,r9
+	bdnz	14b
+
+	and	r4,r4,r10
+
+	cmpdi	r4,32
+	blt	.Lshort_clear
+	b	.Lmedium_clear