|
@@ -27,9 +27,8 @@
|
|
|
.section .altinstr_replacement, "ax", @progbits
|
|
|
.Lmemcpy_c:
|
|
|
movq %rdi, %rax
|
|
|
-
|
|
|
- movl %edx, %ecx
|
|
|
- shrl $3, %ecx
|
|
|
+ movq %rdx, %rcx
|
|
|
+ shrq $3, %rcx
|
|
|
andl $7, %edx
|
|
|
rep movsq
|
|
|
movl %edx, %ecx
|
|
@@ -48,8 +47,7 @@
|
|
|
.section .altinstr_replacement, "ax", @progbits
|
|
|
.Lmemcpy_c_e:
|
|
|
movq %rdi, %rax
|
|
|
-
|
|
|
- movl %edx, %ecx
|
|
|
+ movq %rdx, %rcx
|
|
|
rep movsb
|
|
|
ret
|
|
|
.Lmemcpy_e_e:
|
|
@@ -60,10 +58,7 @@ ENTRY(memcpy)
|
|
|
CFI_STARTPROC
|
|
|
movq %rdi, %rax
|
|
|
|
|
|
- /*
|
|
|
- * Use 32bit CMP here to avoid long NOP padding.
|
|
|
- */
|
|
|
- cmp $0x20, %edx
|
|
|
+ cmpq $0x20, %rdx
|
|
|
jb .Lhandle_tail
|
|
|
|
|
|
/*
|
|
@@ -72,7 +67,7 @@ ENTRY(memcpy)
|
|
|
*/
|
|
|
cmp %dil, %sil
|
|
|
jl .Lcopy_backward
|
|
|
- subl $0x20, %edx
|
|
|
+ subq $0x20, %rdx
|
|
|
.Lcopy_forward_loop:
|
|
|
subq $0x20, %rdx
|
|
|
|
|
@@ -91,7 +86,7 @@ ENTRY(memcpy)
|
|
|
movq %r11, 3*8(%rdi)
|
|
|
leaq 4*8(%rdi), %rdi
|
|
|
jae .Lcopy_forward_loop
|
|
|
- addq $0x20, %rdx
|
|
|
+ addl $0x20, %edx
|
|
|
jmp .Lhandle_tail
|
|
|
|
|
|
.Lcopy_backward:
|
|
@@ -123,11 +118,11 @@ ENTRY(memcpy)
|
|
|
/*
|
|
|
* Calculate copy position to head.
|
|
|
*/
|
|
|
- addq $0x20, %rdx
|
|
|
+ addl $0x20, %edx
|
|
|
subq %rdx, %rsi
|
|
|
subq %rdx, %rdi
|
|
|
.Lhandle_tail:
|
|
|
- cmpq $16, %rdx
|
|
|
+ cmpl $16, %edx
|
|
|
jb .Lless_16bytes
|
|
|
|
|
|
/*
|
|
@@ -144,7 +139,7 @@ ENTRY(memcpy)
|
|
|
retq
|
|
|
.p2align 4
|
|
|
.Lless_16bytes:
|
|
|
- cmpq $8, %rdx
|
|
|
+ cmpl $8, %edx
|
|
|
jb .Lless_8bytes
|
|
|
/*
|
|
|
* Move data from 8 bytes to 15 bytes.
|
|
@@ -156,7 +151,7 @@ ENTRY(memcpy)
|
|
|
retq
|
|
|
.p2align 4
|
|
|
.Lless_8bytes:
|
|
|
- cmpq $4, %rdx
|
|
|
+ cmpl $4, %edx
|
|
|
jb .Lless_3bytes
|
|
|
|
|
|
/*
|