
x86-64: Fix memcpy() to support sizes of 4Gb and above

While currently there doesn't appear to be any reachable in-tree
case where such large memory blocks may be passed to memcpy(),
we have already hit the problem in our Xen kernels. Just as was
done recently for memset(), rather than working around it,
prevent others from falling into the same trap by fixing this
long-standing limitation.
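
A minimal user-space sketch of the truncation being fixed (illustrative
only; the size constant and printf output are assumptions, not taken from
the patch): deriving the "rep movsq" count from only the low 32 bits of
the length, as the old "movl %edx, %ecx" did, discards everything at or
above 4Gb:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t len = (1ULL << 32) + 8;   /* example size: 4Gb + 8 bytes */
		uint32_t ecx = (uint32_t)len;      /* what "movl %edx, %ecx" keeps */

		/* old: quadword count derived from the truncated 32-bit value */
		printf("old rep movsq count: %u\n", ecx >> 3);
		/* new: full 64-bit count, as "movq %rdx, %rcx; shrq $3, %rcx" */
		printf("new rep movsq count: %llu\n", (unsigned long long)(len >> 3));
		return 0;
	}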

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F21846F020000780006F3FA@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Jan Beulich, 13 years ago
commit 2ab560911a
1 changed file with 10 additions and 15 deletions

arch/x86/lib/memcpy_64.S  +10 -15

@@ -27,9 +27,8 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
-	shrl $3, %ecx
+	movq %rdx, %rcx
+	shrq $3, %rcx
 	andl $7, %edx
 	rep movsq
 	movl %edx, %ecx
@@ -48,8 +47,7 @@
 	.section .altinstr_replacement, "ax", @progbits
 .Lmemcpy_c_e:
 	movq %rdi, %rax
-
-	movl %edx, %ecx
+	movq %rdx, %rcx
 	rep movsb
 	ret
 .Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
 	CFI_STARTPROC
 	movq %rdi, %rax

-	/*
-	 * Use 32bit CMP here to avoid long NOP padding.
-	 */
-	cmp  $0x20, %edx
+	cmpq $0x20, %rdx
 	jb .Lhandle_tail

 	/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
 	 */
 	cmp  %dil, %sil
 	jl .Lcopy_backward
-	subl $0x20, %edx
+	subq $0x20, %rdx
 .Lcopy_forward_loop:
 	subq $0x20,	%rdx

@@ -91,7 +86,7 @@ ENTRY(memcpy)
 	movq %r11,	3*8(%rdi)
 	leaq 4*8(%rdi),	%rdi
 	jae  .Lcopy_forward_loop
-	addq $0x20,	%rdx
+	addl $0x20,	%edx
 	jmp  .Lhandle_tail

 .Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
 	/*
 	 * Calculate copy position to head.
 	 */
-	addq $0x20,	%rdx
+	addl $0x20,	%edx
 	subq %rdx,	%rsi
 	subq %rdx,	%rdi
 .Lhandle_tail:
-	cmpq $16,	%rdx
+	cmpl $16,	%edx
 	jb   .Lless_16bytes

 	/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_16bytes:
-	cmpq $8,	%rdx
+	cmpl $8,	%edx
 	jb   .Lless_8bytes
 	/*
 	 * Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_8bytes:
-	cmpq $4,	%rdx
+	cmpl $4,	%edx
 	jb   .Lless_3bytes

 	/*