memcpy.S 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /* Copyright 2002 Andi Kleen */
  2. #include <asm/cpufeature.h>
  /*
   * memcpy - Copy a memory block.
   *
   * Syntax: GNU as, AT&T operand order (source, destination).
   * The argument registers follow the SysV AMD64 order for a
   * three-argument call: memcpy(destination, source, count).
   *
   * Input:
   *   rdi  destination
   *   rsi  source
   *   rdx  count in bytes (the full 64-bit value is honoured)
   *
   * Output:
   *   rax  original destination
   *
   * Clobbers: rcx, rsi, rdi, r8, r9, r10, r11, flags.
   * Stack:    none used.
   *
   * Strategy: copy whole 64-byte blocks with an unrolled loop, then the
   * remaining whole 8-byte words, then the remaining single bytes.  rdx
   * is never modified, so each stage derives its own iteration count
   * from the low bits of the original count.
   *
   * The copy always proceeds from low to high addresses.
   */
        .globl  __memcpy
        .globl  memcpy
        .p2align 4
__memcpy:
memcpy:
        movq    %rdi, %rax              /* return value: original destination */

        /*
         * 64-bit arithmetic on purpose: a 32-bit block counter would
         * silently truncate counts of 4 GiB and above.
         */
        movq    %rdx, %rcx
        shrq    $6, %rcx                /* rcx = number of whole 64-byte blocks */
        jz      .Lhandle_tail

        .p2align 4
.Lloop_64:
        /*
         * decq sets ZF for the jnz at the bottom of the loop; the movq
         * and leaq instructions in between leave the flags untouched.
         */
        decq    %rcx

        movq    (%rsi), %r11
        movq    1*8(%rsi), %r8
        movq    %r11, (%rdi)
        movq    %r8, 1*8(%rdi)

        movq    2*8(%rsi), %r9
        movq    3*8(%rsi), %r10
        movq    %r9, 2*8(%rdi)
        movq    %r10, 3*8(%rdi)

        movq    4*8(%rsi), %r11
        movq    5*8(%rsi), %r8
        movq    %r11, 4*8(%rdi)
        movq    %r8, 5*8(%rdi)

        movq    6*8(%rsi), %r9
        movq    7*8(%rsi), %r10
        movq    %r9, 6*8(%rdi)
        movq    %r10, 7*8(%rdi)

        leaq    64(%rsi), %rsi          /* lea: advance without touching flags */
        leaq    64(%rdi), %rdi
        jnz     .Lloop_64

.Lhandle_tail:
        /* Only bits 3..5 of the count matter here, so 32-bit ops suffice. */
        movl    %edx, %ecx
        andl    $63, %ecx
        shrl    $3, %ecx                /* ecx = whole 8-byte words left (0..7) */
        jz      .Lhandle_7

        .p2align 4
.Lloop_8:
        decl    %ecx                    /* ZF consumed by jnz below */
        movq    (%rsi), %r8
        movq    %r8, (%rdi)
        leaq    8(%rdi), %rdi
        leaq    8(%rsi), %rsi
        jnz     .Lloop_8

.Lhandle_7:
        movl    %edx, %ecx
        andl    $7, %ecx                /* ecx = trailing bytes (0..7) */
        jz      .Lende

        .p2align 4
.Lloop_1:
        movb    (%rsi), %r8b
        movb    %r8b, (%rdi)
        incq    %rdi
        incq    %rsi
        decl    %ecx
        jnz     .Lloop_1

.Lende:
        ret

        /*
         * End-of-function marker.  The .altinstructions record for memcpy
         * emits .Lfinal-memcpy as a single .byte, so the code between
         * memcpy and this label must stay below 256 bytes.
         */
.Lfinal:
  /*
   * C stepping K8 CPUs run faster using the string copy instructions,
   * and that version is also a lot simpler.  Use it when possible.
   *
   * The record below pairs memcpy with memcpy_c under the feature id
   * X86_FEATURE_K8_C.
   * NOTE(review): the field order and widths presumably have to match
   * the record layout expected by the code that consumes the
   * .altinstructions section; confirm against that definition before
   * changing anything here.
   */
        .section .altinstructions,"a"
        .align  8
        .quad   memcpy                  /* start of the generic routine */
        .quad   memcpy_c                /* start of the string-instruction variant */
        .byte   X86_FEATURE_K8_C        /* feature id associated with this record */
        .byte   .Lfinal-memcpy          /* size of memcpy in bytes (must be < 256) */
        .byte   memcpy_c_end-memcpy_c   /* size of memcpy_c in bytes */
        .previous

        .section .altinstr_replacement,"ax"
/*
 * memcpy_c - memcpy built on rep movsq / rep movsb.
 *
 * Input:
 *   rdi  destination
 *   rsi  source
 *   rdx  count in bytes (the full 64-bit value is honoured)
 *
 * Output:
 *   rax  original destination
 *
 * Clobbers: rcx, rdx, rsi, rdi, flags.
 * NOTE(review): assumes the direction flag is clear on entry so the
 * string instructions count upwards; confirm against the callers' ABI.
 */
memcpy_c:
        movq    %rdi, %rax              /* return value: original destination */

        /*
         * rep movsq counts in the full rcx, so compute the word count in
         * 64 bits; a 32-bit move would truncate counts of 4 GiB and above.
         */
        movq    %rdx, %rcx
        shrq    $3, %rcx                /* rcx = whole 8-byte words */
        andl    $7, %edx                /* edx = trailing bytes (0..7) */
        rep
        movsq

        movl    %edx, %ecx              /* 32-bit write zero-extends into rcx */
        rep
        movsb
        ret
memcpy_c_end:
        .previous