/* memcpy.S */
/* Copyright 2002 Andi Kleen */
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *	rdi	destination
 *	rsi	source
 *	rdx	count
 *
 * Output:
 *	rax	original destination
 */
/*
 * memcpy_c - memcpy variant built on the REP string instructions.
 *
 * Not called directly: on CPUs flagged X86_FEATURE_REP_GOOD (where
 * "rep movs" is fast) the alternatives mechanism patches the start of
 * memcpy with a short jmp here at boot (see .altinstructions below).
 *
 * In:  rdi = destination, rsi = source, rdx = count
 * Out: rax = original destination
 *
 * NOTE(review): only %edx (the low 32 bits of the count) is consulted,
 * same as the generic copy below — presumably callers never pass
 * counts >= 4 GiB; confirm against kernel memcpy conventions.
 */
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax		/* return value: original destination */
	movl %edx,%ecx
	shrl $3,%ecx		/* ecx = number of whole qwords */
	andl $7,%edx		/* edx = remaining tail bytes (0..7) */
	rep movsq		/* bulk copy, 8 bytes per iteration */
	movl %edx,%ecx
	rep movsb		/* copy the 0..7 byte tail */
	ret
	CFI_ENDPROC
	ENDPROC(memcpy_c)
/*
 * memcpy / __memcpy - generic copy, unrolled to move 64 bytes per
 * main-loop iteration with interleaved load/store pairs.
 *
 * In:  rdi = destination, rsi = source, rdx = count
 * Out: rax = original destination
 *
 * Only %edx (low 32 bits of the count) is used throughout.
 * The span memcpy..Lfinal is the region the alternatives patcher may
 * overwrite (its length is recorded in .altinstructions below).
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	pushq %rbx		/* NOTE(review): rbx is saved but never
				   written below; appears vestigial, kept
				   to leave stack layout/CFI unchanged */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0

	movq %rdi,%rax		/* return value: original destination */

	movl %edx,%ecx
	shrl $6,%ecx		/* ecx = number of 64-byte chunks */
	jz .Lhandle_tail	/* count < 64: go straight to the tail */

	.p2align 4
.Lloop_64:
	/*
	 * Copy one 64-byte chunk.  decl is hoisted to the top so the
	 * flags it sets are long settled by the jnz at the bottom
	 * (leaq does not touch flags).
	 */
	decl %ecx

	movq (%rsi),%r11
	movq 8(%rsi),%r8
	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10
	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8
	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10
	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi	/* advance pointers without clobbering */
	leaq 64(%rdi),%rdi	/* the flags from decl above */
	jnz .Lloop_64

.Lhandle_tail:
	movl %edx,%ecx
	andl $63,%ecx		/* bytes left after the 64-byte chunks */
	shrl $3,%ecx		/* ... expressed as whole qwords */
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	/* Copy one qword per iteration (0..7 iterations). */
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx		/* final 0..7 single bytes */
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
.Lfinal:			/* end marker: .Lfinal - memcpy is the
				   length available for patching */
	CFI_ENDPROC
	ENDPROC(memcpy)
	ENDPROC(__memcpy)
	/*
	 * Some CPUs run faster using the string copy instructions.
	 * It is also a lot simpler.  Use this when possible.
	 *
	 * The replacement is a 2-byte short jmp from memcpy to memcpy_c;
	 * the record below tells the boot-time alternatives patcher to
	 * install it when the CPU has X86_FEATURE_REP_GOOD.
	 */
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb					/* jmp <disp8> opcode */
	.byte (memcpy_c - memcpy) - (2f - 1b)		/* disp8: target minus end of jmp */
2:
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad memcpy					/* original instruction address */
	.quad 1b					/* replacement code address */
	.byte X86_FEATURE_REP_GOOD			/* patch only if CPU has this feature */
	.byte .Lfinal - memcpy				/* length of the patchable original */
	.byte 2b - 1b					/* length of the replacement (2 bytes) */
	.previous