memmove_64.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. /* Normally compiler builtins are used, but sometimes the compiler calls out
  2. of line code. Based on asm-i386/string.h.
  3. */
  4. #define _STRING_C
  5. #include <linux/string.h>
  6. #include <linux/module.h>
  7. #undef memmove
  8. void *memmove(void *dest, const void *src, size_t count)
  9. {
  10. unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
  11. char *ret;
  12. __asm__ __volatile__(
  13. /* Handle more 32bytes in loop */
  14. "mov %2, %3\n\t"
  15. "cmp $0x20, %0\n\t"
  16. "jb 1f\n\t"
  17. /* Decide forward/backward copy mode */
  18. "cmp %2, %1\n\t"
  19. "jb 2f\n\t"
  20. /*
  21. * movsq instruction have many startup latency
  22. * so we handle small size by general register.
  23. */
  24. "cmp $680, %0\n\t"
  25. "jb 3f\n\t"
  26. /*
  27. * movsq instruction is only good for aligned case.
  28. */
  29. "cmpb %%dil, %%sil\n\t"
  30. "je 4f\n\t"
  31. "3:\n\t"
  32. "sub $0x20, %0\n\t"
  33. /*
  34. * We gobble 32byts forward in each loop.
  35. */
  36. "5:\n\t"
  37. "sub $0x20, %0\n\t"
  38. "movq 0*8(%1), %4\n\t"
  39. "movq 1*8(%1), %5\n\t"
  40. "movq 2*8(%1), %6\n\t"
  41. "movq 3*8(%1), %7\n\t"
  42. "leaq 4*8(%1), %1\n\t"
  43. "movq %4, 0*8(%2)\n\t"
  44. "movq %5, 1*8(%2)\n\t"
  45. "movq %6, 2*8(%2)\n\t"
  46. "movq %7, 3*8(%2)\n\t"
  47. "leaq 4*8(%2), %2\n\t"
  48. "jae 5b\n\t"
  49. "addq $0x20, %0\n\t"
  50. "jmp 1f\n\t"
  51. /*
  52. * Handle data forward by movsq.
  53. */
  54. ".p2align 4\n\t"
  55. "4:\n\t"
  56. "movq %0, %8\n\t"
  57. "movq -8(%1, %0), %4\n\t"
  58. "lea -8(%2, %0), %5\n\t"
  59. "shrq $3, %8\n\t"
  60. "rep movsq\n\t"
  61. "movq %4, (%5)\n\t"
  62. "jmp 13f\n\t"
  63. /*
  64. * Handle data backward by movsq.
  65. */
  66. ".p2align 4\n\t"
  67. "7:\n\t"
  68. "movq %0, %8\n\t"
  69. "movq (%1), %4\n\t"
  70. "movq %2, %5\n\t"
  71. "leaq -8(%1, %0), %1\n\t"
  72. "leaq -8(%2, %0), %2\n\t"
  73. "shrq $3, %8\n\t"
  74. "std\n\t"
  75. "rep movsq\n\t"
  76. "cld\n\t"
  77. "movq %4, (%5)\n\t"
  78. "jmp 13f\n\t"
  79. /*
  80. * Start to prepare for backward copy.
  81. */
  82. ".p2align 4\n\t"
  83. "2:\n\t"
  84. "cmp $680, %0\n\t"
  85. "jb 6f \n\t"
  86. "cmp %%dil, %%sil\n\t"
  87. "je 7b \n\t"
  88. "6:\n\t"
  89. /*
  90. * Calculate copy position to tail.
  91. */
  92. "addq %0, %1\n\t"
  93. "addq %0, %2\n\t"
  94. "subq $0x20, %0\n\t"
  95. /*
  96. * We gobble 32byts backward in each loop.
  97. */
  98. "8:\n\t"
  99. "subq $0x20, %0\n\t"
  100. "movq -1*8(%1), %4\n\t"
  101. "movq -2*8(%1), %5\n\t"
  102. "movq -3*8(%1), %6\n\t"
  103. "movq -4*8(%1), %7\n\t"
  104. "leaq -4*8(%1), %1\n\t"
  105. "movq %4, -1*8(%2)\n\t"
  106. "movq %5, -2*8(%2)\n\t"
  107. "movq %6, -3*8(%2)\n\t"
  108. "movq %7, -4*8(%2)\n\t"
  109. "leaq -4*8(%2), %2\n\t"
  110. "jae 8b\n\t"
  111. /*
  112. * Calculate copy position to head.
  113. */
  114. "addq $0x20, %0\n\t"
  115. "subq %0, %1\n\t"
  116. "subq %0, %2\n\t"
  117. "1:\n\t"
  118. "cmpq $16, %0\n\t"
  119. "jb 9f\n\t"
  120. /*
  121. * Move data from 16 bytes to 31 bytes.
  122. */
  123. "movq 0*8(%1), %4\n\t"
  124. "movq 1*8(%1), %5\n\t"
  125. "movq -2*8(%1, %0), %6\n\t"
  126. "movq -1*8(%1, %0), %7\n\t"
  127. "movq %4, 0*8(%2)\n\t"
  128. "movq %5, 1*8(%2)\n\t"
  129. "movq %6, -2*8(%2, %0)\n\t"
  130. "movq %7, -1*8(%2, %0)\n\t"
  131. "jmp 13f\n\t"
  132. ".p2align 4\n\t"
  133. "9:\n\t"
  134. "cmpq $8, %0\n\t"
  135. "jb 10f\n\t"
  136. /*
  137. * Move data from 8 bytes to 15 bytes.
  138. */
  139. "movq 0*8(%1), %4\n\t"
  140. "movq -1*8(%1, %0), %5\n\t"
  141. "movq %4, 0*8(%2)\n\t"
  142. "movq %5, -1*8(%2, %0)\n\t"
  143. "jmp 13f\n\t"
  144. "10:\n\t"
  145. "cmpq $4, %0\n\t"
  146. "jb 11f\n\t"
  147. /*
  148. * Move data from 4 bytes to 7 bytes.
  149. */
  150. "movl (%1), %4d\n\t"
  151. "movl -4(%1, %0), %5d\n\t"
  152. "movl %4d, (%2)\n\t"
  153. "movl %5d, -4(%2, %0)\n\t"
  154. "jmp 13f\n\t"
  155. "11:\n\t"
  156. "cmp $2, %0\n\t"
  157. "jb 12f\n\t"
  158. /*
  159. * Move data from 2 bytes to 3 bytes.
  160. */
  161. "movw (%1), %4w\n\t"
  162. "movw -2(%1, %0), %5w\n\t"
  163. "movw %4w, (%2)\n\t"
  164. "movw %5w, -2(%2, %0)\n\t"
  165. "jmp 13f\n\t"
  166. "12:\n\t"
  167. "cmp $1, %0\n\t"
  168. "jb 13f\n\t"
  169. /*
  170. * Move data for 1 byte.
  171. */
  172. "movb (%1), %4b\n\t"
  173. "movb %4b, (%2)\n\t"
  174. "13:\n\t"
  175. : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
  176. "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
  177. :"0" (count),
  178. "1" (src),
  179. "2" (dest)
  180. :"memory");
  181. return ret;
  182. }
  183. EXPORT_SYMBOL(memmove);