memmove.S 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. /*
  2. * linux/arch/arm/lib/memmove.S
  3. *
  4. * Author: Nicolas Pitre
  5. * Created: Sep 28, 2005
  6. * Copyright: (C) MontaVista Software Inc.
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License version 2 as
  10. * published by the Free Software Foundation.
  11. */
  12. #include <linux/linkage.h>
  13. #include <asm/assembler.h>
  14. /*
  15. * This can be used to enable code to cacheline align the source pointer.
  16. * Experiments on tested architectures (StrongARM and XScale) didn't show
  17. * this to be a worthwhile thing to do. That might be different in the future.
  18. */
  /* Alternative definition, left commented out: CALGN(code) would keep its argument. */
  19. //#define CALGN(code...) code
  /* Active definition: CALGN(...) expands to nothing, so every CALGN( ) line below is dropped. */
  20. #define CALGN(code...)
  21. .text
  22. /*
  23. * Prototype: void *memmove(void *dest, const void *src, size_t n);
  24. *
  25. * Note:
  26. *
  27. * If the memory regions don't overlap, we simply branch to memcpy which is
  28. * normally a bit faster. Otherwise the copy is done going downwards. This
  29. * is a transposition of the code from copy_template.S but with the copy
  30. * occurring in the opposite direction.
  31. */
  /*
   * memmove: r0 = dest, r1 = src, r2 = n (argument order of the prototype
   * above; the register assignment is assumed from the ARM calling
   * convention). The original dest is pushed on entry and popped back into
   * r0 by the final ldmfd at label 8, so it is the return value.
   *
   * Local labels in this part:
   *   1-7  both pointers word aligned: 32-byte blocks, then leftover words
   *   8    final 0-3 bytes, then return
   *   9    copy 1-3 bytes so that the destination becomes word aligned
   *   10   source not word aligned: dispatch to a shifting copy
   *
   * CALGN( ) lines are compiled out while CALGN() is defined empty earlier
   * in this file. PLD() is not defined in this file (presumably it comes
   * from <asm/assembler.h> -- confirm).
   */
  32. ENTRY(memmove)
  33. subs ip, r0, r1 /* ip = dest - src */
  34. cmphi r2, ip /* only when dest > src: compare n with that distance */
  35. bls memcpy /* taken when dest <= src or n <= dest - src */
  36. stmfd sp!, {r0, r4, lr} /* r0 is saved so it can be returned unchanged */
  37. add r1, r1, r2 /* r1 = one past the last source byte */
  38. add r0, r0, r2 /* r0 = one past the last destination byte */
  39. subs r2, r2, #4 /* r2 = bytes left - 4 */
  40. blt 8f /* fewer than 4 bytes: byte tail only */
  41. ands ip, r0, #3
  42. PLD( pld [r1, #-4] )
  43. bne 9f /* destination end is not word aligned */
  44. ands ip, r1, #3
  45. bne 10f /* source end is not word aligned */
  /* 1: both pointers are word aligned. After the subs, r2 = bytes left - 32. */
  46. 1: subs r2, r2, #(28)
  47. stmfd sp!, {r5 - r8}
  48. blt 5f /* flags still from the subs: fewer than 32 bytes left */
  49. CALGN( ands ip, r1, #31 )
  50. CALGN( sbcnes r4, ip, r2 ) @ C is always set here
  51. CALGN( bcs 2f )
  52. CALGN( adr r4, 6f )
  53. CALGN( subs r2, r2, ip ) @ C is set here
  54. CALGN( add pc, r4, ip )
  55. PLD( pld [r1, #-4] )
  56. 2: PLD( subs r2, r2, #96 )
  57. PLD( pld [r1, #-32] )
  58. PLD( blt 4f )
  59. PLD( pld [r1, #-64] )
  60. PLD( pld [r1, #-96] )
  /* 3/4: main loop, 32 bytes per iteration through eight registers, copying downwards. */
  61. 3: PLD( pld [r1, #-128] )
  62. 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
  63. subs r2, r2, #32
  64. stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
  65. bge 3b
  66. PLD( cmn r2, #96 )
  67. PLD( bge 4b )
  /*
   * 5: copy the remaining whole words (0-7). ip = 32 - (r2 & 28) is a byte
   * offset that is used twice to jump into the middle of the ldr run and of
   * the str run below, so that only the last (r2 & 28) / 4 instructions of
   * each run execute. The addne tests the Z flag left by the ands (the rsb
   * does not set flags); with no whole word left it falls through to b 7f.
   * The nop padding accounts for pc reading as the address of the current
   * instruction plus 8 in ARM state. Do not insert, remove or reorder
   * instructions between the addne and the last str.
   */
  68. 5: ands ip, r2, #28
  69. rsb ip, ip, #32
  70. addne pc, pc, ip @ C is always clear here
  71. b 7f
  72. 6: nop
  73. ldr r3, [r1, #-4]!
  74. ldr r4, [r1, #-4]!
  75. ldr r5, [r1, #-4]!
  76. ldr r6, [r1, #-4]!
  77. ldr r7, [r1, #-4]!
  78. ldr r8, [r1, #-4]!
  79. ldr lr, [r1, #-4]!
  80. add pc, pc, ip
  81. nop
  82. nop
  83. str r3, [r0, #-4]!
  84. str r4, [r0, #-4]!
  85. str r5, [r0, #-4]!
  86. str r6, [r0, #-4]!
  87. str r7, [r0, #-4]!
  88. str r8, [r0, #-4]!
  89. str lr, [r0, #-4]!
  90. CALGN( bcs 2b )
  91. 7: ldmfd sp!, {r5 - r8}
  /*
   * 8: trailing bytes. On the paths visible here r2 has only been changed by
   * multiples of 4 or by the number of bytes already copied, so its two low
   * bits are the number of bytes still to copy. After the shift, NE means
   * bit 0 was set (copy one byte) and C holds bit 1 (copy two more bytes).
   */
  92. 8: movs r2, r2, lsl #31
  93. ldrneb r3, [r1, #-1]!
  94. ldrcsb r4, [r1, #-1]!
  95. ldrcsb ip, [r1, #-1]
  96. strneb r3, [r0, #-1]!
  97. strcsb r4, [r0, #-1]!
  98. strcsb ip, [r0, #-1]
  99. ldmfd sp!, {r0, r4, pc} /* r0 = dest as passed in; loading pc returns */
  /*
   * 9: ip = r0 & 3 (1-3). Copy ip bytes so that r0 becomes word aligned:
   * the gt pair runs for ip = 3, the ge pair for ip >= 2, the unconditional
   * pair always. r2 is reduced by the same ip.
   */
  100. 9: cmp ip, #2
  101. ldrgtb r3, [r1, #-1]!
  102. ldrgeb r4, [r1, #-1]!
  103. ldrb lr, [r1, #-1]!
  104. strgtb r3, [r0, #-1]!
  105. strgeb r4, [r0, #-1]!
  106. subs r2, r2, ip
  107. strb lr, [r0, #-1]!
  108. blt 8b /* flags from the subs: fewer than 4 bytes left */
  109. ands ip, r1, #3
  110. beq 1b /* source is word aligned as well */
  /*
   * 10: ip = r1 & 3 (1-3). Round r1 down to a word boundary, load the word
   * that contains the partial source bytes into r3, and pick the code that
   * follows according to ip: 3 falls through, 2 goes to 17, 1 goes to 18.
   */
  111. 10: bic r1, r1, #3
  112. cmp ip, #2
  113. ldr r3, [r1, #0]
  114. beq 17f
  115. blt 18f
  /*
   * backward_copy_shift push, pull
   *
   * Downward copy used when the destination is word aligned but the source
   * is not. It is expanded after label 10, which rounds r1 down to a word
   * boundary and loads the word at [r1] into r3. Every stored word is built
   * from two neighbouring source words: the upper one shifted with "push"
   * by the push argument, OR-ed with the lower one shifted with "pull" by
   * the pull argument.
   *
   * "push" and "pull" as shift operators are not defined in this file;
   * presumably they are shift aliases from <asm/assembler.h> -- confirm.
   *
   * Labels 11-16 are numeric local labels, so each expansion of the macro
   * resolves its own copies. Label 8 (the target of b 8b) is the byte tail
   * outside the macro. CALGN( ) lines are compiled out while CALGN() is
   * defined empty.
   */
  116. .macro backward_copy_shift push pull
  117. subs r2, r2, #28 /* r2 = bytes left - 32 */
  118. blt 14f /* fewer than 32 bytes left */
  119. CALGN( ands ip, r1, #31 )
  120. CALGN( rsb ip, ip, #32 )
  121. CALGN( sbcnes r4, ip, r2 ) @ C is always set here
  122. CALGN( subcc r2, r2, ip )
  123. CALGN( bcc 15f )
  124. 11: stmfd sp!, {r5 - r9}
  125. PLD( pld [r1, #-4] )
  126. PLD( subs r2, r2, #96 )
  127. PLD( pld [r1, #-32] )
  128. PLD( blt 13f )
  129. PLD( pld [r1, #-64] )
  130. PLD( pld [r1, #-96] )
  /*
   * 12/13: 32 bytes per iteration. Eight new source words are loaded, from
   * the highest address down: ip, r9, r8, r7 by the first ldmdb and r6, r5,
   * r4, r3 by the second. r3 holds the lowest word and is carried over into
   * the next iteration (or into label 15). The result of the subs stays in
   * the flags until the bge, because none of the instructions in between
   * set flags.
   */
  131. 12: PLD( pld [r1, #-128] )
  132. 13: ldmdb r1!, {r7, r8, r9, ip}
  133. mov lr, r3, push #\push
  134. subs r2, r2, #32
  135. ldmdb r1!, {r3, r4, r5, r6}
  136. orr lr, lr, ip, pull #\pull
  137. mov ip, ip, push #\push
  138. orr ip, ip, r9, pull #\pull
  139. mov r9, r9, push #\push
  140. orr r9, r9, r8, pull #\pull
  141. mov r8, r8, push #\push
  142. orr r8, r8, r7, pull #\pull
  143. mov r7, r7, push #\push
  144. orr r7, r7, r6, pull #\pull
  145. mov r6, r6, push #\push
  146. orr r6, r6, r5, pull #\pull
  147. mov r5, r5, push #\push
  148. orr r5, r5, r4, pull #\pull
  149. mov r4, r4, push #\push
  150. orr r4, r4, r3, pull #\pull
  151. stmdb r0!, {r4 - r9, ip, lr}
  152. bge 12b
  153. PLD( cmn r2, #96 )
  154. PLD( bge 13b )
  155. ldmfd sp!, {r5 - r9}
  156. 14: ands ip, r2, #28 /* ip = byte count of the remaining whole words */
  157. beq 16f
  /* 15: one word per iteration; ip counts down by 4 until it reaches zero. */
  158. 15: mov lr, r3, push #\push
  159. ldr r3, [r1, #-4]!
  160. subs ip, ip, #4
  161. orr lr, lr, r3, pull #\pull
  162. str lr, [r0, #-4]!
  163. bgt 15b
  164. CALGN( cmp r2, #0 )
  165. CALGN( bge 11b )
  /*
   * 16: r1 was rounded down to a word boundary before the macro. Adding
   * pull / 8 bytes (3, 2 or 1 for the expansions in this file) moves it back
   * to the exact source position before branching to the byte tail.
   */
  166. 16: add r1, r1, #(\pull / 8)
  167. b 8b
  168. .endm
  /*
   * Three expansions of backward_copy_shift, one per source misalignment
   * (r1 & 3 as tested at label 10): 3 falls through into the first, 2 enters
   * at label 17, 1 enters at label 18. In each case pull / 8 equals that
   * misalignment and push + pull = 32. Every expansion ends with b 8b, so
   * control never runs from one expansion into the next.
   */
  169. backward_copy_shift push=8 pull=24
  170. 17: backward_copy_shift push=16 pull=16
  171. 18: backward_copy_shift push=24 pull=8