memcpy.S 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. /*
  2. * linux/arch/arm/lib/memcpy.S
  3. *
  4. * Author: Nicolas Pitre
  5. * Created: Sep 28, 2005
  6. * Copyright: MontaVista Software, Inc.
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License version 2 as
  10. * published by the Free Software Foundation.
  11. */
  12. #include <asm/assembler.h>
  /*
   * W() wraps the ldr, str and nop mnemonics used below.  In this file it
   * expands to its argument unchanged.
   */
  13. #define W(instr) instr
  /*
   * Left-shift applied to the computed-jump offset before it is added to
   * pc to enter the table of ldr1w expansions in memcpy, and the log2 of
   * the number of padding nops placed in front of that table.  With 0 the
   * offset is used unscaled and one nop is emitted.
   */
  14. #define LDR1W_SHIFT 0
  /* Same role as LDR1W_SHIFT, for the table of str1w expansions. */
  15. #define STR1W_SHIFT 0
  /*
   * ldr1w ptr reg abort
   * Loads one word from the address in ptr into reg, then advances ptr by
   * 4 (post-indexed).  The abort argument is accepted but is not used by
   * the expansion.  Expands to exactly one instruction; the computed
   * jumps in memcpy index into a run of these and depend on that.
   */
  16. .macro ldr1w ptr reg abort
  17. W(ldr) \reg, [\ptr], #4
  18. .endm
  /*
   * ldr4w ptr reg1 reg2 reg3 reg4 abort
   * Loads four consecutive words starting at ptr into the four listed
   * registers with a single ldmia, writing the advanced address back to
   * ptr.  The abort argument is accepted but is not used.
   */
  19. .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
  20. ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
  21. .endm
  /*
   * ldr8w ptr reg1 ... reg8 abort
   * Loads eight consecutive words starting at ptr into the eight listed
   * registers with a single ldmia, writing the advanced address back to
   * ptr.  The abort argument is accepted but is not used.
   */
  22. .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
  23. ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
  24. .endm
  /*
   * ldr1b ptr reg cond=al abort
   * Conditionally loads one byte from ptr into reg and advances ptr by 1
   * (post-indexed).  cond is spliced between ldr and the b suffix and
   * defaults to al (always).  The abort argument is accepted but is not
   * used.
   */
  25. .macro ldr1b ptr reg cond=al abort
  26. ldr\cond\()b \reg, [\ptr], #1
  27. .endm
  /*
   * str1w ptr reg abort
   * Stores the word in reg to the address in ptr, then advances ptr by 4
   * (post-indexed).  The abort argument is accepted but is not used.
   * Expands to exactly one instruction; the computed jump in memcpy
   * indexes into a run of these and depends on that.
   */
  28. .macro str1w ptr reg abort
  29. W(str) \reg, [\ptr], #4
  30. .endm
  /*
   * str8w ptr reg1 ... reg8 abort
   * Stores the eight listed registers to consecutive words starting at
   * ptr with a single stmia, writing the advanced address back to ptr.
   * The abort argument is accepted but is not used.
   */
  31. .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
  32. stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
  33. .endm
  /*
   * str1b ptr reg cond=al abort
   * Conditionally stores the low byte of reg to ptr and advances ptr by 1
   * (post-indexed).  cond is spliced between str and the b suffix and
   * defaults to al (always).  The abort argument is accepted but is not
   * used.
   */
  34. .macro str1b ptr reg cond=al abort
  35. str\cond\()b \reg, [\ptr], #1
  36. .endm
  /*
   * enter reg1 reg2
   * Function prologue: pushes r0 together with reg1 and reg2 onto the
   * stack (decrement-before store with writeback to sp).  memcpy invokes
   * it as "enter r4, lr".  Must be paired with an exit naming the same
   * number of registers so the pop matches this push.
   */
  37. .macro enter reg1 reg2
  38. stmdb sp!, {r0, \reg1, \reg2}
  39. .endm
  /*
   * exit reg1 reg2
   * Function epilogue: pops three words into r0, reg1 and reg2.  r0 gets
   * back the value that enter pushed.  memcpy invokes it as "exit r4, pc",
   * so the word that enter saved from lr is loaded into pc and control
   * returns to the caller.
   */
  40. .macro exit reg1 reg2
  41. ldmfd sp!, {r0, \reg1, \reg2}
  42. .endm
  43. .text
  44. /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
  /*
   * Forward copy.  r0 is used as the store pointer (dest), r1 as the load
   * pointer (src) and r2 as the remaining byte count (n).  r3, r4, ip and
   * lr are scratch; r5-r8 are pushed only around the aligned bulk path.
   * The value of r0 on entry is saved by enter and reloaded by exit, so
   * that is what the caller gets back.
   *
   * r2 is kept biased below the true remaining count (by 4, later by 32)
   * so that the sign of a subs result doubles as the "enough left" test.
   * The bias is always a multiple of 4, so the low two bits of r2 still
   * equal the low two bits of the remaining count.
   *
   * PLD() and CALGN() are not defined in this file; presumably they come
   * from asm/assembler.h and either emit or drop their argument depending
   * on configuration - confirm there.  The abort= arguments (20f, 21f)
   * name labels that do not exist in this file; that is harmless because
   * none of the load/store macros reference their abort parameter.
   */
  45. .globl memcpy
  46. memcpy:
  /* Save r0 (the return value), r4 and the return address. */
  47. enter r4, lr
  /* r2 = n - 4; fewer than 4 bytes goes straight to the byte tail. */
  48. subs r2, r2, #4
  49. blt 8f
  /* ip = dest misalignment; non-zero means align dest first (label 9). */
  50. ands ip, r0, #3
  51. PLD( pld [r1, #0] )
  52. bne 9f
  /* ip = src misalignment; non-zero needs the shifting copy (label 10). */
  53. ands ip, r1, #3
  54. bne 10f
  /*
   * Label 1: dest and src are both word aligned.  r2 = remaining - 32.
   * The stmfd does not alter the flags from subs, so the blt still tests
   * whether a full 32-byte block is available; if not, go to label 5.
   */
  55. 1: subs r2, r2, #(28)
  56. stmfd sp!, {r5 - r8}
  57. blt 5f
  /*
   * NOTE(review): when CALGN emits its argument, this sequence looks like
   * it copies just enough words to bring r0 to a 32-byte boundary by
   * jumping into the load table at label 6 - confirm against the CALGN
   * definition.
   */
  58. CALGN( ands ip, r0, #31 )
  59. CALGN( rsb r3, ip, #32 )
  60. CALGN( sbcnes r4, r3, r2 ) @ C is always set here
  61. CALGN( bcs 2f )
  62. CALGN( adr r4, 6f )
  63. CALGN( subs r2, r2, r3 ) @ C gets set
  64. CALGN( add pc, r4, ip )
  /*
   * Prefetch setup.  When PLD emits its argument, r2 carries an extra
   * bias of 96 while inside the bulk loop (undone by the cmn after it).
   */
  65. PLD( pld [r1, #0] )
  66. 2: PLD( subs r2, r2, #96 )
  67. PLD( pld [r1, #28] )
  68. PLD( blt 4f )
  69. PLD( pld [r1, #60] )
  70. PLD( pld [r1, #92] )
  /*
   * Bulk loop: move 32 bytes per iteration through eight registers.
   * The str8w does not alter the flags from subs; the loop repeats while
   * r2 stays non-negative.
   */
  71. 3: PLD( pld [r1, #124] )
  72. 4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
  73. subs r2, r2, #32
  74. str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
  75. bge 3b
  76. PLD( cmn r2, #96 )
  77. PLD( bge 4b )
  /*
   * Label 5: 0 to 28 bytes of whole words remain (bits 2-4 of r2).
   * ip = 32 minus that byte count.  Adding ip to pc enters the table of
   * seven ldr1w below part-way through, so only the last k loads run for
   * k remaining words; the same offset is then reused to enter the
   * matching table of seven str1w.  This depends on every ldr1w, str1w
   * and nop occupying 4 bytes and on pc reading as the address of the
   * current instruction plus 8 (ARM state).  rsb leaves the flags from
   * ands intact, so when no words remain the addne is skipped and the
   * branch to label 7 is taken.
   */
  78. 5: ands ip, r2, #28
  79. rsb ip, ip, #32
  80. #if LDR1W_SHIFT > 0
  81. lsl ip, ip, #LDR1W_SHIFT
  82. #endif
  83. addne pc, pc, ip @ C is always clear here
  84. b 7f
  /* Label 6: padding, then the load table (entered by computed jump). */
  85. 6:
  86. .rept (1 << LDR1W_SHIFT)
  87. W(nop)
  88. .endr
  89. ldr1w r1, r3, abort=20f
  90. ldr1w r1, r4, abort=20f
  91. ldr1w r1, r5, abort=20f
  92. ldr1w r1, r6, abort=20f
  93. ldr1w r1, r7, abort=20f
  94. ldr1w r1, r8, abort=20f
  95. ldr1w r1, lr, abort=20f
  /* Rescale the offset if the two tables use different entry sizes. */
  96. #if LDR1W_SHIFT < STR1W_SHIFT
  97. lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
  98. #elif LDR1W_SHIFT > STR1W_SHIFT
  99. lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
  100. #endif
  /* Enter the store table at the entry matching the first load done. */
  101. add pc, pc, ip
  102. nop
  103. .rept (1 << STR1W_SHIFT)
  104. W(nop)
  105. .endr
  106. str1w r0, r3, abort=20f
  107. str1w r0, r4, abort=20f
  108. str1w r0, r5, abort=20f
  109. str1w r0, r6, abort=20f
  110. str1w r0, r7, abort=20f
  111. str1w r0, r8, abort=20f
  112. str1w r0, lr, abort=20f
  113. CALGN( bcs 2b )
  /* Label 7: the aligned path is finished with r5-r8; restore them. */
  114. 7: ldmfd sp!, {r5 - r8}
  /*
   * Label 8: copy the final 0 to 3 bytes.  The shift moves bit 0 of r2
   * into bit 31 (non-zero result, so ne) and bit 1 into the carry (cs):
   * one byte is copied under ne and two more under cs.
   */
  115. 8: movs r2, r2, lsl #31
  116. ldr1b r1, r3, ne, abort=21f
  117. ldr1b r1, r4, cs, abort=21f
  118. ldr1b r1, ip, cs, abort=21f
  119. str1b r0, r3, ne, abort=21f
  120. str1b r0, r4, cs, abort=21f
  121. str1b r0, ip, cs, abort=21f
  /* Restore r0 and r4 and load the saved lr into pc: return. */
  122. exit r4, pc
  /*
   * Label 9: dest is not word aligned.  ip = 4 - (dest & 3), i.e. the 1
   * to 3 bytes needed to align it.  The gt byte is copied only when ip is
   * 3, the ge byte when ip is 2 or 3, and the unconditional byte always.
   * The blt at the top was not taken, so (as a signed count) at least 4
   * bytes were requested and these bytes exist.
   */
  123. 9: rsb ip, ip, #4
  124. cmp ip, #2
  125. ldr1b r1, r3, gt, abort=21f
  126. ldr1b r1, r4, ge, abort=21f
  127. ldr1b r1, lr, abort=21f
  128. str1b r0, r3, gt, abort=21f
  129. str1b r0, r4, ge, abort=21f
  /* Account for the alignment bytes; the store keeps the subs flags. */
  130. subs r2, r2, ip
  131. str1b r0, lr, abort=21f
  /* Fewer than 4 bytes left: finish in the byte tail. */
  132. blt 8b
  /* dest is now aligned; if src is too, use the aligned path. */
  133. ands ip, r1, #3
  134. beq 1b
  /*
   * Label 10: dest is word aligned but src is off by ip = 1 to 3 bytes.
   * Round r1 down to a word boundary and preload the first word into lr.
   * The load does not alter the flags from cmp, so the branches select on
   * ip: 2 goes to label 17, 3 to label 18, and 1 falls through to the
   * code that follows.
   */
  135. 10: bic r1, r1, #3
  136. cmp ip, #2
  137. ldr1w r1, lr, abort=21f
  138. beq 17f
  139. bgt 18f
  /*
   * forward_copy_shift pull push
   * Copies to a word-aligned r0 from a source that is not word aligned.
   * Expected on entry (set up at label 10 of memcpy): r1 rounded down to
   * a word boundary and already advanced past the first word, lr holding
   * that word, r2 = remaining - 4.
   * Each output word is (previous source word shifted by the pull amount)
   * ORed with (next source word shifted by the push amount); lr always
   * carries the most recently loaded word into the next step.
   * The bare pull and push shift operators are not defined in this file -
   * presumably asm/assembler.h maps them to lsr/lsl according to
   * endianness; confirm there.
   * Numeric labels 11-16 are local, so every expansion gets its own set.
   */
  140. .macro forward_copy_shift pull push
  /* r2 = remaining - 32; no full 32-byte block means go to label 14. */
  141. subs r2, r2, #28
  142. blt 14f
  /* NOTE(review): optional cache-line alignment, emitted only via CALGN. */
  143. CALGN( ands ip, r0, #31 )
  144. CALGN( rsb ip, ip, #32 )
  145. CALGN( sbcnes r4, ip, r2 ) @ C is always set here
  146. CALGN( subcc r2, r2, ip )
  147. CALGN( bcc 15f )
  /* Label 11: the bulk loop needs r5-r9 in addition to the scratch set. */
  148. 11: stmfd sp!, {r5 - r9}
  149. PLD( pld [r1, #0] )
  150. PLD( subs r2, r2, #96 )
  151. PLD( pld [r1, #28] )
  152. PLD( blt 13f )
  153. PLD( pld [r1, #60] )
  154. PLD( pld [r1, #92] )
  /*
   * Bulk loop: load eight source words (r4-r7, then r8, r9, ip, lr) and
   * build eight output words in r3-r9 and ip.  r3 starts from the word
   * left in lr by the previous step, taken before lr is reloaded.
   */
  155. 12: PLD( pld [r1, #124] )
  156. 13: ldr4w r1, r4, r5, r6, r7, abort=19f
  157. mov r3, lr, pull #\pull
  158. subs r2, r2, #32
  159. ldr4w r1, r8, r9, ip, lr, abort=19f
  160. orr r3, r3, r4, push #\push
  161. mov r4, r4, pull #\pull
  162. orr r4, r4, r5, push #\push
  163. mov r5, r5, pull #\pull
  164. orr r5, r5, r6, push #\push
  165. mov r6, r6, pull #\pull
  166. orr r6, r6, r7, push #\push
  167. mov r7, r7, pull #\pull
  168. orr r7, r7, r8, push #\push
  169. mov r8, r8, pull #\pull
  170. orr r8, r8, r9, push #\push
  171. mov r9, r9, pull #\pull
  172. orr r9, r9, ip, push #\push
  173. mov ip, ip, pull #\pull
  174. orr ip, ip, lr, push #\push
  /*
   * None of the mov/orr/ldr4w/str8w above alter the flags from subs, so
   * bge repeats while another 32-byte block remains.
   * NOTE(review): the empty argument before abort= is in the original;
   * the eight registers stored are r3-r9 and ip.
   */
  175. str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
  176. bge 12b
  177. PLD( cmn r2, #96 )
  178. PLD( bge 13b )
  179. ldmfd sp!, {r5 - r9}
  /* Label 14: ip = bytes of whole words still to copy (0 to 28). */
  180. 14: ands ip, r2, #28
  181. beq 16f
  /* Label 15: one output word per iteration until ip reaches zero. */
  182. 15: mov r3, lr, pull #\pull
  183. ldr1w r1, lr, abort=21f
  184. subs ip, ip, #4
  185. orr r3, r3, lr, push #\push
  186. str1w r0, r3, abort=21f
  187. bgt 15b
  188. CALGN( cmp r2, #0 )
  189. CALGN( bge 11b )
  /*
   * Label 16: r1 has been advancing in whole words from the rounded-down
   * address and is one word ahead of the data consumed.  Backing up by
   * push / 8 bytes puts it on the true next source byte, then the shared
   * byte tail at label 8 finishes the copy and returns.
   */
  190. 16: sub r1, r1, #(\push / 8)
  191. b 8b
  192. .endm
  /*
   * Three expansions of forward_copy_shift, one per source misalignment
   * (selected by the branches at label 10): an offset of 1 falls through
   * into the pull=8 push=24 copy, an offset of 2 enters at label 17
   * (16/16) and an offset of 3 at label 18 (24/8).  pull + push is 32 in
   * each.  Every expansion ends with an unconditional branch back to
   * label 8, so none of them falls through into the next.
   */
  193. forward_copy_shift pull=8 push=24
  194. 17: forward_copy_shift pull=16 push=16
  195. 18: forward_copy_shift pull=24 push=8