copy_template.S 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. /*
  2. * linux/arch/arm/lib/copy_template.s
  3. *
  4. * Code template for optimized memory copy functions
  5. *
  6. * Author: Nicolas Pitre
  7. * Created: Sep 28, 2005
  8. * Copyright: MontaVista Software, Inc.
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2 as
  12. * published by the Free Software Foundation.
  13. */
  14. /*
  15. * This can be used to enable code to cacheline align the source pointer.
  16. * Experiments on tested architectures (StrongARM and XScale) didn't show
  17. * this to be a worthwhile thing to do. That might be different in the future.
  18. */
  /*
   * CALGN(code...) wraps the optional cacheline-alignment instructions used
   * further down.  With the active definition it expands to nothing, so all
   * CALGN( ... ) lines in this file emit no code; the commented-out
   * alternative would emit the wrapped code unchanged.
   */
  19. //#define CALGN(code...) code
  20. #define CALGN(code...)
  21. /*
  22. * Theory of operation
  23. * -------------------
  24. *
  25. * This file provides the core code for a forward memory copy used in
  26. * the implementation of memcpy(), copy_to_user() and copy_from_user().
  27. *
  28. * The including file must define the following accessor macros
  29. * according to the need of the given function:
  30. *
  31. * ldr1w ptr reg abort
  32. *
  33. * This loads one word from 'ptr', stores it in 'reg' and increments
  34. * 'ptr' to the next word. The 'abort' argument is used for fixup tables.
  35. *
  36. * ldr4w ptr reg1 reg2 reg3 reg4 abort
  37. * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
  38. *
  39. * This loads four or eight words starting from 'ptr', stores them
  40. * in provided registers and increments 'ptr' past those words.
  41. * The 'abort' argument is used for fixup tables.
  42. *
  43. * ldr1b ptr reg cond abort
  44. *
  45. * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
  46. * It also must apply the condition code if provided, otherwise the
  47. * "al" condition is assumed by default.
  48. *
  49. * str1w ptr reg abort
  50. * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
  51. * str1b ptr reg cond abort
  52. *
  53. * Same as their ldr* counterparts, but data is stored to 'ptr' location
  54. * rather than being loaded.
  55. *
  56. * enter reg1 reg2
  57. *
  58. * Preserve the provided registers on the stack plus any additional
  59. * data as needed by the implementation including this code. Called
  60. * upon code entry.
  61. *
  62. * exit reg1 reg2
  63. *
  64. * Restore registers with the values previously saved with the
  65. * 'enter' macro. Called upon code termination.
  66. */
  /*
   * Template entry.  From the way they are used below: r0 is the store
   * (destination) pointer, r1 the load (source) pointer and r2 the
   * remaining byte count.  "enter", "exit" and the ldr/str accessors come
   * from the including file.  PLD() is also defined outside this file and
   * is presumably an optional preload wrapper -- TODO confirm.
   */
  67. enter r4, lr
  /* r2 -= 4.  A negative result means fewer than 4 bytes: byte tail at 8. */
  68. subs r2, r2, #4
  69. blt 8f
  /*
   * ip = r0 & 3.  Non-zero means r0 is not word aligned; label 9 copies
   * single bytes until it is.  The bne relies on the PLD line in between
   * leaving the flags alone.
   */
  70. ands ip, r0, #3
  71. PLD( pld [r1, #0] )
  72. bne 9f
  /*
   * ip = r1 & 3.  Non-zero means only r1 is misaligned: shifted copy set up
   * at 10.  Otherwise fall through to the word-aligned path at 1.
   */
  73. ands ip, r1, #3
  74. bne 10f
  /*
   * Word-aligned bulk copy: eight words (32 bytes) per iteration.
   * r2 already had 4 subtracted on entry, so after this further 28 it holds
   * "bytes left - 32"; negative means less than one full group remains.
   */
  75. 1: subs r2, r2, #(28)
  /*
   * r5-r8 serve as extra data registers.  They are popped again at 7, or at
   * 20 if an accessor aborts (see copy_abort_preamble).  The blt below still
   * tests the subs above, since stmfd does not write the flags.
   */
  76. stmfd sp!, {r5 - r8}
  77. blt 5f
  /*
   * CALGN() expands to nothing with the #define near the top of this file,
   * so the next seven lines emit no code in this configuration.
   */
  78. CALGN( ands ip, r1, #31 )
  79. CALGN( rsb r3, ip, #32 )
  80. CALGN( sbcnes r4, r3, r2 ) @ C is always set here
  81. CALGN( bcs 2f )
  82. CALGN( adr r4, 6f )
  83. CALGN( subs r2, r2, r3 ) @ C gets set
  84. CALGN( add pc, r4, ip )
  /*
   * Preload variant (only when PLD() emits code): r2 is biased by a further
   * 96 so that the loop at 3, which preloads ahead of r1, stops with at most
   * three groups still to copy; "cmn r2, #96 / bge 4b" then copies those
   * from 4 without issuing more preloads.  The bias is never undone: it is
   * a multiple of 32, and the code after the loop only looks at the low
   * five bits of r2 (ands #28 at 5, lsl #31 at 8).
   */
  85. PLD( pld [r1, #0] )
  86. 2: PLD( subs r2, r2, #96 )
  87. PLD( pld [r1, #28] )
  88. PLD( blt 4f )
  89. PLD( pld [r1, #60] )
  90. PLD( pld [r1, #92] )
  91. 3: PLD( pld [r1, #124] )
  /*
   * Load eight words through r1, subtract 32 from r2, store the eight words
   * through r0.  The bge tests the subs result, so str8w is expected to
   * leave the flags untouched.
   */
  92. 4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
  93. subs r2, r2, #32
  94. str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
  95. bge 3b
  96. PLD( cmn r2, #96 )
  97. PLD( bge 4b )
  /*
   * Copy the 0-7 whole words that remain after the eight-word groups.
   * ip = r2 & 28 is 4 * (words left).  Zero: nothing to do, branch to 7.
   * Otherwise ip becomes 32 - 4 * (words left) and is added to pc so that
   * execution lands inside the run of loads, skipping the ones that are not
   * needed; the same offset is then used to enter the run of stores.
   * In ARM state pc reads as the current instruction + 8, i.e. the "6: nop"
   * for the addne and the second nop for the later add.
   * NOTE(review): this only works if every ldr1w/str1w used here expands to
   * exactly one 4-byte instruction -- confirm for each including file.
   */
  98. 5: ands ip, r2, #28
  /* rsb (no "s") leaves the flags alone, so addne tests the ands result. */
  99. rsb ip, ip, #32
  100. addne pc, pc, ip @ C is always clear here
  101. b 7f
  /* Load run: entering k instructions before its end loads the last k words. */
  102. 6: nop
  103. ldr1w r1, r3, abort=20f
  104. ldr1w r1, r4, abort=20f
  105. ldr1w r1, r5, abort=20f
  106. ldr1w r1, r6, abort=20f
  107. ldr1w r1, r7, abort=20f
  108. ldr1w r1, r8, abort=20f
  109. ldr1w r1, lr, abort=20f
  /* Second computed jump: same ip, now selecting the matching stores. */
  110. add pc, pc, ip
  111. nop
  112. nop
  113. str1w r0, r3, abort=20f
  114. str1w r0, r4, abort=20f
  115. str1w r0, r5, abort=20f
  116. str1w r0, r6, abort=20f
  117. str1w r0, r7, abort=20f
  118. str1w r0, r8, abort=20f
  119. str1w r0, lr, abort=20f
  /* Emits nothing unless the alternate CALGN definition is enabled. */
  120. CALGN( bcs 2b )
  /* Restore the registers pushed at 1, then fall through to the byte tail. */
  121. 7: ldmfd sp!, {r5 - r8}
  /*
   * Byte tail: copy the last 0-3 bytes, then leave through "exit".
   * The shift by 31 puts bit 1 of r2 into C and leaves bit 0 as the only
   * result bit that can be non-zero, so:
   *   ne -> one byte   (via r3)
   *   cs -> two bytes  (via r4 and ip)
   * The constants subtracted from r2 earlier (4, 28, 32, 96) are all
   * multiples of 4 and therefore do not disturb these two bits.
   * The conditional accessors are expected to leave the flags unchanged.
   */
  122. 8: movs r2, r2, lsl #31
  123. ldr1b r1, r3, ne, abort=21f
  124. ldr1b r1, r4, cs, abort=21f
  125. ldr1b r1, ip, cs, abort=21f
  126. str1b r0, r3, ne, abort=21f
  127. str1b r0, r4, cs, abort=21f
  128. str1b r0, ip, cs, abort=21f
  /* Counterpart of "enter r4, lr"; the macro comes from the including file. */
  129. exit r4, pc
  /*
   * Destination not word aligned.  On entry ip = r0 & 3 (1..3).
   * ip = 4 - ip is the number of bytes needed to align r0:
   *   gt (ip == 3) -> a byte via r3
   *   ge (ip >= 2) -> a byte via r4
   *   always       -> a byte via lr
   * At least 4 bytes are available here, because the entry code branches
   * to 8 when there are fewer.
   */
  130. 9: rsb ip, ip, #4
  131. cmp ip, #2
  132. ldr1b r1, r3, gt, abort=21f
  133. ldr1b r1, r4, ge, abort=21f
  134. ldr1b r1, lr, abort=21f
  135. str1b r0, r3, gt, abort=21f
  136. str1b r0, r4, ge, abort=21f
  /*
   * r2 -= bytes just copied.  The flags are consumed by the blt after the
   * last store, which is expected not to change them.
   */
  137. subs r2, r2, ip
  138. str1b r0, lr, abort=21f
  /* r2 still carries the -4 from entry: negative means fewer than 4 bytes left. */
  139. blt 8b
  /* r0 is aligned now; if r1 is too use the aligned path, else fall into 10. */
  140. ands ip, r1, #3
  141. beq 1b
  /*
   * r0 is word aligned, r1 is not; ip = r1 & 3 (1..3).
   * Round r1 down to a word boundary and fetch that first word into lr; the
   * forward_copy_shift expansions further down merge neighbouring words
   * with shifts.  The cmp result must survive the load (ldr1w is expected to
   * leave the flags alone) and selects the expansion:
   *   ip == 1 -> fall through  (pull=8,  push=24)
   *   ip == 2 -> 17            (pull=16, push=16)
   *   ip == 3 -> 18            (pull=24, push=8)
   * The macro definition that follows emits no code by itself, so falling
   * through reaches the first expansion.
   */
  142. 10: bic r1, r1, #3
  143. cmp ip, #2
  144. ldr1w r1, lr, abort=21f
  145. beq 17f
  146. bgt 18f
  /*
   * forward_copy_shift pull push
   *
   * Copy loop for a word-aligned r0 and a misaligned r1.  On entry r1 has
   * been rounded down to a word boundary and lr holds the most recently
   * loaded source word.  Each output word is built as
   *     (previous word, shifted by "pull") | (next word, shifted by "push")
   * where "pull" and "push" are shift mnemonics defined outside this file
   * (presumably lsr/lsl or the reverse depending on endianness -- TODO
   * confirm).  pull + push is 32 in all three expansions in this file.
   * Labels 11-16 are local numeric labels, so every expansion gets its own.
   */
  147. .macro forward_copy_shift pull push
  /* r2 becomes "bytes left - 32" (4 was already subtracted on entry). */
  148. subs r2, r2, #28
  149. blt 14f
  /* CALGN lines emit nothing with the definition active in this file. */
  150. CALGN( ands ip, r1, #31 )
  151. CALGN( rsb ip, ip, #32 )
  152. CALGN( sbcnes r4, ip, r2 ) @ C is always set here
  153. CALGN( subcc r2, r2, ip )
  154. CALGN( bcc 15f )
  /* r5-r9 are popped after the loop, or at 19 if an accessor aborts. */
  155. 11: stmfd sp!, {r5 - r9}
  /*
   * Optional preload set-up: r2 gets an extra bias of 96 that is never
   * removed; it is a multiple of 32, and the code after the loop only uses
   * the low five bits of r2.
   */
  156. PLD( pld [r1, #0] )
  157. PLD( subs r2, r2, #96 )
  158. PLD( pld [r1, #28] )
  159. PLD( blt 13f )
  160. PLD( pld [r1, #60] )
  161. PLD( pld [r1, #92] )
  162. 12: PLD( pld [r1, #124] )
  /*
   * Main loop: load eight words (r4-r7, then r8, r9, ip, lr) and produce
   * eight merged words in r3-r9 and ip.  lr ends up holding the last loaded
   * word, which becomes the "previous word" of the next iteration.
   */
  163. 13: ldr4w r1, r4, r5, r6, r7, abort=19f
  164. mov r3, lr, pull #\pull
  165. subs r2, r2, #32
  166. ldr4w r1, r8, r9, ip, lr, abort=19f
  167. orr r3, r3, r4, push #\push
  168. mov r4, r4, pull #\pull
  169. orr r4, r4, r5, push #\push
  170. mov r5, r5, pull #\pull
  171. orr r5, r5, r6, push #\push
  172. mov r6, r6, pull #\pull
  173. orr r6, r6, r7, push #\push
  174. mov r7, r7, pull #\pull
  175. orr r7, r7, r8, push #\push
  176. mov r8, r8, pull #\pull
  177. orr r8, r8, r9, push #\push
  178. mov r9, r9, pull #\pull
  179. orr r9, r9, ip, push #\push
  180. mov ip, ip, pull #\pull
  181. orr ip, ip, lr, push #\push
  182. str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
  /*
   * The bge tests the subs above: the mov/orr instructions have no "s"
   * suffix and the accessors are expected not to set flags.
   */
  183. bge 12b
  184. PLD( cmn r2, #96 )
  185. PLD( bge 13b )
  186. ldmfd sp!, {r5 - r9}
  /* Up to seven whole words left (ip = 4 * words): one word per iteration. */
  187. 14: ands ip, r2, #28
  188. beq 16f
  189. 15: mov r3, lr, pull #\pull
  190. ldr1w r1, lr, abort=21f
  191. subs ip, ip, #4
  192. orr r3, r3, lr, push #\push
  193. str1w r0, r3, abort=21f
  194. bgt 15b
  195. CALGN( cmp r2, #0 )
  196. CALGN( bge 11b )
  /*
   * r1 is a word-aligned pointer that has read ahead of the bytes actually
   * consumed; stepping back push / 8 bytes puts it on the next source byte
   * before handing over to the byte tail at 8.
   */
  197. 16: sub r1, r1, #(\push / 8)
  198. b 8b
  199. .endm
  /*
   * Three expansions of forward_copy_shift, one per source misalignment
   * (ip = r1 & 3 as computed before label 10):
   *   ip == 1: reached by falling through from the code at label 10
   *   ip == 2: label 17
   *   ip == 3: label 18
   * In each case pull is 8 * ip and push is 8 * (4 - ip).
   */
  200. forward_copy_shift pull=8 push=24
  201. 17: forward_copy_shift pull=16 push=16
  202. 18: forward_copy_shift pull=24 push=8
  203. /*
  204. * Abort preamble and completion macros.
  205. * If a fixup handler is required then those macros must surround it.
  206. * It is assumed that the fixup code will handle the private part of
  207. * the exit macro.
  208. */
  /*
   * copy_abort_preamble: defines the numeric labels used as abort= targets
   * by the template code.
   *   19: r5-r9 are on the stack (pushed at label 11) -> pop them
   *   20: r5-r8 are on the stack (pushed at label 1)  -> pop them
   *   21: nothing extra is on the stack
   * All three paths end at 21, which is the last thing in this macro; the
   * includer's fixup code is expected to follow the expansion.
   */
  209. .macro copy_abort_preamble
  210. 19: ldmfd sp!, {r5 - r9}
  211. b 21f
  212. 20: ldmfd sp!, {r5 - r8}
  213. 21:
  214. .endm
  /*
   * copy_abort_end: closes a fixup handler by popping r4 and loading pc from
   * the stack, which mirrors the "enter r4, lr" at the top of the template.
   * Anything else the includer's "enter" saved has to be undone by the
   * fixup code before this point.
   */
  215. .macro copy_abort_end
  216. ldmfd sp!, {r4, pc}
  217. .endm