memcpy.S 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /*
  2. * linux/arch/arm/lib/memcpy.S
  3. *
  4. * Copyright (C) 1995-1999 Russell King
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. *
  10. * ASM optimised string functions
  11. */
  12. #include <linux/linkage.h>
  13. #include <asm/assembler.h>
  14. .text
  /*
   * ENTER: open the stack frame used by the routine below.
   *   ip is set to the stack pointer on entry, then r0, r4-r9, fp, ip, lr
   *   and pc are pushed with a full-descending store (lowest register at the
   *   lowest address, so the pc value lands in the highest slot).  fp is
   *   then set to (entry sp - 4), i.e. the address of that highest slot.
   */
  15. #define ENTER \
  16. mov ip,sp ;\
  17. stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\
  18. sub fp,ip,#4
  /*
   * EXIT: leave through the frame built by ENTER.
   *   NOTE(review): LOADREGS is not defined in this file; presumably it
   *   expands to a load-multiple using the given suffix ("ea") with fp as
   *   the base - confirm in <asm/assembler.h>.  If so, r0, r4-r9 and fp
   *   are reloaded, sp receives the value ENTER stacked from ip (the entry
   *   sp) and pc receives the value ENTER stacked from lr.
   */
  19. #define EXIT \
  20. LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
  /*
   * EXITEQ: same register list as EXIT, but with the suffix "eqea";
   *   presumably the "eq" makes the return conditional on the Z flag -
   *   confirm against LOADREGS.
   */
  21. #define EXITEQ \
  22. LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
  23. /*
  24. * Prototype: void memcpy(void *to,const void *from,unsigned long n);
 *
 * memcpy and memmove are two ENTRY labels on the same code, so the
 * direction test below is executed for either name.
 *
 * Register use, following the prototype above:
 *   r0 = to (destination cursor), r1 = from (source cursor),
 *   r2 = byte count, kept biased below the real remaining count so that
 *        the flags from subs/adds drive the size tests directly,
 *   r3-r9, ip = data in flight; ip also holds (address & 3) while the
 *        alignment is being examined.
 *
 * Direction: if from < to (unsigned compare) the copy runs downwards from
 * the end of both buffers (labels 23-45); otherwise it runs upwards
 * (labels 1-22).
 *
 * Each direction has the same three stages:
 *   - byte copies until the destination is word aligned (7 / 30),
 *   - bulk copy: ldm/stm bursts of 32, 16, 12, 8 and 4 bytes when the
 *     source is word aligned too (1-5 / 24-28), otherwise whole-word loads
 *     merged with the "pull"/"push" shifts, one variant per source offset
 *     (8-22 / 31-45),
 *   - a tail of 0-3 single bytes (6 / 29).
 *
 * NOTE(review): ENTRY, PLD, LOADREGS, "pull" and "push" are not defined in
 * this file; presumably they come from the two headers included above.
 * PLD() appears to wrap instructions that are only wanted when the pld
 * prefetch instruction is in use, and pull/push look like the two opposite
 * shift directions - confirm against the headers.
 *
 * ENTER stacks r0 and EXIT/EXITEQ list r0 again, so the caller presumably
 * gets the original "to" back in r0 even though r0 is advanced during the
 * copy - confirm against LOADREGS.
  25. */
  26. ENTRY(memcpy)
  27. ENTRY(memmove)
  28. ENTER /* push the frame, including the incoming r0 */
  29. cmp r1, r0
  30. bcc 23f /* from < to (unsigned): copy downwards from the end */
  31. subs r2, r2, #4 /* r2 = n - 4 */
  32. blt 6f /* fewer than 4 bytes: byte tail only */
  33. PLD( pld [r1, #0] )
  34. ands ip, r0, #3 /* ip = to & 3 */
  35. bne 7f /* destination is not word aligned */
  36. ands ip, r1, #3 /* ip = from & 3 */
  37. bne 8f /* destination aligned, source is not */
  38. 1: subs r2, r2, #8 /* both aligned; r2 = remaining - 12 */
  39. blt 5f /* fewer than 12 bytes left */
  40. subs r2, r2, #20 /* r2 = remaining - 32 */
  41. blt 4f /* fewer than 32 bytes left */
  42. PLD( pld [r1, #28] )
  43. PLD( subs r2, r2, #64 ) /* PLD variant: r2 = remaining - 96 */
  44. PLD( blt 3f )
  45. 2: PLD( pld [r1, #60] )
  46. PLD( pld [r1, #92] )
  47. ldmia r1!, {r3 - r9, ip} /* load 32 bytes */
  48. subs r2, r2, #32
  49. stmgeia r0!, {r3 - r9, ip} /* if another block is due: store this one ... */
  50. ldmgeia r1!, {r3 - r9, ip} /* ... and load the next 32 bytes */
  51. subges r2, r2, #32
  52. stmia r0!, {r3 - r9, ip} /* store the most recently loaded 32 bytes */
  53. bge 2b /* flags are from the last subs/subges executed */
  54. 3: PLD( ldmia r1!, {r3 - r9, ip} ) /* PLD variant: one or two more 32-byte blocks, */
  55. PLD( adds r2, r2, #32 ) /* leaving r2 = remaining - 32 as in the non-PLD case */
  56. PLD( stmgeia r0!, {r3 - r9, ip} )
  57. PLD( ldmgeia r1!, {r3 - r9, ip} )
  58. PLD( subges r2, r2, #32 )
  59. PLD( stmia r0!, {r3 - r9, ip} )
  60. 4: cmn r2, #16 /* r2 = remaining - 32; ge when at least 16 remain */
  61. ldmgeia r1!, {r3 - r6} /* copy 16 bytes */
  62. subge r2, r2, #16
  63. stmgeia r0!, {r3 - r6}
  64. adds r2, r2, #20 /* r2 = remaining - 12 */
  65. ldmgeia r1!, {r3 - r5} /* copy 12 bytes when at least 12 remain */
  66. subge r2, r2, #12
  67. stmgeia r0!, {r3 - r5}
  68. 5: adds r2, r2, #8 /* r2 = remaining - 4 */
  69. blt 6f /* fewer than 4 bytes left */
  70. subs r2, r2, #4 /* lt: 4-7 bytes left, ge: 8-11 bytes left */
  71. ldrlt r3, [r1], #4 /* lt: copy one word */
  72. ldmgeia r1!, {r4, r5} /* ge: copy two words */
  73. subge r2, r2, #4
  74. strlt r3, [r0], #4
  75. stmgeia r0!, {r4, r5}
  76. 6: adds r2, r2, #4 /* r2 = bytes still to copy (0-3) */
  77. EXITEQ /* nothing left: return */
  78. cmp r2, #2 /* 1st byte always, 2nd if ge, 3rd if gt */
  79. ldrb r3, [r1], #1
  80. ldrgeb r4, [r1], #1
  81. ldrgtb r5, [r1], #1
  82. strb r3, [r0], #1
  83. strgeb r4, [r0], #1
  84. strgtb r5, [r0], #1
  85. EXIT
  86. 7: rsb ip, ip, #4 /* ip = 4 - (to & 3): bytes up to the next word boundary */
  87. cmp ip, #2 /* copy 1, 2 (ge) or 3 (gt) bytes */
  88. ldrb r3, [r1], #1
  89. ldrgeb r4, [r1], #1
  90. ldrgtb r5, [r1], #1
  91. strb r3, [r0], #1
  92. strgeb r4, [r0], #1
  93. strgtb r5, [r0], #1
  94. subs r2, r2, ip /* account for the alignment bytes; r2 = remaining - 4 */
  95. blt 6b /* fewer than 4 bytes left */
  96. ands ip, r1, #3 /* ip = from & 3 now that the destination is aligned */
  97. beq 1b /* source aligned as well: use the ldm/stm path */
  98. 8: bic r1, r1, #3 /* ip = source offset (1-3); round the source down to a word */
  99. ldr r7, [r1], #4 /* r7 = word holding the first wanted source bytes */
  100. cmp ip, #2
  101. bgt 18f /* source offset 3 */
  102. beq 13f /* source offset 2 */
  103. cmp r2, #12 /* source offset 1; r2 = remaining - 4 */
  104. blt 11f /* fewer than 16 bytes left: one word per pass */
  105. PLD( pld [r1, #12] )
  106. sub r2, r2, #12 /* r2 = remaining - 16 */
  107. PLD( subs r2, r2, #32 ) /* PLD variant: extra bias of 32 */
  108. PLD( blt 10f )
  109. PLD( pld [r1, #28] )
  110. 9: PLD( pld [r1, #44] )
  111. 10: mov r3, r7, pull #8 /* start from the unused part of the previous word */
  112. ldmia r1!, {r4 - r7} /* next four source words; r7 carries into the next pass */
  113. subs r2, r2, #16
  114. orr r3, r3, r4, push #24 /* each output word = rest of one input word + start of the next */
  115. mov r4, r4, pull #8
  116. orr r4, r4, r5, push #24
  117. mov r5, r5, pull #8
  118. orr r5, r5, r6, push #24
  119. mov r6, r6, pull #8
  120. orr r6, r6, r7, push #24
  121. stmia r0!, {r3 - r6} /* 16 bytes out */
  122. bge 9b
  123. PLD( cmn r2, #32 ) /* PLD variant: keep looping without prefetch, */
  124. PLD( bge 10b )
  125. PLD( add r2, r2, #32 ) /* then remove the extra bias of 32 */
  126. adds r2, r2, #12 /* back to r2 = remaining - 4 */
  127. blt 12f
  128. 11: mov r3, r7, pull #8 /* same merge as above, one word per pass */
  129. ldr r7, [r1], #4
  130. subs r2, r2, #4
  131. orr r3, r3, r7, push #24
  132. str r3, [r0], #4
  133. bge 11b
  134. 12: sub r1, r1, #3 /* r1 is past the word in r7; step back to the next uncopied byte */
  135. b 6b /* byte tail */
  136. 13: cmp r2, #12 /* source offset 2; same scheme with 16/16 shifts */
  137. blt 16f
  138. PLD( pld [r1, #12] )
  139. sub r2, r2, #12 /* r2 = remaining - 16 */
  140. PLD( subs r2, r2, #32 )
  141. PLD( blt 15f )
  142. PLD( pld [r1, #28] )
  143. 14: PLD( pld [r1, #44] )
  144. 15: mov r3, r7, pull #16
  145. ldmia r1!, {r4 - r7}
  146. subs r2, r2, #16
  147. orr r3, r3, r4, push #16
  148. mov r4, r4, pull #16
  149. orr r4, r4, r5, push #16
  150. mov r5, r5, pull #16
  151. orr r5, r5, r6, push #16
  152. mov r6, r6, pull #16
  153. orr r6, r6, r7, push #16
  154. stmia r0!, {r3 - r6}
  155. bge 14b
  156. PLD( cmn r2, #32 )
  157. PLD( bge 15b )
  158. PLD( add r2, r2, #32 )
  159. adds r2, r2, #12 /* back to r2 = remaining - 4 */
  160. blt 17f
  161. 16: mov r3, r7, pull #16 /* one word per pass */
  162. ldr r7, [r1], #4
  163. subs r2, r2, #4
  164. orr r3, r3, r7, push #16
  165. str r3, [r0], #4
  166. bge 16b
  167. 17: sub r1, r1, #2 /* step back to the next uncopied byte */
  168. b 6b /* byte tail */
  169. 18: cmp r2, #12 /* source offset 3; same scheme with 24/8 shifts */
  170. blt 21f
  171. PLD( pld [r1, #12] )
  172. sub r2, r2, #12 /* r2 = remaining - 16 */
  173. PLD( subs r2, r2, #32 )
  174. PLD( blt 20f )
  175. PLD( pld [r1, #28] )
  176. 19: PLD( pld [r1, #44] )
  177. 20: mov r3, r7, pull #24
  178. ldmia r1!, {r4 - r7}
  179. subs r2, r2, #16
  180. orr r3, r3, r4, push #8
  181. mov r4, r4, pull #24
  182. orr r4, r4, r5, push #8
  183. mov r5, r5, pull #24
  184. orr r5, r5, r6, push #8
  185. mov r6, r6, pull #24
  186. orr r6, r6, r7, push #8
  187. stmia r0!, {r3 - r6}
  188. bge 19b
  189. PLD( cmn r2, #32 )
  190. PLD( bge 20b )
  191. PLD( add r2, r2, #32 )
  192. adds r2, r2, #12 /* back to r2 = remaining - 4 */
  193. blt 22f
  194. 21: mov r3, r7, pull #24 /* one word per pass */
  195. ldr r7, [r1], #4
  196. subs r2, r2, #4
  197. orr r3, r3, r7, push #8
  198. str r3, [r0], #4
  199. bge 21b
  200. 22: sub r1, r1, #1 /* step back to the next uncopied byte */
  201. b 6b /* byte tail */
  202. 23: add r1, r1, r2 /* downward copy: point both cursors one past the end */
  203. add r0, r0, r2
  204. subs r2, r2, #4 /* r2 = n - 4 */
  205. blt 29f /* fewer than 4 bytes: byte tail only */
  206. PLD( pld [r1, #-4] )
  207. ands ip, r0, #3 /* ip = (end of to) & 3 */
  208. bne 30f /* destination end is not word aligned */
  209. ands ip, r1, #3 /* ip = (end of from) & 3 */
  210. bne 31f /* destination aligned, source is not */
  211. 24: subs r2, r2, #8 /* both aligned; r2 = remaining - 12 */
  212. blt 28f /* fewer than 12 bytes left */
  213. subs r2, r2, #20 /* r2 = remaining - 32 */
  214. blt 27f /* fewer than 32 bytes left */
  215. PLD( pld [r1, #-32] )
  216. PLD( subs r2, r2, #64 ) /* PLD variant: r2 = remaining - 96 */
  217. PLD( blt 26f )
  218. 25: PLD( pld [r1, #-64] )
  219. PLD( pld [r1, #-96] )
  220. ldmdb r1!, {r3 - r9, ip} /* load the 32 bytes below r1 */
  221. subs r2, r2, #32
  222. stmgedb r0!, {r3 - r9, ip} /* if another block is due: store this one ... */
  223. ldmgedb r1!, {r3 - r9, ip} /* ... and load the next 32 bytes down */
  224. subges r2, r2, #32
  225. stmdb r0!, {r3 - r9, ip} /* store the most recently loaded 32 bytes */
  226. bge 25b /* flags are from the last subs/subges executed */
  227. 26: PLD( ldmdb r1!, {r3 - r9, ip} ) /* PLD variant: one or two more 32-byte blocks, */
  228. PLD( adds r2, r2, #32 ) /* leaving r2 = remaining - 32 as in the non-PLD case */
  229. PLD( stmgedb r0!, {r3 - r9, ip} )
  230. PLD( ldmgedb r1!, {r3 - r9, ip} )
  231. PLD( subges r2, r2, #32 )
  232. PLD( stmdb r0!, {r3 - r9, ip} )
  233. 27: cmn r2, #16 /* r2 = remaining - 32; ge when at least 16 remain */
  234. ldmgedb r1!, {r3 - r6} /* copy 16 bytes */
  235. subge r2, r2, #16
  236. stmgedb r0!, {r3 - r6}
  237. adds r2, r2, #20 /* r2 = remaining - 12 */
  238. ldmgedb r1!, {r3 - r5} /* copy 12 bytes when at least 12 remain */
  239. subge r2, r2, #12
  240. stmgedb r0!, {r3 - r5}
  241. 28: adds r2, r2, #8 /* r2 = remaining - 4 */
  242. blt 29f /* fewer than 4 bytes left */
  243. subs r2, r2, #4 /* lt: 4-7 bytes left, ge: 8-11 bytes left */
  244. ldrlt r3, [r1, #-4]! /* lt: copy one word */
  245. ldmgedb r1!, {r4, r5} /* ge: copy two words */
  246. subge r2, r2, #4
  247. strlt r3, [r0, #-4]!
  248. stmgedb r0!, {r4, r5}
  249. 29: adds r2, r2, #4 /* r2 = bytes still to copy (0-3) */
  250. EXITEQ /* nothing left: return */
  251. cmp r2, #2 /* 1st byte always, 2nd if ge, 3rd if gt */
  252. ldrb r3, [r1, #-1]!
  253. ldrgeb r4, [r1, #-1]!
  254. ldrgtb r5, [r1, #-1]!
  255. strb r3, [r0, #-1]!
  256. strgeb r4, [r0, #-1]!
  257. strgtb r5, [r0, #-1]!
  258. EXIT
  259. 30: cmp ip, #2 /* ip = bytes above the word boundary below r0: copy 1, 2 (ge) or 3 (gt) */
  260. ldrb r3, [r1, #-1]!
  261. ldrgeb r4, [r1, #-1]!
  262. ldrgtb r5, [r1, #-1]!
  263. strb r3, [r0, #-1]!
  264. strgeb r4, [r0, #-1]!
  265. strgtb r5, [r0, #-1]!
  266. subs r2, r2, ip /* account for the alignment bytes; r2 = remaining - 4 */
  267. blt 29b /* fewer than 4 bytes left */
  268. ands ip, r1, #3 /* ip = source offset now that the destination is aligned */
  269. beq 24b /* source aligned as well: use the ldm/stm path */
  270. 31: bic r1, r1, #3 /* ip = source offset (1-3); round the source down to a word */
  271. ldr r3, [r1], #0 /* r3 = word holding the last wanted source bytes; r1 is not moved */
  272. cmp ip, #2
  273. blt 41f /* source offset 1 */
  274. beq 36f /* source offset 2 */
  275. cmp r2, #12 /* source offset 3; r2 = remaining - 4 */
  276. blt 34f /* fewer than 16 bytes left: one word per pass */
  277. PLD( pld [r1, #-16] )
  278. sub r2, r2, #12 /* r2 = remaining - 16 */
  279. PLD( subs r2, r2, #32 ) /* PLD variant: extra bias of 32 */
  280. PLD( blt 33f )
  281. PLD( pld [r1, #-32] )
  282. 32: PLD( pld [r1, #-48] )
  283. 33: mov r7, r3, push #8 /* start from the unused part of the previous word */
  284. ldmdb r1!, {r3, r4, r5, r6} /* four source words below r1; r3 carries into the next pass */
  285. subs r2, r2, #16
  286. orr r7, r7, r6, pull #24 /* each output word = rest of one input word + end of the one below */
  287. mov r6, r6, push #8
  288. orr r6, r6, r5, pull #24
  289. mov r5, r5, push #8
  290. orr r5, r5, r4, pull #24
  291. mov r4, r4, push #8
  292. orr r4, r4, r3, pull #24
  293. stmdb r0!, {r4, r5, r6, r7} /* 16 bytes out */
  294. bge 32b
  295. PLD( cmn r2, #32 ) /* PLD variant: keep looping without prefetch, */
  296. PLD( bge 33b )
  297. PLD( add r2, r2, #32 ) /* then remove the extra bias of 32 */
  298. adds r2, r2, #12 /* back to r2 = remaining - 4 */
  299. blt 35f
  300. 34: mov ip, r3, push #8 /* same merge as above, one word per pass */
  301. ldr r3, [r1, #-4]!
  302. subs r2, r2, #4
  303. orr ip, ip, r3, pull #24
  304. str ip, [r0, #-4]!
  305. bge 34b
  306. 35: add r1, r1, #3 /* r1 is at the word in r3; move to just past the next uncopied byte */
  307. b 29b /* byte tail */
  308. 36: cmp r2, #12 /* source offset 2; same scheme with 16/16 shifts */
  309. blt 39f
  310. PLD( pld [r1, #-16] )
  311. sub r2, r2, #12 /* r2 = remaining - 16 */
  312. PLD( subs r2, r2, #32 )
  313. PLD( blt 38f )
  314. PLD( pld [r1, #-32] )
  315. 37: PLD( pld [r1, #-48] )
  316. 38: mov r7, r3, push #16
  317. ldmdb r1!, {r3, r4, r5, r6}
  318. subs r2, r2, #16
  319. orr r7, r7, r6, pull #16
  320. mov r6, r6, push #16
  321. orr r6, r6, r5, pull #16
  322. mov r5, r5, push #16
  323. orr r5, r5, r4, pull #16
  324. mov r4, r4, push #16
  325. orr r4, r4, r3, pull #16
  326. stmdb r0!, {r4, r5, r6, r7}
  327. bge 37b
  328. PLD( cmn r2, #32 )
  329. PLD( bge 38b )
  330. PLD( add r2, r2, #32 )
  331. adds r2, r2, #12 /* back to r2 = remaining - 4 */
  332. blt 40f
  333. 39: mov ip, r3, push #16 /* one word per pass */
  334. ldr r3, [r1, #-4]!
  335. subs r2, r2, #4
  336. orr ip, ip, r3, pull #16
  337. str ip, [r0, #-4]!
  338. bge 39b
  339. 40: add r1, r1, #2 /* move to just past the next uncopied byte */
  340. b 29b /* byte tail */
  341. 41: cmp r2, #12 /* source offset 1; same scheme with 24/8 shifts */
  342. blt 44f
  343. PLD( pld [r1, #-16] )
  344. sub r2, r2, #12 /* r2 = remaining - 16 */
  345. PLD( subs r2, r2, #32 )
  346. PLD( blt 43f )
  347. PLD( pld [r1, #-32] )
  348. 42: PLD( pld [r1, #-48] )
  349. 43: mov r7, r3, push #24
  350. ldmdb r1!, {r3, r4, r5, r6}
  351. subs r2, r2, #16
  352. orr r7, r7, r6, pull #8
  353. mov r6, r6, push #24
  354. orr r6, r6, r5, pull #8
  355. mov r5, r5, push #24
  356. orr r5, r5, r4, pull #8
  357. mov r4, r4, push #24
  358. orr r4, r4, r3, pull #8
  359. stmdb r0!, {r4, r5, r6, r7}
  360. bge 42b
  361. PLD( cmn r2, #32 )
  362. PLD( bge 43b )
  363. PLD( add r2, r2, #32 )
  364. adds r2, r2, #12 /* back to r2 = remaining - 4 */
  365. blt 45f
  366. 44: mov ip, r3, push #24 /* one word per pass */
  367. ldr r3, [r1, #-4]!
  368. subs r2, r2, #4
  369. orr ip, ip, r3, pull #8
  370. str ip, [r0, #-4]!
  371. bge 44b
  372. 45: add r1, r1, #1 /* move to just past the next uncopied byte */
  373. b 29b /* byte tail */