csumpartialcopygeneric.S 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /*
  2. * linux/arch/arm/lib/csumpartialcopygeneric.S
  3. *
  4. * Copyright (C) 1995-2001 Russell King
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. /*
  11. * unsigned int
  12. * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
  13. * r0 = src, r1 = dst, r2 = len, r3 = sum
  14. * Returns : r0 = checksum
  15. *
  16. * Note that 'tst' and 'teq' preserve the carry flag.
  17. */
  /*
   * Symbolic names for the four argument registers, matching the
   * r0-r3 assignment listed in the header comment.  Note that 'src'
   * and the return register r0 are the same physical register.
   */
  18. src .req r0
  19. dst .req r1
  20. len .req r2
  21. sum .req r3
  /*
   * Zero-length exit, reached from .Lless8 when len == 0.  Nothing is
   * copied, so the incoming sum is handed back unchanged in r0.
   * load_regs is a macro defined outside this file; presumably it
   * restores what save_regs stored and returns to the caller --
   * confirm against its definition.
   */
  22. .Lzero: mov r0, sum
  23. load_regs ea
  24. /*
  25. * Align an unaligned destination pointer. We know that
  26. * we have >= 8 bytes here, so we don't need to check
  27. * the length. Note that the source pointer hasn't been
  28. * aligned yet.
  29. */
  /*
   * Subroutine: entered with 'blne' from the main path when
   * (dst & 3) != 0 and left with 'mov pc, lr'.  It copies 1, 2 or 3
   * bytes so that dst becomes word aligned, adding each byte to sum
   * with adcs; the resulting carry is handed back to the caller in C
   * ('tst' and plain 'sub' do not disturb it).
   *
   * load1b/load2b and put_byte_0/put_byte_1 are macros defined
   * outside this file.  Presumably load1b/load2b fetch one/two bytes
   * from src with post-increment and put_byte_N places a byte in
   * byte lane N of the checksum word -- confirm.
   */
  30. .Ldst_unaligned:
  31. tst dst, #1
  32. beq .Ldst_16bit
  /*
   * dst is odd: move one byte.  It is accounted with put_byte_1
   * rather than put_byte_0; .Ldone rotates the final result by 8
   * bits when the original dst was odd.
   */
  33. load1b ip
  34. sub len, len, #1
  35. adcs sum, sum, ip, put_byte_1 @ update checksum
  36. strb ip, [dst], #1
  /* If bit 1 of dst is clear after that byte we are done. */
  37. tst dst, #2
  38. moveq pc, lr @ dst is now 32bit aligned
  /* dst is 16-bit but not 32-bit aligned: move two more bytes. */
  39. .Ldst_16bit: load2b r8, ip
  40. sub len, len, #2
  41. adcs sum, sum, r8, put_byte_0
  42. strb r8, [dst], #1
  43. adcs sum, sum, ip, put_byte_1
  44. strb ip, [dst], #1
  45. mov pc, lr @ dst is now 32bit aligned
  46. /*
  47. * Handle 0 to 7 bytes, with any alignment of source and
  48. * destination pointers. Note that when we get here, C = 0
  49. */
  /*
   * Short-copy path for len < 8, entered with 'blo' from the function
   * entry (so C is clear on arrival).  Bytes are moved one or two at
   * a time.  len == 0 leaves through .Lzero; every other case ends
   * at .Ldone.  Length bookkeeping uses 'sub' without the S suffix
   * plus 'tst'/'teq', so the carry produced by adcs is never lost.
   *
   * load1b/load2b and put_byte_N are macros defined outside this
   * file; presumably the loads read from src with post-increment --
   * confirm.
   */
  50. .Lless8: teq len, #0 @ check for zero count
  51. beq .Lzero
  52. /* we must have at least one byte. */
  53. tst dst, #1 @ dst 16-bit aligned
  54. beq .Lless8_aligned
  55. /* Align dst */
  /*
   * The byte stored at the odd address is accounted with
   * put_byte_1; .Ldone rotates the result when dst started odd.
   */
  56. load1b ip
  57. sub len, len, #1
  58. adcs sum, sum, ip, put_byte_1 @ update checksum
  59. strb ip, [dst], #1
  /* Skip the two-byte loop if fewer than two bytes remain. */
  60. tst len, #6
  61. beq .Lless8_byteonly
  /* Two bytes per pass for as long as (len & 6) is non-zero. */
  62. 1: load2b r8, ip
  63. sub len, len, #2
  64. adcs sum, sum, r8, put_byte_0
  65. strb r8, [dst], #1
  66. adcs sum, sum, ip, put_byte_1
  67. strb ip, [dst], #1
  /* Loop test; also the entry point when dst was already even. */
  68. .Lless8_aligned:
  69. tst len, #6
  70. bne 1b
  /* At most one byte is left; copy it if (len & 1) is set. */
  71. .Lless8_byteonly:
  72. tst len, #1
  73. beq .Ldone
  74. load1b r8
  75. adcs sum, sum, r8, put_byte_0 @ update checksum
  76. strb r8, [dst], #1
  77. b .Ldone
  /*
   * Function entry.  FN_ENTRY is a macro supplied from outside this
   * file; presumably it emits the entry symbol -- confirm.
   *
   * In:  r0 = src, r1 = dst, r2 = len, r3 = sum
   * Out: r0 = checksum
   *
   * Dispatch:
   *   len < 8                 -> .Lless8
   *   dst not word aligned    -> .Ldst_unaligned (subroutine), then
   *                              continue below
   *   src not word aligned    -> .Lsrc_not_aligned
   *   both word aligned       -> word copy loop below
   *
   * The carry flag is live across most of this code: every adcs
   * consumes the carry of the previous one, and loop counters are
   * updated with flag-preserving instructions.
   *
   * load1l/load2l/load4l, save_regs/load_regs, push and get_byte_N /
   * put_byte_N are defined outside this file.  Presumably the loads
   * read 1/2/4 words from src with post-increment -- confirm.
   */
  78. FN_ENTRY
  /*
   * NOTE(review): ip = entry sp and fp = ip - 4 around save_regs look
   * like a stack-frame setup; the exact layout depends on save_regs,
   * which is not visible here -- confirm.
   */
  79. mov ip, sp
  80. save_regs
  81. sub fp, ip, #4
  82. cmp len, #8 @ Ensure that we have at least
  83. blo .Lless8 @ 8 bytes to copy.
  84. adds sum, sum, #0 @ C = 0
  85. tst dst, #3 @ Test destination alignment
  86. blne .Ldst_unaligned @ align destination, return here
  87. /*
  88. * Ok, the dst pointer is now 32bit aligned, and we know
  89. * that we must have more than 4 bytes to copy. Note
  90. * that C contains the carry from the dst alignment above.
  91. */
  92. tst src, #3 @ Test source alignment
  93. bne .Lsrc_not_aligned
  94. /* Routine for src & dst aligned */
  /* ip = len with the low four bits cleared: bytes for the 16-byte loop. */
  95. bics ip, len, #15
  96. beq 2f
  /* Copy and sum four words per pass. */
  97. 1: load4l r4, r5, r6, r7
  98. stmia dst!, {r4, r5, r6, r7}
  99. adcs sum, sum, r4
  100. adcs sum, sum, r5
  101. adcs sum, sum, r6
  102. adcs sum, sum, r7
  /* 'sub' without S followed by 'teq' keeps C intact for the next pass. */
  103. sub ip, ip, #16
  104. teq ip, #0
  105. bne 1b
  /* ip = len & 12: one optional 8-byte step, then one optional 4-byte step. */
  106. 2: ands ip, len, #12
  107. beq 4f
  108. tst ip, #8
  109. beq 3f
  110. load2l r4, r5
  111. stmia dst!, {r4, r5}
  112. adcs sum, sum, r4
  113. adcs sum, sum, r5
  114. tst ip, #4
  115. beq 4f
  116. 3: load1l r4
  117. str r4, [dst], #4
  118. adcs sum, sum, r4
  /*
   * len = len & 3: one to three trailing bytes.  They are taken out
   * of one more load1l (presumably a whole-word read, of which only
   * the needed bytes are stored and summed -- confirm).
   */
  119. 4: ands len, len, #3
  120. beq .Ldone
  121. load1l r4
  122. tst len, #2
  123. mov r5, r4, get_byte_0
  /* Only one byte left: r5 already holds it, let .Lexit finish. */
  124. beq .Lexit
  /*
   * Two or three bytes left: sum the first two (the 'push #16'
   * presumably discards the other half of the word -- confirm),
   * store them, and leave a possible third byte in r5.
   */
  125. adcs sum, sum, r4, push #16
  126. strb r5, [dst], #1
  127. mov r5, r4, get_byte_1
  128. strb r5, [dst], #1
  129. mov r5, r4, get_byte_2
  /*
   * Shared tail, also branched to from the unaligned-source code.
   * If (len & 1) is set, the low byte of r5 is stored, r5 is masked
   * to 8 bits, and that byte is added to sum.  Otherwise the three
   * conditional instructions do nothing.
   */
  130. .Lexit: tst len, #1
  131. strneb r5, [dst], #1
  132. andne r5, r5, #255
  133. adcnes sum, sum, r5, put_byte_0
  134. /*
  135. * If the dst pointer was not 16-bit aligned, we
  136. * need to rotate the checksum here to get around
  137. * the inefficient byte manipulations in the
  138. * architecture independent code.
  139. */
  /*
   * Common exit: fold the outstanding carry into the result in r0.
   * NOTE(review): the reload below assumes save_regs left the
   * caller's original dst at [sp]; save_regs is defined elsewhere --
   * confirm.  'sum' is reused as scratch since its value is in r0.
   */
  140. .Ldone: adc r0, sum, #0
  141. ldr sum, [sp, #0] @ dst
  142. tst sum, #1
  143. movne r0, r0, ror #8
  144. load_regs ea
  /*
   * dst is word aligned but src is not.  src is rounded down to a
   * word boundary and read a word at a time; every stored word is
   * assembled from the not-yet-used part of the previous source word
   * (kept in r4) and the leading part of the next one.  This block
   * handles (src & 3) == 1 itself and dispatches 2 and 3 to
   * .Lsrc2_aligned and .Lsrc3_aligned.
   *
   * push/pull appear in shift-operator position and are defined
   * outside this file (presumably endian-dependent lsl/lsr); the
   * amounts used together always add up to 32.  load1l/load2l/load4l
   * and get_byte_N are external too; presumably the loads read words
   * from src with post-increment -- confirm.
   */
  145. .Lsrc_not_aligned:
  146. adc sum, sum, #0 @ include C from dst alignment
  147. and ip, src, #3
  148. bic src, src, #3
  149. load1l r5
  150. cmp ip, #2
  151. beq .Lsrc2_aligned
  152. bhi .Lsrc3_aligned
  /*
   * Falling through means (src & 3) == 1, since 0 was excluded
   * before branching here.  The cmp above therefore left C clear.
   */
  153. mov r4, r5, pull #8 @ C = 0
  /* ip = len with the low four bits cleared: bytes for the 16-byte loop. */
  154. bics ip, len, #15
  155. beq 2f
  /* Four output words per pass; r8 supplies the carry-over for the next pass. */
  156. 1: load4l r5, r6, r7, r8
  157. orr r4, r4, r5, push #24
  158. mov r5, r5, pull #8
  159. orr r5, r5, r6, push #24
  160. mov r6, r6, pull #8
  161. orr r6, r6, r7, push #24
  162. mov r7, r7, pull #8
  163. orr r7, r7, r8, push #24
  164. stmia dst!, {r4, r5, r6, r7}
  165. adcs sum, sum, r4
  166. adcs sum, sum, r5
  167. adcs sum, sum, r6
  168. adcs sum, sum, r7
  169. mov r4, r8, pull #8
  /* Flag-preserving loop control: C from the adcs chain survives. */
  170. sub ip, ip, #16
  171. teq ip, #0
  172. bne 1b
  /* ip = len & 12: one optional 8-byte step, then one optional 4-byte step. */
  173. 2: ands ip, len, #12
  174. beq 4f
  175. tst ip, #8
  176. beq 3f
  177. load2l r5, r6
  178. orr r4, r4, r5, push #24
  179. mov r5, r5, pull #8
  180. orr r5, r5, r6, push #24
  181. stmia dst!, {r4, r5}
  182. adcs sum, sum, r4
  183. adcs sum, sum, r5
  184. mov r4, r6, pull #8
  185. tst ip, #4
  186. beq 4f
  187. 3: load1l r5
  188. orr r4, r4, r5, push #24
  189. str r4, [dst], #4
  190. adcs sum, sum, r4
  191. mov r4, r5, pull #8
  /*
   * len = len & 3: up to three trailing bytes, all taken from the
   * carry-over in r4 without a further load.
   */
  192. 4: ands len, len, #3
  193. beq .Ldone
  194. mov r5, r4, get_byte_0
  195. tst len, #2
  /* Single byte: r5 already holds it, .Lexit stores and sums it. */
  196. beq .Lexit
  /* Two or three bytes: sum and store two, leave the third in r5. */
  197. adcs sum, sum, r4, push #16
  198. strb r5, [dst], #1
  199. mov r5, r4, get_byte_1
  200. strb r5, [dst], #1
  201. mov r5, r4, get_byte_2
  202. b .Lexit
  /*
   * Unaligned-source copy for (src & 3) == 2, entered from
   * .Lsrc_not_aligned with r5 = first word read from the rounded-down
   * src.  Same scheme as the (src & 3) == 1 code, with 16/16 shift
   * amounts: r4 carries the unused part of the previous source word
   * into the next output word.
   *
   * push/pull, load1b/load1l/load2l/load4l and get_byte_N are defined
   * outside this file; presumably push/pull are endian-dependent
   * shifts and the loads read from src with post-increment --
   * confirm.
   */
  203. .Lsrc2_aligned: mov r4, r5, pull #16
  /*
   * The 'cmp ip, #2' that dispatched here compared equal and so left
   * C set; adding zero clears it before the adcs chain starts.
   */
  204. adds sum, sum, #0
  /* ip = len with the low four bits cleared: bytes for the 16-byte loop. */
  205. bics ip, len, #15
  206. beq 2f
  /* Four output words per pass; r8 supplies the carry-over for the next pass. */
  207. 1: load4l r5, r6, r7, r8
  208. orr r4, r4, r5, push #16
  209. mov r5, r5, pull #16
  210. orr r5, r5, r6, push #16
  211. mov r6, r6, pull #16
  212. orr r6, r6, r7, push #16
  213. mov r7, r7, pull #16
  214. orr r7, r7, r8, push #16
  215. stmia dst!, {r4, r5, r6, r7}
  216. adcs sum, sum, r4
  217. adcs sum, sum, r5
  218. adcs sum, sum, r6
  219. adcs sum, sum, r7
  220. mov r4, r8, pull #16
  /* Flag-preserving loop control: C from the adcs chain survives. */
  221. sub ip, ip, #16
  222. teq ip, #0
  223. bne 1b
  /* ip = len & 12: one optional 8-byte step, then one optional 4-byte step. */
  224. 2: ands ip, len, #12
  225. beq 4f
  226. tst ip, #8
  227. beq 3f
  228. load2l r5, r6
  229. orr r4, r4, r5, push #16
  230. mov r5, r5, pull #16
  231. orr r5, r5, r6, push #16
  232. stmia dst!, {r4, r5}
  233. adcs sum, sum, r4
  234. adcs sum, sum, r5
  235. mov r4, r6, pull #16
  236. tst ip, #4
  237. beq 4f
  238. 3: load1l r5
  239. orr r4, r4, r5, push #16
  240. str r4, [dst], #4
  241. adcs sum, sum, r4
  242. mov r4, r5, pull #16
  /*
   * len = len & 3: trailing bytes.  The first two come from the
   * carry-over in r4; a third, if needed, is fetched with load1b.
   */
  243. 4: ands len, len, #3
  244. beq .Ldone
  245. mov r5, r4, get_byte_0
  246. tst len, #2
  /* Single byte: r5 already holds it, .Lexit stores and sums it. */
  247. beq .Lexit
  /* Two or three bytes: sum the carry-over as is and store both of its bytes. */
  248. adcs sum, sum, r4
  249. strb r5, [dst], #1
  250. mov r5, r4, get_byte_1
  251. strb r5, [dst], #1
  /* Exactly two bytes: finished.  Three: fetch the last one for .Lexit. */
  252. tst len, #1
  253. beq .Ldone
  254. load1b r5
  255. b .Lexit
  /*
   * Unaligned-source copy for (src & 3) == 3, entered from
   * .Lsrc_not_aligned with r5 = first word read from the rounded-down
   * src.  Same scheme as the other unaligned variants, with 24/8
   * shift amounts: r4 carries the unused part of the previous source
   * word into the next output word.
   *
   * push/pull, load1l/load2l/load4l and get_byte_N are defined
   * outside this file; presumably push/pull are endian-dependent
   * shifts and the loads read words from src with post-increment --
   * confirm.
   */
  256. .Lsrc3_aligned: mov r4, r5, pull #24
  /*
   * The 'cmp ip, #2' that dispatched here (via bhi) left C set;
   * adding zero clears it before the adcs chain starts.
   */
  257. adds sum, sum, #0
  /* ip = len with the low four bits cleared: bytes for the 16-byte loop. */
  258. bics ip, len, #15
  259. beq 2f
  /* Four output words per pass; r8 supplies the carry-over for the next pass. */
  260. 1: load4l r5, r6, r7, r8
  261. orr r4, r4, r5, push #8
  262. mov r5, r5, pull #24
  263. orr r5, r5, r6, push #8
  264. mov r6, r6, pull #24
  265. orr r6, r6, r7, push #8
  266. mov r7, r7, pull #24
  267. orr r7, r7, r8, push #8
  268. stmia dst!, {r4, r5, r6, r7}
  269. adcs sum, sum, r4
  270. adcs sum, sum, r5
  271. adcs sum, sum, r6
  272. adcs sum, sum, r7
  273. mov r4, r8, pull #24
  /* Flag-preserving loop control: C from the adcs chain survives. */
  274. sub ip, ip, #16
  275. teq ip, #0
  276. bne 1b
  /* ip = len & 12: one optional 8-byte step, then one optional 4-byte step. */
  277. 2: ands ip, len, #12
  278. beq 4f
  279. tst ip, #8
  280. beq 3f
  281. load2l r5, r6
  282. orr r4, r4, r5, push #8
  283. mov r5, r5, pull #24
  284. orr r5, r5, r6, push #8
  285. stmia dst!, {r4, r5}
  286. adcs sum, sum, r4
  287. adcs sum, sum, r5
  288. mov r4, r6, pull #24
  289. tst ip, #4
  290. beq 4f
  291. 3: load1l r5
  292. orr r4, r4, r5, push #8
  293. str r4, [dst], #4
  294. adcs sum, sum, r4
  295. mov r4, r5, pull #24
  /*
   * len = len & 3: trailing bytes.  The first comes from the
   * carry-over in r4; any further ones need another word from src.
   */
  296. 4: ands len, len, #3
  297. beq .Ldone
  298. mov r5, r4, get_byte_0
  299. tst len, #2
  /* Single byte: r5 already holds it, .Lexit stores and sums it. */
  300. beq .Lexit
  /*
   * Two or three bytes: store and sum the carried-over byte, read
   * the next word, store and sum its first byte, and leave its
   * second byte in r5 for .Lexit (used only when len & 1).
   */
  301. strb r5, [dst], #1
  302. adcs sum, sum, r4
  303. load1l r4
  304. mov r5, r4, get_byte_0
  305. strb r5, [dst], #1
  306. adcs sum, sum, r4, push #24
  307. mov r5, r4, get_byte_1
  308. b .Lexit