/* checksum.S */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen, add zeroing on error
 *			    converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
/* $Id$ */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/

#ifdef CONFIG_ISA_DUAL_ISSUE

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; unsigned int csum_partial(const unsigned char *buff, int len,
	;                           unsigned int sum)
	;
	; Folds the one's-complement sum of buff[0..len-1] into the caller's
	; 32-bit partial sum and returns the result in r0.
	; Dual-issue (CONFIG_ISA_DUAL_ISSUE) version: "insnA || insnB" issues
	; the two instructions in parallel.
	;
	; Conventions used throughout:
	;   - "cmp r0, r0" is used only for its side effect of clearing the
	;     condition (c) bit before a chain of addx (add-with-carry).
	;   - After each addx chain, adding a known-zero register folds the
	;     final carry back into the sum.
	;   - r7 != 0 records that buff started on an odd address, so the
	;     16-bit result must be byte-swapped at the end.
	;   - r14 holds the return address (jmp r14 returns).
	;
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum
	push	r2 || ldi	r2, #0		; save caller's sum; accumulate from 0
	and3	r7, r0, #1			; Check alignment.
	beqz	r7, 1f				; Jump if alignment is ok.
	; 1-byte mis aligned: consume one byte so buff becomes 2-byte aligned.
	; r7 stays non-zero so the final result is byte-swapped.
	ldub	r4, @r0 || addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0 || addi	r1, #-1
	ldi	r3, #0 || addx	r2, r4		; sum += byte
	addx	r2, r3				; fold in final carry (r3 = 0)
	.fillinsn
1:
	and3	r4, r0, #2			; Check alignment.
	beqz	r4, 2f				; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0 || addi	r1, #-2
	bgtz	r1, 1f				; Jump if we had at least two bytes.
	bra	4f || addi	r1, #2		; undo the subtract; only a 1-3 byte tail remains
	.fillinsn				; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned: fold one halfword to reach 4-byte alignment.
	lduh	r4, @r0 || ldi	r3, #0
	addx	r2, r4 || addi	r0, #2
	addx	r2, r3				; fold in final carry (r3 = 0)
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0				; clear c-bit
	srl3	r6, r1, #5			; r6 = number of 32-byte chunks
	beqz	r6, 2f
	.fillinsn
	; Main loop: 32 bytes (8 words) per iteration; the carry is chained
	; from word to word via addx.  Each load is paired with the
	; accumulation of a previously loaded register.
1:	ld	r3, @r0+
	ld	r4, @r0+			; +4
	ld	r5, @r0+			; +8
	ld	r3, @r0+ || addx	r2, r3	; +12
	ld	r4, @r0+ || addx	r2, r4	; +16
	ld	r5, @r0+ || addx	r2, r5	; +20
	ld	r3, @r0+ || addx	r2, r3	; +24
	ld	r4, @r0+ || addx	r2, r4	; +28
	addx	r2, r5 || addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6				; r6=0: folds in the last carry only
	cmp	r0, r0				; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c			; withdraw len: remaining whole words (bits 2-4 of len)
	beqz	r6, 4f
	srli	r6, #2				; byte count -> word count
	.fillinsn
	; Sum the remaining whole words one at a time.
3:	ld	r4, @r0+ || addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6				; r6=0: folds in the last carry only
	cmp	r0, r0				; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3			; 0-3 trailing bytes left
	beqz	r1, 7f				; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f				; if len < 2 goto 5f(1byte)
	; Trailing halfword: placed in the upper half of the word.
	lduh	r4, @r0 || addi	r0, #2
	addi	r1, #-2 || slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
	; Final single byte (r1 cleared so label 6 only folds the carry).
5:	ldub	r4, @r0 || ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8				; big-endian: byte is the high half of its halfword
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1				; r1=0: folds in the last carry only
	.fillinsn
7:
	; Fold the 32-bit sum to 16 bits: r0 = low16 + high16, then wrap
	; any carry out of the fold back in (end-around carry).
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1				; end-around carry
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f				; swap the upper byte for the lower
	; buff started on an odd address: every byte landed in the opposite
	; half of its halfword, so swap the two bytes of the 16-bit result.
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	; Add the caller's original sum with end-around carry, and return.
	pop	r2 || cmp	r0, r0		; restore caller's sum; clear c-bit
	addx	r0, r2 || ldi	r2, #0
	addx	r0, r2				; fold in final carry (r2 = 0)
	jmp	r14				; return; result in r0
#else /* not CONFIG_ISA_DUAL_ISSUE */

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; unsigned int csum_partial(const unsigned char *buff, int len,
	;                           unsigned int sum)
	;
	; Single-issue variant: same algorithm as the dual-issue version,
	; one instruction per line.  Folds the one's-complement sum of
	; buff[0..len-1] into the caller's 32-bit partial sum; result in r0.
	;
	; Conventions:
	;   - "cmp r0, r0" only clears the condition (c) bit before an
	;     addx (add-with-carry) chain.
	;   - Adding a known-zero register after a chain folds the final
	;     carry back into the sum.
	;   - r7 != 0 records an odd start address; the 16-bit result is
	;     byte-swapped at the end in that case.
	;   - r14 holds the return address.
	;
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum
	push	r2			; save caller's sum; added back at the end
	ldi	r2, #0			; accumulate from 0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte mis aligned: consume one byte so buff becomes 2-byte aligned.
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4			; sum += byte
	addx	r2, r3			; fold in final carry (r3 = 0)
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f			; only a 1-3 byte tail remains
	.fillinsn
1:
	; 2-byte aligned: fold one halfword to reach 4-byte alignment.
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3			; fold in final carry (r3 = 0)
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5		; r6 = number of 32-byte chunks
	beqz	r6, 2f
	.fillinsn
	; Main loop: 32 bytes (8 words) per iteration; the carry is chained
	; from word to word via addx.
1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0: folds in the last carry only
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; withdraw len: remaining whole words (bits 2-4 of len)
	beqz	r6, 4f
	srli	r6, #2			; byte count -> word count
	.fillinsn
	; Sum the remaining whole words one at a time.
3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0: folds in the last carry only
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3		; 0-3 trailing bytes left
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f(1byte)
	; Trailing halfword: placed in the upper half of the word.
	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
	; Final single byte.
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8			; big-endian: byte is the high half of its halfword
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5			; fold in the last carry (r5 = 0)
	.fillinsn
7:
	; Fold the 32-bit sum to 16 bits: r0 = low16 + high16, then wrap
	; any carry out of the fold back in (end-around carry).
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1			; end-around carry
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; odd start address? swap result bytes
	; buff started on an odd address: swap the two bytes of the 16-bit
	; result so each byte sits in its proper half.
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	; Add the caller's original sum with end-around carry, and return.
	pop	r2			; restore caller's sum
	cmp	r0, r0			; clear c-bit
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2			; fold in final carry (r2 = 0)
	jmp	r14			; return; result in r0
#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
	int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I
 *        got them all but there's no guarantee.
 */
ENTRY(csum_partial_copy_generic)
	; unsigned int csum_partial_copy_generic(const char *src, char *dst,
	;	int len, int sum, int *src_err_ptr, int *dst_err_ptr)
	;
	; Stub: performs no copy and no checksum here — just nops and a
	; return via r14.  NOTE(review): presumably the real implementation
	; lives elsewhere (e.g. a C fallback); confirm before relying on
	; this entry point for actual copy-and-checksum work.
	nop
	nop
	nop
	nop
	jmp	r14			; return immediately
	nop
	nop
	nop