/* checksum.S */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo, rewrite for the m32r architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
/* $Id$ */

#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
  39. #ifdef CONFIG_ISA_DUAL_ISSUE
  40. /*
  41. * Experiments with Ethernet and SLIP connections show that buff
  42. * is aligned on either a 2-byte or 4-byte boundary. We get at
  43. * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  44. * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  45. * alignment for the unrolled loop.
  46. */
  47. .text
  48. ENTRY(csum_partial)
  49. ; Function args
  50. ; r0: unsigned char *buff
  51. ; r1: int len
  52. ; r2: unsigned int sum
  53. push r2 || ldi r2, #0
  54. and3 r7, r0, #1 ; Check alignment.
  55. beqz r7, 1f ; Jump if alignment is ok.
  56. ; 1-byte mis aligned
  57. ldub r4, @r0 || addi r0, #1
  58. ; clear c-bit || Alignment uses up bytes.
  59. cmp r0, r0 || addi r1, #-1
  60. ldi r3, #0 || addx r2, r4
  61. addx r2, r3
  62. .fillinsn
  63. 1:
  64. and3 r4, r0, #2 ; Check alignment.
  65. beqz r4, 2f ; Jump if alignment is ok.
  66. ; clear c-bit || Alignment uses up two bytes.
  67. cmp r0, r0 || addi r1, #-2
  68. bgtz r1, 1f ; Jump if we had at least two bytes.
  69. bra 4f || addi r1, #2
  70. .fillinsn ; len(r1) was < 2. Deal with it.
  71. 1:
  72. ; 2-byte aligned
  73. lduh r4, @r0 || ldi r3, #0
  74. addx r2, r4 || addi r0, #2
  75. addx r2, r3
  76. .fillinsn
  77. 2:
  78. ; 4-byte aligned
  79. cmp r0, r0 ; clear c-bit
  80. srl3 r6, r1, #5
  81. beqz r6, 2f
  82. .fillinsn
  83. 1: ld r3, @r0+
  84. ld r4, @r0+ ; +4
  85. ld r5, @r0+ ; +8
  86. ld r3, @r0+ || addx r2, r3 ; +12
  87. ld r4, @r0+ || addx r2, r4 ; +16
  88. ld r5, @r0+ || addx r2, r5 ; +20
  89. ld r3, @r0+ || addx r2, r3 ; +24
  90. ld r4, @r0+ || addx r2, r4 ; +28
  91. addx r2, r5 || addi r6, #-1
  92. addx r2, r3
  93. addx r2, r4
  94. bnez r6, 1b
  95. addx r2, r6 ; r6=0
  96. cmp r0, r0 ; This clears c-bit
  97. .fillinsn
  98. 2: and3 r6, r1, #0x1c ; withdraw len
  99. beqz r6, 4f
  100. srli r6, #2
  101. .fillinsn
  102. 3: ld r4, @r0+ || addi r6, #-1
  103. addx r2, r4
  104. bnez r6, 3b
  105. addx r2, r6 ; r6=0
  106. cmp r0, r0 ; This clears c-bit
  107. .fillinsn
  108. 4: and3 r1, r1, #3
  109. beqz r1, 7f ; if len == 0 goto end
  110. and3 r6, r1, #2
  111. beqz r6, 5f ; if len < 2 goto 5f(1byte)
  112. lduh r4, @r0 || addi r0, #2
  113. addi r1, #-2 || slli r4, #16
  114. addx r2, r4
  115. beqz r1, 6f
  116. .fillinsn
  117. 5: ldub r4, @r0 || ldi r1, #0
  118. #ifndef __LITTLE_ENDIAN__
  119. slli r4, #8
  120. #endif
  121. addx r2, r4
  122. .fillinsn
  123. 6: addx r2, r1
  124. .fillinsn
  125. 7:
  126. and3 r0, r2, #0xffff
  127. srli r2, #16
  128. add r0, r2
  129. srl3 r2, r0, #16
  130. beqz r2, 1f
  131. addi r0, #1
  132. and3 r0, r0, #0xffff
  133. .fillinsn
  134. 1:
  135. beqz r7, 1f ; swap the upper byte for the lower
  136. and3 r2, r0, #0xff
  137. srl3 r0, r0, #8
  138. slli r2, #8
  139. or r0, r2
  140. .fillinsn
  141. 1:
  142. pop r2 || cmp r0, r0
  143. addx r0, r2 || ldi r2, #0
  144. addx r0, r2
  145. jmp r14
  146. #else /* not CONFIG_ISA_DUAL_ISSUE */
  147. /*
  148. * Experiments with Ethernet and SLIP connections show that buff
  149. * is aligned on either a 2-byte or 4-byte boundary. We get at
  150. * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  151. * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  152. * alignment for the unrolled loop.
  153. */
  154. .text
  155. ENTRY(csum_partial)
  156. ; Function args
  157. ; r0: unsigned char *buff
  158. ; r1: int len
  159. ; r2: unsigned int sum
  160. push r2
  161. ldi r2, #0
  162. and3 r7, r0, #1 ; Check alignment.
  163. beqz r7, 1f ; Jump if alignment is ok.
  164. ; 1-byte mis aligned
  165. ldub r4, @r0
  166. addi r0, #1
  167. addi r1, #-1 ; Alignment uses up bytes.
  168. cmp r0, r0 ; clear c-bit
  169. ldi r3, #0
  170. addx r2, r4
  171. addx r2, r3
  172. .fillinsn
  173. 1:
  174. and3 r4, r0, #2 ; Check alignment.
  175. beqz r4, 2f ; Jump if alignment is ok.
  176. addi r1, #-2 ; Alignment uses up two bytes.
  177. cmp r0, r0 ; clear c-bit
  178. bgtz r1, 1f ; Jump if we had at least two bytes.
  179. addi r1, #2 ; len(r1) was < 2. Deal with it.
  180. bra 4f
  181. .fillinsn
  182. 1:
  183. ; 2-byte aligned
  184. lduh r4, @r0
  185. addi r0, #2
  186. ldi r3, #0
  187. addx r2, r4
  188. addx r2, r3
  189. .fillinsn
  190. 2:
  191. ; 4-byte aligned
  192. cmp r0, r0 ; clear c-bit
  193. srl3 r6, r1, #5
  194. beqz r6, 2f
  195. .fillinsn
  196. 1: ld r3, @r0+
  197. ld r4, @r0+ ; +4
  198. ld r5, @r0+ ; +8
  199. addx r2, r3
  200. addx r2, r4
  201. addx r2, r5
  202. ld r3, @r0+ ; +12
  203. ld r4, @r0+ ; +16
  204. ld r5, @r0+ ; +20
  205. addx r2, r3
  206. addx r2, r4
  207. addx r2, r5
  208. ld r3, @r0+ ; +24
  209. ld r4, @r0+ ; +28
  210. addi r6, #-1
  211. addx r2, r3
  212. addx r2, r4
  213. bnez r6, 1b
  214. addx r2, r6 ; r6=0
  215. cmp r0, r0 ; This clears c-bit
  216. .fillinsn
  217. 2: and3 r6, r1, #0x1c ; withdraw len
  218. beqz r6, 4f
  219. srli r6, #2
  220. .fillinsn
  221. 3: ld r4, @r0+
  222. addi r6, #-1
  223. addx r2, r4
  224. bnez r6, 3b
  225. addx r2, r6 ; r6=0
  226. cmp r0, r0 ; This clears c-bit
  227. .fillinsn
  228. 4: and3 r1, r1, #3
  229. beqz r1, 7f ; if len == 0 goto end
  230. and3 r6, r1, #2
  231. beqz r6, 5f ; if len < 2 goto 5f(1byte)
  232. lduh r4, @r0
  233. addi r0, #2
  234. addi r1, #-2
  235. slli r4, #16
  236. addx r2, r4
  237. beqz r1, 6f
  238. .fillinsn
  239. 5: ldub r4, @r0
  240. #ifndef __LITTLE_ENDIAN__
  241. slli r4, #8
  242. #endif
  243. addx r2, r4
  244. .fillinsn
  245. 6: ldi r5, #0
  246. addx r2, r5
  247. .fillinsn
  248. 7:
  249. and3 r0, r2, #0xffff
  250. srli r2, #16
  251. add r0, r2
  252. srl3 r2, r0, #16
  253. beqz r2, 1f
  254. addi r0, #1
  255. and3 r0, r0, #0xffff
  256. .fillinsn
  257. 1:
  258. beqz r7, 1f
  259. mv r2, r0
  260. srl3 r0, r2, #8
  261. and3 r2, r2, #0xff
  262. slli r2, #8
  263. or r0, r2
  264. .fillinsn
  265. 1:
  266. pop r2
  267. cmp r0, r0
  268. addx r0, r2
  269. ldi r2, #0
  270. addx r0, r2
  271. jmp r14
  272. #endif /* not CONFIG_ISA_DUAL_ISSUE */
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
  287. ENTRY(csum_partial_copy_generic)
  288. nop
  289. nop
  290. nop
  291. nop
  292. jmp r14
  293. nop
  294. nop
  295. nop