! arch/sh/lib/checksum.S -- SuperH IP/TCP/UDP checksumming routines
/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */
  39. .text
  40. ENTRY(csum_partial)
  41. /*
  42. * Experiments with Ethernet and SLIP connections show that buff
  43. * is aligned on either a 2-byte or 4-byte boundary. We get at
  44. * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  45. * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  46. * alignment for the unrolled loop.
  47. */
  48. mov r5, r1
  49. mov r4, r0
  50. tst #2, r0 ! Check alignment.
  51. bt 2f ! Jump if alignment is ok.
  52. !
  53. add #-2, r5 ! Alignment uses up two bytes.
  54. cmp/pz r5 !
  55. bt/s 1f ! Jump if we had at least two bytes.
  56. clrt
  57. bra 6f
  58. add #2, r5 ! r5 was < 2. Deal with it.
  59. 1:
  60. mov r5, r1 ! Save new len for later use.
  61. mov.w @r4+, r0
  62. extu.w r0, r0
  63. addc r0, r6
  64. bf 2f
  65. add #1, r6
  66. 2:
  67. mov #-5, r0
  68. shld r0, r5
  69. tst r5, r5
  70. bt/s 4f ! if it's =0, go to 4f
  71. clrt
  72. .align 2
  73. 3:
  74. mov.l @r4+, r0
  75. mov.l @r4+, r2
  76. mov.l @r4+, r3
  77. addc r0, r6
  78. mov.l @r4+, r0
  79. addc r2, r6
  80. mov.l @r4+, r2
  81. addc r3, r6
  82. mov.l @r4+, r3
  83. addc r0, r6
  84. mov.l @r4+, r0
  85. addc r2, r6
  86. mov.l @r4+, r2
  87. addc r3, r6
  88. addc r0, r6
  89. addc r2, r6
  90. movt r0
  91. dt r5
  92. bf/s 3b
  93. cmp/eq #1, r0
  94. ! here, we know r5==0
  95. addc r5, r6 ! add carry to r6
  96. 4:
  97. mov r1, r0
  98. and #0x1c, r0
  99. tst r0, r0
  100. bt/s 6f
  101. mov r0, r5
  102. shlr2 r5
  103. mov #0, r2
  104. 5:
  105. addc r2, r6
  106. mov.l @r4+, r2
  107. movt r0
  108. dt r5
  109. bf/s 5b
  110. cmp/eq #1, r0
  111. addc r2, r6
  112. addc r5, r6 ! r5==0 here, so it means add carry-bit
  113. 6:
  114. mov r1, r5
  115. mov #3, r0
  116. and r0, r5
  117. tst r5, r5
  118. bt 9f ! if it's =0 go to 9f
  119. mov #2, r1
  120. cmp/hs r1, r5
  121. bf 7f
  122. mov.w @r4+, r0
  123. extu.w r0, r0
  124. cmp/eq r1, r5
  125. bt/s 8f
  126. clrt
  127. shll16 r0
  128. addc r0, r6
  129. 7:
  130. mov.b @r4+, r0
  131. extu.b r0, r0
  132. #ifndef __LITTLE_ENDIAN__
  133. shll8 r0
  134. #endif
  135. 8:
  136. addc r0, r6
  137. mov #0, r0
  138. addc r0, r6
  139. 9:
  140. rts
  141. mov r6, r0
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *					  int sum, int *src_err_ptr,
 *					  int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions?  It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
  156. #define SRC(...) \
  157. 9999: __VA_ARGS__ ; \
  158. .section __ex_table, "a"; \
  159. .long 9999b, 6001f ; \
  160. .previous
  161. #define DST(...) \
  162. 9999: __VA_ARGS__ ; \
  163. .section __ex_table, "a"; \
  164. .long 9999b, 6002f ; \
  165. .previous
  166. !
  167. ! r4: const char *SRC
  168. ! r5: char *DST
  169. ! r6: int LEN
  170. ! r7: int SUM
  171. !
  172. ! on stack:
  173. ! int *SRC_ERR_PTR
  174. ! int *DST_ERR_PTR
  175. !
  176. ENTRY(csum_partial_copy_generic)
  177. mov.l r5,@-r15
  178. mov.l r6,@-r15
  179. mov #3,r0 ! Check src and dest are equally aligned
  180. mov r4,r1
  181. and r0,r1
  182. and r5,r0
  183. cmp/eq r1,r0
  184. bf 3f ! Different alignments, use slow version
  185. tst #1,r0 ! Check dest word aligned
  186. bf 3f ! If not, do it the slow way
  187. mov #2,r0
  188. tst r0,r5 ! Check dest alignment.
  189. bt 2f ! Jump if alignment is ok.
  190. add #-2,r6 ! Alignment uses up two bytes.
  191. cmp/pz r6 ! Jump if we had at least two bytes.
  192. bt/s 1f
  193. clrt
  194. bra 4f
  195. add #2,r6 ! r6 was < 2. Deal with it.
  196. 3: ! Handle different src and dest alignments.
  197. ! This is not common, so simple byte by byte copy will do.
  198. mov r6,r2
  199. shlr r6
  200. tst r6,r6
  201. bt 4f
  202. clrt
  203. .align 2
  204. 5:
  205. SRC( mov.b @r4+,r1 )
  206. SRC( mov.b @r4+,r0 )
  207. extu.b r1,r1
  208. DST( mov.b r1,@r5 )
  209. DST( mov.b r0,@(1,r5) )
  210. extu.b r0,r0
  211. add #2,r5
  212. #ifdef __LITTLE_ENDIAN__
  213. shll8 r0
  214. #else
  215. shll8 r1
  216. #endif
  217. or r1,r0
  218. addc r0,r7
  219. movt r0
  220. dt r6
  221. bf/s 5b
  222. cmp/eq #1,r0
  223. mov #0,r0
  224. addc r0, r7
  225. mov r2, r0
  226. tst #1, r0
  227. bt 7f
  228. bra 5f
  229. clrt
  230. ! src and dest equally aligned, but to a two byte boundary.
  231. ! Handle first two bytes as a special case
  232. .align 2
  233. 1:
  234. SRC( mov.w @r4+,r0 )
  235. DST( mov.w r0,@r5 )
  236. add #2,r5
  237. extu.w r0,r0
  238. addc r0,r7
  239. mov #0,r0
  240. addc r0,r7
  241. 2:
  242. mov r6,r2
  243. mov #-5,r0
  244. shld r0,r6
  245. tst r6,r6
  246. bt/s 2f
  247. clrt
  248. .align 2
  249. 1:
  250. SRC( mov.l @r4+,r0 )
  251. SRC( mov.l @r4+,r1 )
  252. addc r0,r7
  253. DST( mov.l r0,@r5 )
  254. DST( mov.l r1,@(4,r5) )
  255. addc r1,r7
  256. SRC( mov.l @r4+,r0 )
  257. SRC( mov.l @r4+,r1 )
  258. addc r0,r7
  259. DST( mov.l r0,@(8,r5) )
  260. DST( mov.l r1,@(12,r5) )
  261. addc r1,r7
  262. SRC( mov.l @r4+,r0 )
  263. SRC( mov.l @r4+,r1 )
  264. addc r0,r7
  265. DST( mov.l r0,@(16,r5) )
  266. DST( mov.l r1,@(20,r5) )
  267. addc r1,r7
  268. SRC( mov.l @r4+,r0 )
  269. SRC( mov.l @r4+,r1 )
  270. addc r0,r7
  271. DST( mov.l r0,@(24,r5) )
  272. DST( mov.l r1,@(28,r5) )
  273. addc r1,r7
  274. add #32,r5
  275. movt r0
  276. dt r6
  277. bf/s 1b
  278. cmp/eq #1,r0
  279. mov #0,r0
  280. addc r0,r7
  281. 2: mov r2,r6
  282. mov #0x1c,r0
  283. and r0,r6
  284. cmp/pl r6
  285. bf/s 4f
  286. clrt
  287. shlr2 r6
  288. 3:
  289. SRC( mov.l @r4+,r0 )
  290. addc r0,r7
  291. DST( mov.l r0,@r5 )
  292. add #4,r5
  293. movt r0
  294. dt r6
  295. bf/s 3b
  296. cmp/eq #1,r0
  297. mov #0,r0
  298. addc r0,r7
  299. 4: mov r2,r6
  300. mov #3,r0
  301. and r0,r6
  302. cmp/pl r6
  303. bf 7f
  304. mov #2,r1
  305. cmp/hs r1,r6
  306. bf 5f
  307. SRC( mov.w @r4+,r0 )
  308. DST( mov.w r0,@r5 )
  309. extu.w r0,r0
  310. add #2,r5
  311. cmp/eq r1,r6
  312. bt/s 6f
  313. clrt
  314. shll16 r0
  315. addc r0,r7
  316. 5:
  317. SRC( mov.b @r4+,r0 )
  318. DST( mov.b r0,@r5 )
  319. extu.b r0,r0
  320. #ifndef __LITTLE_ENDIAN__
  321. shll8 r0
  322. #endif
  323. 6: addc r0,r7
  324. mov #0,r0
  325. addc r0,r7
  326. 7:
  327. 5000:
  328. # Exception handler:
  329. .section .fixup, "ax"
  330. 6001:
  331. mov.l @(8,r15),r0 ! src_err_ptr
  332. mov #-EFAULT,r1
  333. mov.l r1,@r0
  334. ! zero the complete destination - computing the rest
  335. ! is too much work
  336. mov.l @(4,r15),r5 ! dst
  337. mov.l @r15,r6 ! len
  338. mov #0,r7
  339. 1: mov.b r7,@r5
  340. dt r6
  341. bf/s 1b
  342. add #1,r5
  343. mov.l 8000f,r0
  344. jmp @r0
  345. nop
  346. .align 2
  347. 8000: .long 5000b
  348. 6002:
  349. mov.l @(12,r15),r0 ! dst_err_ptr
  350. mov #-EFAULT,r1
  351. mov.l r1,@r0
  352. mov.l 8001f,r0
  353. jmp @r0
  354. nop
  355. .align 2
  356. 8001: .long 5000b
  357. .previous
  358. add #8,r15
  359. rts
  360. mov r7,r0