
/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *                           handling.
 *              Andi Kleen,  add zeroing on error
 *                           converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *                           unsigned int sum);
 */
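/*
 * For reference, the value this routine returns can be modelled in C roughly
 * as below.  This is only a sketch for illustration (the helper name
 * "csum_partial_ref" is made up, and byte-order handling of the final odd
 * byte is elided), not the kernel's actual generic implementation:
 *
 *	unsigned int csum_partial_ref(const unsigned char *buf, int len,
 *	                              unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {	// sum 16-bit words in memory order
 *			acc += *(const unsigned short *)buf;
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len)		// trailing odd byte, if any
 *			acc += *buf;
 *		while (acc >> 32)	// fold carries back into 32 bits
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */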
.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
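	/*
	 * Here that conversion is done by summing one 16-bit word up front
	 * (label 1 below), after which r4 is 4-byte aligned for the
	 * 32-bytes-per-pass unrolled loop at label 3.
	 */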
	mov	r5, r1
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
	mov	#-5, r0
	shld	r0, r5		! r5 = len / 32
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	 clrt
	.align	2
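! Main loop: each pass reads eight longwords (32 bytes) and accumulates
! them into r6 with addc.  dt clobbers the T bit, so the pending carry is
! parked in r0 with movt and restored by the cmp/eq #1, r0 in the branch
! delay slot.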
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry (T); dt below clobbers T
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0		! delay slot: restore saved carry into T
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! bytes left in whole longwords (len & 0x1c)
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5		! r5 = longword count
	mov	#0, r2
5:				! software pipelined: add the previous
	addc	r2, r6		! longword while fetching the next
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5 == 0 here, so this adds the carry bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef __LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	rts
	 mov	r6, r0
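/*
 * Typical C-level usage folds the returned 32-bit partial sum down to the
 * final 16-bit Internet checksum.  A minimal sketch (the kernel's csum_fold
 * helper from asm/checksum.h performs this folding):
 *
 *	unsigned int sum = csum_partial(buf, len, 0);
 *	unsigned short check;
 *
 *	sum = (sum & 0xffff) + (sum >> 16);	// fold high half into low
 *	sum = (sum & 0xffff) + (sum >> 16);	// absorb any carry from above
 *	check = (unsigned short)~sum;		// ones' complement result
 */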
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *                                        int sum, int *src_err_ptr,
 *                                        int *dst_err_ptr)
 */

/*
 * Copy from src while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * thus we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 *        them all but there's no guarantee.
 */
#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f ;		\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f ;		\
	.previous
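!
! Each SRC/DST access records a (faulting insn, fixup) address pair in the
! __ex_table section.  On a fault inside one of these instructions, the
! kernel's fault handler looks the faulting PC up in that table and resumes
! at the fixup label (6001 for source faults, 6002 for destination faults)
! instead of killing the caller.
!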
!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
!	int *SRC_ERR_PTR
!	int *DST_ERR_PTR
!
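! From C this is normally reached through thin wrappers, roughly as below
! (a sketch; exact wrapper names and error conventions vary by kernel
! version):
!
!	int err = 0;
!	sum = csum_partial_copy_generic(src, dst, len, sum, &err, NULL);
!	if (err)
!		;	// fault while reading src: dst was zero-filled
!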
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	 mov	r6,r2
3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
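! Two bytes per pass: the byte pair is merged into one 16-bit value
! (which byte gets shifted depends on endianness) before being added
! with carry into r7.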
5:
SRC(	mov.b	@r4+,r1		)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef __LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0		! odd trailing byte left?
	bt	7f
	bra	5f
	 clrt
! src and dest equally aligned, but to a two byte boundary.
! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6		! r6 = len / 32
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7

	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
2:	mov	r2,r6		! restore len
	mov	#0x1c,r0
	and	r0,r6
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = remaining longword count
3:
SRC(	mov.l	@r4+,r0		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	 clrt
	shll16	r0
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0		)
DST(	mov.b	r0,@r5		)
	extu.b	r0,r0
#ifndef __LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:
# Exception handler:
.section .fixup, "ax"
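! r5 and r6 were pushed on entry, so the stacked arguments have moved:
! src_err_ptr now sits at @(8,r15) and dst_err_ptr at @(12,r15), while
! the saved len and dst are at @r15 and @(4,r15).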
6001:
	mov.l	@(8,r15),r0	! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst
	mov.l	@r15,r6		! len
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	 nop
	.align	2
8000:	.long	5000b

6002:
	mov.l	@(12,r15),r0	! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15
	rts
	 mov	r7,r0