/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
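/*
 * For orientation, a plain-C sketch of the operation implemented below
 * (illustrative only -- not the kernel's generic fallback, and it skips
 * the alignment tricks the assembly uses).  The assembly accumulates
 * 32-bit words, so its raw 32-bit result may differ from this model's,
 * but both fold to the same 16-bit one's-complement checksum:
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buff,
 *					 int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {			// 16-bit words
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)				// trailing byte goes
 *			acc += *buff;			// in the low half (LE)
 *		while (acc >> 32)			// end-around carry
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */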
.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
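/*
 * Odd buff alignment is handled by summing the first byte, rotating the
 * accumulator left by 8, and then proceeding as if the buffer were
 * even-aligned; the rotation is undone before returning (the
 * "testl $1, 12(%esp)" at label 7 below).  This works because the
 * one's-complement sum is preserved under a consistent byte swap of
 * all operands (the "byte-swapped sums" observation of RFC 1071).
 */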
ENTRY(csum_partial)
	CFI_STARTPROC
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is on a 2-byte boundary.

	# buf is odd
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx		# Number of 32-byte chunks.
	jz 2f
	testl %esi, %esi	# Clear CF before the adcl chain.
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testl $1, 12(%esp)	# Was buff odd-aligned?  If so, undo the roll.
	jz 8f
	roll $8, %eax
8:
	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %esi
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)
#else

/* Version for PentiumII/PPro */

ENTRY(csum_partial)
	CFI_STARTPROC
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf
	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi
	jmp *%ebx
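	# The jmp above enters the unrolled block at label 40 part-way
	# through: each "adcl disp8(%esi), %eax" there assembles to 3
	# bytes, and %ebx holds -(len & 0x7c)/4, so 45f + 3*%ebx skips
	# all but the last (len & 0x7c)/4 terms.  %esi was pre-advanced
	# by len & 0x7c, so the negative displacements still cover the
	# data from the start of the buffer.  The testl clears CF for
	# the first adcl.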
	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f
40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
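	# Worked through for len & 3 in %ecx: 1 -> shift count 16, mask
	# 0x0000ff (1 byte); 2 -> shift count 8, mask 0x00ffff (2 bytes);
	# 3 -> shift count 0, mask 0xffffff (3 bytes).  The 4-aligned
	# dword load cannot cross a page boundary, so reading past the
	# valid bytes and masking the excess off is safe.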
80:
	testl $1, 12(%esp)	# Was buf odd-aligned?  If so, undo the roll.
	jz 90f
	roll $8, %eax
90:
	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %esi
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)

#endif
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/
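/*
 * The contract, sketched in pseudo-C (illustrative only; "faults"
 * stands for the exception-table machinery set up below):
 *
 *	unsigned int csum_partial_copy_generic(...)
 *	{
 *		copy len bytes from src to dst, summing them as in
 *		csum_partial;
 *		if (a read of src faults) {
 *			*src_err_ptr = -EFAULT;
 *			zero all of dst[0..len);
 *		}
 *		if (a write to dst faults)
 *			*dst_err_ptr = -EFAULT;
 *		return the (partial) checksum;
 *	}
 */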
/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
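/*
 * Each instruction wrapped in SRC()/DST() gets an __ex_table entry
 * pairing its address with a fixup address: 6001f for source reads,
 * 6002f for destination writes.  If such an instruction faults, the
 * page fault handler finds the entry and resumes execution at the
 * fixup instead of oopsing; the fixup code below then reports -EFAULT
 * through the corresponding error pointer.
 */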
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP	12

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	subl $4,%esp
	CFI_ADJUST_CFA_OFFSET 4
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx		# Number of 32-byte chunks.
	jz 2f
	testl %esi, %esi	# Clear CF before the adcl chain.
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %esi
	CFI_RESTORE esi
	popl_cfi %edi
	CFI_RESTORE edi
	popl_cfi %ecx			# equivalent to addl $4,%esp
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;
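/*
 * ROUND1 opens a carry chain with addl, ROUND continues it with adcl.
 * Both expand to the same amount of code (a 3-byte load, a 2-byte
 * add/adc and a 3-byte store -- the SRC/DST annotations emit only
 * __ex_table data, not code), which the computed jump below depends on.
 */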
#define ARGBASE 12

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi
	jmp *%ebx
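	# Computed jump into the unrolled loop: each ROUND/ROUND1 is 8
	# bytes of code for 4 bytes of data, i.e. 2 code bytes per data
	# byte, and %ebx holds -(len & 0x3c), so 3f + 2*%ebx enters the
	# chain with exactly (len & 0x3c)/4 rounds left to run.  %esi and
	# %edi were pre-advanced by len & 0x3c above, so the negative
	# displacements land on the start of the buffers; the testl
	# clears CF.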
1:	addl $64,%esi
	addl $64,%edi
	SRC(movb -32(%edx),%bl)	;	SRC(movb (%edx),%bl)
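	# The two movb loads above are only there to touch the cache
	# lines at -32(%edx) and (%edx) ahead of the ROUND chain
	# (%edx was rounded down to a 32-byte boundary); %bl is dead
	# and is immediately overwritten by ROUND1 below.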
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl_cfi %esi
	CFI_RESTORE esi
	popl_cfi %edi
	CFI_RESTORE edi
	popl_cfi %ebx
	CFI_RESTORE ebx
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif