csum_partial.S 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. /*
  2. * This file is subject to the terms and conditions of the GNU General Public
  3. * License. See the file "COPYING" in the main directory of this archive
  4. * for more details.
  5. *
  6. * Quick'n'dirty IP checksum ...
  7. *
  8. * Copyright (C) 1998, 1999 Ralf Baechle
  9. * Copyright (C) 1999 Silicon Graphics, Inc.
  10. */
  11. #include <asm/asm.h>
  12. #include <asm/regdef.h>
  /*
   * Scratch-register aliases used throughout this file.  Two sets of names
   * are needed, selected by CONFIG_64BIT -- presumably because the symbolic
   * temporaries provided by <asm/regdef.h> differ between the 32-bit and
   * 64-bit register naming schemes (verify against that header).
   */
  #ifndef CONFIG_64BIT
  #define T0 t0
  #define T1 t1
  #define T2 t2
  #define T3 t3
  #define T4 t4
  #define T7 t7
  #else /* CONFIG_64BIT */
  #define T0 ta0
  #define T1 ta1
  #define T2 ta2
  #define T3 ta3
  #define T4 t0
  #define T7 t3
  #endif /* !CONFIG_64BIT */
  /*
   * ADDC(acc, val): acc += val, then add the carry out of that 32-bit
   * addition back into acc (end-around carry).
   *
   * Clobbers v1, which receives the carry bit (0 or 1).
   */
  #define ADDC(acc, val) \
          addu    acc, acc, val; \
          sltu    v1, acc, val; \
          addu    acc, acc, v1
  /*
   * CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)
   *
   * Accumulate the eight consecutive 32-bit words (32 bytes) found at
   * offset(src) .. offset+0x1c(src) into sum, using ADDC for every word.
   *
   *   src     base address register; it is not modified here
   *   offset  constant byte offset added to every load
   *   sum     accumulator register
   *   _t0.._t3 scratch registers, overwritten
   *
   * v1 is clobbered as well (through ADDC).  The loads are plain lw, so the
   * caller is expected to pass a word-aligned address.
   *
   * The final line carries no continuation backslash, so the macro body ends
   * with the last ADDC and cannot absorb whatever line follows it.
   */
  #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
          lw      _t0, (offset + 0x00)(src); \
          lw      _t1, (offset + 0x04)(src); \
          lw      _t2, (offset + 0x08)(src); \
          lw      _t3, (offset + 0x0c)(src); \
          ADDC(sum, _t0); \
          ADDC(sum, _t1); \
          ADDC(sum, _t2); \
          ADDC(sum, _t3); \
          lw      _t0, (offset + 0x10)(src); \
          lw      _t1, (offset + 0x14)(src); \
          lw      _t2, (offset + 0x18)(src); \
          lw      _t3, (offset + 0x1c)(src); \
          ADDC(sum, _t0); \
          ADDC(sum, _t1); \
          ADDC(sum, _t2); \
          ADDC(sum, _t3)
  /*
   * Register usage on entry to csum_partial:
   *
   * a0: source address
   * a1: length of the area to checksum, in bytes
   * a2: partial checksum
   *
   * The running checksum is accumulated in v0 (named "sum" below); the
   * routine returns with the final value in that same register.
   */
  #define src a0
  #define sum v0
  .text
  /* From here on branch delay slots are filled by hand. */
  .set noreorder
  /*
   * small_csumcpy: checksum the last few bytes and finish the computation.
   *
   * Source alignment is unknown and fewer than 8 bytes remain.
   *
   * In:   src  address of the remaining bytes
   *       T2   number of remaining bytes
   *       T7   non-zero if the buffer originally started on an odd address
   *       sum  checksum accumulated so far
   *       a2   caller's partial checksum
   * Out:  sum (v0) = folded 16-bit checksum of the data, plus a2
   * Uses: a1, T0, T1, T2, v1, and src is advanced
   */
  small_csumcpy:
          move    a1, T2

          andi    T0, a1, 4
          beqz    T0, 1f
           andi   T0, a1, 2               /* delay slot: halfword pending? */

          /* A whole word is still pending; it may be unaligned. */
          ulw     T1, 0(src)
          PTR_ADDIU src, src, 4
          ADDC(sum, T1)

  1:      move    T1, zero
          beqz    T0, 2f
           andi   T0, a1, 1               /* delay slot: odd byte pending? */

          /* A halfword is still pending; it may be unaligned. */
          ulhu    T1, 0(src)
          PTR_ADDIU src, src, 2

  2:      beqz    T0, 3f
           sll    T1, T1, 16              /* delay slot: halfword to the top */

          /* One trailing byte is still pending. */
          lbu     T2, 0(src)
           nop                            /* load delay */
  #ifdef __MIPSEB__
          sll     T2, T2, 8               /* big endian: it is the high byte */
  #endif
          or      T1, T1, T2

  3:      ADDC(sum, T1)

          /* Fold the 32-bit sum into 16 bits, carry included. */
          sll     v1, sum, 16
          addu    sum, sum, v1
          sltu    v1, sum, v1
          srl     sum, sum, 16
          addu    sum, sum, v1

          /* Buffer started on an odd address?  Then swap the two bytes. */
          beqz    T7, 4f
           nop
          sll     v1, sum, 8
          srl     sum, sum, 8
          or      sum, sum, v1
          andi    sum, sum, 0xffff
  4:
          .set    reorder
          /* Finally add in the partial checksum passed by the caller. */
          ADDC(sum, a2)
          jr      ra
          .set    noreorder
  /* ------------------------------------------------------------------------- */

  /*
   * csum_partial: compute a 16-bit ones'-complement style checksum over a
   * buffer and add a caller-supplied partial checksum to it.
   *
   * In:   a0 (src)  source address, any alignment
   *       a1        length in bytes
   *       a2        partial checksum to add to the result
   * Out:  v0 (sum)  checksum
   * Uses: T0-T4, T7, t8, v1; a0 and a1 are modified
   *
   * Outline:
   *   - lengths below 8 go straight to small_csumcpy;
   *   - otherwise src is aligned step by step (1, 2, 4, 8, 16 bytes), the
   *     bulk is summed in 128/64/32-byte blocks and then word by word;
   *   - the last 0..3 bytes, the 16-bit fold, the byte swap for odd start
   *     addresses and the addition of a2 are all done by small_csumcpy.
   *
   * This routine is assembled under ".set noreorder": the instruction after
   * every branch or jump is its delay slot (shown with one extra space).
   */
          .align  5
  LEAF(csum_partial)
          move    sum, zero
          move    T7, zero                /* assume an even start address */

          sltiu   t8, a1, 0x8
          bnez    t8, small_csumcpy       /* < 8 bytes to copy */
           move   T2, a1                  /* byte count for small_csumcpy */

          /*
           * a1 >= 8 from here on: a zero length has already taken the branch
           * above, so no separate empty-buffer exit is required.
           */
          andi    T7, src, 0x1            /* odd buffer? */

  hword_align:
          beqz    T7, word_align
           andi   t8, src, 0x2

          /*
           * Odd start address: consume one byte so that src becomes even.
           * On little endian it goes into bits 8..15; small_csumcpy swaps
           * the bytes of the folded result when T7 is set.
           */
          lbu     T0, (src)
          LONG_SUBU a1, a1, 0x1
  #ifdef __MIPSEL__
          sll     T0, T0, 8
  #endif
          ADDC(sum, T0)
          PTR_ADDU src, src, 0x1
          andi    t8, src, 0x2

  word_align:
          beqz    t8, dword_align
           sltiu  t8, a1, 56

          /* Consume one halfword so that src becomes 4-byte aligned. */
          lhu     T0, (src)
          LONG_SUBU a1, a1, 0x2
          ADDC(sum, T0)
          sltiu   t8, a1, 56
          PTR_ADDU src, src, 0x2

  dword_align:
          /* Fewer than 56 bytes left: skip the block loops, sum a1/4 words. */
          bnez    t8, do_end_words
           move   t8, a1

          andi    t8, src, 0x4
          beqz    t8, qword_align
           andi   t8, src, 0x8

          /* Consume one word so that src becomes 8-byte aligned. */
          lw      T0, 0x00(src)
          LONG_SUBU a1, a1, 0x4
          ADDC(sum, T0)
          PTR_ADDU src, src, 0x4
          andi    t8, src, 0x8

  qword_align:
          beqz    t8, oword_align
           andi   t8, src, 0x10

          /* Consume two words so that src becomes 16-byte aligned. */
          lw      T0, 0x00(src)
          lw      T1, 0x04(src)
          LONG_SUBU a1, a1, 0x8
          ADDC(sum, T0)
          ADDC(sum, T1)
          PTR_ADDU src, src, 0x8
          andi    t8, src, 0x10

  oword_align:
          beqz    t8, begin_movement
           LONG_SRL t8, a1, 0x7           /* t8 = number of 128-byte blocks */

          /* Consume four words so that src becomes 32-byte aligned. */
          lw      T3, 0x08(src)
          lw      T4, 0x0c(src)
          lw      T0, 0x00(src)
          lw      T1, 0x04(src)
          ADDC(sum, T3)
          ADDC(sum, T4)
          ADDC(sum, T0)
          ADDC(sum, T1)
          LONG_SUBU a1, a1, 0x10
          PTR_ADDU src, src, 0x10
          LONG_SRL t8, a1, 0x7            /* recompute after a1 changed */

  begin_movement:
          beqz    t8, 1f
           andi   T2, a1, 0x40            /* is a 64-byte block pending? */

  move_128bytes:
          CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
          CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
          CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4)
          CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4)
          LONG_SUBU t8, t8, 0x01
          bnez    t8, move_128bytes
           PTR_ADDU src, src, 0x80

  1:
          beqz    T2, 1f
           andi   T2, a1, 0x20            /* is a 32-byte block pending? */

  move_64bytes:
          CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
          CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
          PTR_ADDU src, src, 0x40

  1:
          beqz    T2, do_end_words
           andi   t8, a1, 0x1c            /* bytes left in whole words */

  move_32bytes:
          CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
          andi    t8, a1, 0x1c
          PTR_ADDU src, src, 0x20

  do_end_words:
          beqz    t8, maybe_end_cruft
           LONG_SRL t8, t8, 0x2           /* byte count -> word count */

  end_words:
          lw      T0, (src)
          LONG_SUBU t8, t8, 0x1
          ADDC(sum, T0)
          bnez    t8, end_words
           PTR_ADDU src, src, 0x4

  maybe_end_cruft:
          /*
           * 0..3 bytes remain.  small_csumcpy takes the count in T2, sums
           * those bytes, folds the result, applies the odd-address byte swap,
           * adds a2 and returns to our caller.
           */
          j       small_csumcpy
           andi   T2, a1, 0x3
          END(csum_partial)