/* csum_partial.S */
  1. /*
  2. * This file is subject to the terms and conditions of the GNU General Public
  3. * License. See the file "COPYING" in the main directory of this archive
  4. * for more details.
  5. *
  6. * Quick'n'dirty IP checksum ...
  7. *
  8. * Copyright (C) 1998, 1999 Ralf Baechle
  9. * Copyright (C) 1999 Silicon Graphics, Inc.
  10. */
#include <asm/asm.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we are sharing code base with the mips32 tree (which use the o32 ABI
 * register definitions). We need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

/* On 64-bit kernels accumulate the checksum in 64-bit chunks. */
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

/* 64-bit accumulation: 8-byte loads, doubleword adds. */
#define LOAD   ld
#define ADD    daddu
#define NBYTES 8

#else

/* 32-bit accumulation: 4-byte loads, word adds. */
#define LOAD   lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

/* Byte offset of the unit'th accumulation word within a chunk. */
#define UNIT(unit)  ((unit)*NBYTES)

/*
 * Add reg into sum with end-around carry: if the add wrapped,
 * sltu leaves 1 in v1, which is folded back into sum.
 * Clobbers v1.
 */
#define ADDC(sum,reg)						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1

/* Checksum 4 consecutive NBYTES units (16 or 32 bytes) at src+offset. */
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(sum, _t0);						\
	ADDC(sum, _t1);						\
	ADDC(sum, _t2);						\
	ADDC(sum, _t3)

/*
 * CSUM_BIGCHUNK always covers 32 bytes: one CSUM_BIGCHUNK1 when
 * NBYTES == 8, two back-to-back when NBYTES == 4.
 */
#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0
.text
	.set	noreorder			/* delay slots are filled by hand below */
	.align	5
/*
 * unsigned int csum_partial(const void *src, int len, unsigned int sum)
 *
 * Computes the 16-bit one's-complement (Internet) checksum of len bytes
 * at src, folded with the caller-supplied partial sum in a2.
 * Result in v0.  Clobbers v1, t0-t8 (t0-t7 remapped on 64-bit, see above).
 *
 * NOTE: every branch below has its delay slot filled with a useful
 * instruction (shown indented one extra space); that instruction
 * executes whether or not the branch is taken.
 */
LEAF(csum_partial)
	move	sum, zero			/* running checksum = 0 */
	move	t7, zero			/* t7 = "buffer started odd" flag */

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy		/* < 8 bytes to copy */
	 move	t2, a1				/* delay slot: t2 = remaining len */

	andi	t7, src, 0x1			/* odd buffer? */

/*
 * Alignment cascade: consume 1, 2, 4, 8, then 16 bytes as needed so
 * that src ends up 32-byte aligned before the unrolled loops.  Each
 * step pre-computes the next step's alignment test in a delay slot.
 */
hword_align:
	beqz	t7, word_align
	 andi	t8, src, 0x2			/* delay slot: halfword-misaligned? */

	lbu	t0, (src)			/* consume the single leading byte */
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8			/* little endian: byte sits in the high half */
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2			/* recompute: src moved */

word_align:
	beqz	t8, dword_align
	 sltiu	t8, a1, 56			/* delay slot: too short for big chunks? */

	lhu	t0, (src)			/* consume a halfword */
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56			/* recompute: len changed */
	PTR_ADDU	src, src, 0x2

dword_align:
	bnez	t8, do_end_words		/* < 56 bytes left: word loop only */
	 move	t8, a1				/* delay slot: t8 = remaining len */

	andi	t8, src, 0x4
	beqz	t8, qword_align
	 andi	t8, src, 0x8			/* delay slot: next alignment test */

	lw	t0, 0x00(src)			/* consume a word */
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	 andi	t8, src, 0x10			/* delay slot: next alignment test */

	/* consume 8 bytes */
#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	 LONG_SRL	t8, a1, 0x7		/* delay slot: t8 = len / 128 */

	/* consume 16 bytes */
#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7		/* recompute 128-byte count: len changed */

begin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40			/* delay slot: is there a 64-byte chunk? */

move_128bytes:
	/* t8 = number of 128-byte chunks remaining */
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	bnez	t8, move_128bytes
	 PTR_ADDU	src, src, 0x80		/* delay slot: advance src */

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20			/* delay slot: is there a 32-byte chunk? */

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, do_end_words
	 andi	t8, a1, 0x1c			/* delay slot: remaining whole-word bytes */

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

do_end_words:
	/* t8 = 4..28 bytes of whole words still to add */
	beqz	t8, small_csumcpy
	 andi	t2, a1, 0x3			/* delay slot: sub-word tail length */
	LONG_SRL	t8, t8, 0x2		/* t8 = word count */

end_words:
	lw	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	bnez	t8, end_words
	 PTR_ADDU	src, src, 0x4		/* delay slot: advance src */

/* unknown src alignment and < 8 bytes to go */
small_csumcpy:
	move	a1, t2				/* a1 = 0..7 bytes left */

	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2			/* delay slot: halfword remaining? */

	/* Still a full word to go */
	ulw	t1, (src)			/* unaligned-safe load */
	PTR_ADDIU	src, 4
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1			/* delay slot: single byte remaining? */

	/* Still a halfword to go */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16			/* delay slot: halfword into high half (always runs; t1 is 0 if no halfword was read) */

	lbu	t2, (src)			/* final odd byte */
	 nop
#ifdef __MIPSEB__
	sll	t2, t2, 8			/* big endian: byte is the high octet of its halfword */
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
#ifdef USE_DOUBLE
	/* fold 64-bit sum to 32 bits, end-around carry preserved */
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif
	/* fold 32-bit sum to 16 bits, end-around carry preserved */
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, v1

	/* odd buffer alignment? */
	beqz	t7, 1f
	 nop
	/* source started on an odd address: swap the two result bytes */
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, v1
	andi	sum, 0xffff
1:
	.set	reorder
	/* Add the passed partial csum. */
	ADDC(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)