/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
  11. #include <asm/cache.h>
  12. .section .text
  13. .balign L1_CACHE_BYTES
  14. ###############################################################################
  15. #
  16. # unsigned int do_csum(const unsigned char *buff, size_t len)
  17. #
  18. ###############################################################################
  19. .globl do_csum
  20. .type do_csum,@function
  21. do_csum:
  22. movm [d2,d3],(sp)
  23. mov d0,(12,sp)
  24. mov d1,(16,sp)
  25. mov d1,d2 # count
  26. mov d0,a0 # buff
  27. clr d1 # accumulator
  28. cmp +0,d2
  29. beq do_csum_done # return if zero-length buffer
  30. # 4-byte align the buffer pointer
  31. btst +3,a0
  32. beq do_csum_now_4b_aligned
  33. btst +1,a0
  34. beq do_csum_addr_not_odd
  35. movbu (a0),d0
  36. inc a0
  37. asl +8,d0
  38. add d0,d1
  39. addc +0,d1
  40. add -1,d2
  41. do_csum_addr_not_odd:
  42. cmp +2,d2
  43. bcs do_csum_fewer_than_4
  44. btst +2,a0
  45. beq do_csum_now_4b_aligned
  46. movhu (a0+),d0
  47. add d0,d1
  48. addc +0,d1
  49. add -2,d2
  50. cmp +4,d2
  51. bcs do_csum_fewer_than_4
  52. do_csum_now_4b_aligned:
  53. # we want to checksum as much as we can in chunks of 32 bytes
  54. cmp +31,d2
  55. bls do_csum_remainder # 4-byte aligned remainder
  56. add -32,d2
  57. mov +32,d3
  58. do_csum_loop:
  59. mov (a0+),d0
  60. add d0,d1
  61. mov (a0+),e0
  62. addc e0,d1
  63. mov (a0+),e1
  64. addc e1,d1
  65. mov (a0+),e3
  66. addc e3,d1
  67. mov (a0+),d0
  68. addc d0,d1
  69. mov (a0+),e0
  70. addc e0,d1
  71. mov (a0+),e1
  72. addc e1,d1
  73. mov (a0+),e3
  74. addc e3,d1
  75. addc +0,d1
  76. sub d3,d2
  77. bcc do_csum_loop
  78. add d3,d2
  79. beq do_csum_done
  80. do_csum_remainder:
  81. # cut 16-31 bytes down to 0-15
  82. cmp +16,d2
  83. bcs do_csum_fewer_than_16
  84. mov (a0+),d0
  85. add d0,d1
  86. mov (a0+),e0
  87. addc e0,d1
  88. mov (a0+),e1
  89. addc e1,d1
  90. mov (a0+),e3
  91. addc e3,d1
  92. addc +0,d1
  93. add -16,d2
  94. beq do_csum_done
  95. do_csum_fewer_than_16:
  96. # copy the remaining whole words
  97. cmp +4,d2
  98. bcs do_csum_fewer_than_4
  99. cmp +8,d2
  100. bcs do_csum_one_word
  101. cmp +12,d2
  102. bcs do_csum_two_words
  103. mov (a0+),d0
  104. add d0,d1
  105. addc +0,d1
  106. do_csum_two_words:
  107. mov (a0+),d0
  108. add d0,d1
  109. addc +0,d1
  110. do_csum_one_word:
  111. mov (a0+),d0
  112. add d0,d1
  113. addc +0,d1
  114. do_csum_fewer_than_4:
  115. and +3,d2
  116. beq do_csum_done
  117. xor_cmp d0,d0,+2,d2
  118. bcs do_csum_fewer_than_2
  119. movhu (a0+),d0
  120. do_csum_fewer_than_2:
  121. and +1,d2
  122. beq do_csum_add_last_bit
  123. movbu (a0),d3
  124. add d3,d0
  125. do_csum_add_last_bit:
  126. add d0,d1
  127. addc +0,d1
  128. do_csum_done:
  129. # compress the checksum down to 16 bits
  130. mov +0xffff0000,d2
  131. and d1,d2
  132. asl +16,d1
  133. add d2,d1,d0
  134. addc +0xffff,d0
  135. lsr +16,d0
  136. # flip the halves of the word result if the buffer was oddly aligned
  137. mov (12,sp),d1
  138. and +1,d1
  139. beq do_csum_not_oddly_aligned
  140. swaph d0,d0 # exchange bits 15:8 with 7:0
  141. do_csum_not_oddly_aligned:
  142. ret [d2,d3],8
  143. do_csum_end:
  144. .size do_csum, do_csum_end-do_csum