ip_fast_csum.S 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. /*
  2. * Optimized version of the ip_fast_csum() function
  3. * Used for calculating IP header checksum
  4. *
  5. * Return: 16bit checksum, complemented
  6. *
  7. * Inputs:
  8. * in0: address of buffer to checksum (char *)
  9. * in1: length of the buffer in 32-bit words (int)
  10. *
  11. * Copyright (C) 2002 Intel Corp.
  12. * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
  13. */
  14. #include <asm/asmmacro.h>
  15. /*
  16. * Since we know that most likely this function is called with buf aligned
  17. * on 4-byte boundary and 20 bytes in length, we can execute rather quickly
  18. * versus calling generic version of do_csum, which has lots of overhead in
  19. * handling various alignments and sizes. However, due to lack of constraints
  20. * put on the function input argument, cases with alignment not on 4-byte or
  21. * size not equal to 20 bytes will be handled by the generic do_csum function.
  22. */
  23. #define in0 r32 /* first argument: address of the buffer */
  24. #define in1 r33 /* second argument: compared against 5 and scaled by 4 below */
  25. #define ret0 r8 /* register in which the result is handed back */
  26. GLOBAL_ENTRY(ip_fast_csum) /* sums five 32-bit words inline when in1 == 5 and in0 is 4-byte aligned; otherwise defers to do_csum */
  27. .prologue
  28. .body
  29. cmp.ne p6,p7=5,in1 // size other than 20 byte? p6 = (in1 != 5), p7 = (in1 == 5)
  30. and r14=3,in0 // is it aligned on 4-byte? r14 = low two address bits
  31. add r15=4,in0 // second source pointer, one word past in0
  32. ;;
  33. cmp.ne.or.andcm p6,p7=r14,r0 // if r14 != 0 (misaligned): force p6 on and p7 off; otherwise leave both as set above
  34. ;;
  35. (p7) ld4 r20=[in0],8 // fast path only: word 0; in0 then advances by 8
  36. (p7) ld4 r21=[r15],8 // fast path only: word 1; r15 then advances by 8
  37. (p6) br.spnt .generic // wrong size or alignment: leave for the slow path (in0 untouched, loads above were predicated off)
  38. ;;
  39. ld4 r22=[in0],8 // word 2
  40. ld4 r23=[r15],8 // word 3
  41. ;;
  42. ld4 r24=[in0] // word 4, the last of the five
  43. add r20=r20,r21 // word 0 + word 1
  44. add r22=r22,r23 // word 2 + word 3
  45. ;;
  46. add r20=r20,r22 // sum of words 0..3
  47. ;;
  48. add r20=r20,r24 // r20 = 64-bit sum of all five zero-extended words
  49. ;;
  50. shr.u ret0=r20,16 // now need to add the carry: ret0 = everything above the low 16 bits
  51. zxt2 r20=r20 // r20 = low 16 bits only
  52. ;;
  53. add r20=ret0,r20 // first fold: high part + low 16 bits
  54. ;;
  55. shr.u ret0=r20,16 // add carry again
  56. zxt2 r20=r20
  57. ;;
  58. add r20=ret0,r20 // second fold
  59. ;;
  60. shr.u ret0=r20,16 // third pass picks up any carry produced by the second fold
  61. zxt2 r20=r20
  62. ;;
  63. add r20=ret0,r20 // third fold: r20 now holds the folded sum
  64. ;;
  65. andcm ret0=-1,r20 // ret0 = ~r20 (complement). NOTE(review): all 64 bits are inverted, so ret0 is wider than the "16bit" the file header states - confirm callers truncate
  66. .restore sp // reset frame state
  67. br.ret.sptk.many b0
  68. ;;
  69. .generic: // slow path: reached when in1 != 5 or in0 is not 4-byte aligned
  70. .prologue
  71. .save ar.pfs, r35
  72. alloc r35=ar.pfs,2,2,2,0 // frame of 2 inputs, 2 locals, 2 outputs; previous ar.pfs saved in r35
  73. .save rp, r34
  74. mov r34=b0 // keep the return address in r34 across the call
  75. .body
  76. dep.z out1=in1,2,30 // out1 = low 30 bits of in1, shifted left by 2 (in1 * 4); presumably the byte length do_csum expects - TODO confirm
  77. mov out0=in0 // pass the buffer address through unchanged
  78. ;;
  79. br.call.sptk.many b0=do_csum
  80. ;;
  81. andcm ret0=-1,ret0 // complement do_csum's result in place. NOTE(review): inverts all 64 bits, same width caveat as the fast path
  82. mov ar.pfs=r35 // restore the saved ar.pfs
  83. mov b0=r34 // restore the return address
  84. br.ret.sptk.many b0
  85. END(ip_fast_csum)