muldi3.S 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. #include <linux/linkage.h>
  2. /*
  3. * Multiply operation for 64 bit integers, for devices with hard multiply
  4. * Input : Operand1[H] in Reg r5
  5. * Operand1[L] in Reg r6
  6. * Operand2[H] in Reg r7
  7. * Operand2[L] in Reg r8
  8. * Output: Result[H] in Reg r3
  9. * Result[L] in Reg r4
  10. *
  11. * Explanation:
  12. *
  13. * Both input numbers are split into four 16-bit digits as follows
  14. * op1 = A B C D
  15. * op2 = E F G H
  16. * result = D * H
  17. * + (C * H + D * G) << 16
  18. * + (B * H + C * G + D * F) << 32
  19. * + (A * H + B * G + C * F + D * E) << 48
  20. *
  21. * Only the low 64 bits of the product are kept
  22. */
  .text
  .globl __muldi3
  .type __muldi3, @function
  .ent __muldi3

  /*
   * __muldi3 - 64 x 64 -> 64 bit multiply built from 16 x 16 partial products.
   *
   * In:       r5:r6 = operand 1 (high:low word)
   *           r7:r8 = operand 2 (high:low word)
   * Out:      r3:r4 = low 64 bits of the product (high:low word)
   * Clobbers: r7-r12 (r7/r8 are reused as scratch once the operands have
   *           been spilled); r20-r27 are saved on entry and restored.
   *
   * The operands are written to the stack and read back with halfword
   * loads to obtain the 16-bit digits A..H (op1 = A B C D, op2 = E F G H,
   * most significant digit first). The result is assembled the same way,
   * as four halfwords Pos0..Pos3 (Pos0 most significant).
   *
   * NOTE(review): the fixed halfword offsets treat the halfword at the
   * lower address as the more significant half of each word, i.e. a
   * big-endian memory layout - confirm before building for a
   * little-endian configuration.
   *
   * Stack layout after the frame is allocated (offsets from r1):
   *    0..28   saved r20..r27
   *   32       result high word: Pos0 at 32, Pos1 at 34
   *   36       result low word:  Pos2 at 36, Pos3 at 38
   *   44..56   spilled r5, r6, r7, r8 (above the 40 bytes allocated here)
   */
  __muldi3:
          addi    r1, r1, -40

          /* Spill the input operands so their digits can be reloaded */
          swi     r5, r1, 44
          swi     r6, r1, 48
          swi     r7, r1, 52
          swi     r8, r1, 56

          /* Save the callee-saved registers used for the digits */
          swi     r20, r1, 0
          swi     r21, r1, 4
          swi     r22, r1, 8
          swi     r23, r1, 12
          swi     r24, r1, 16
          swi     r25, r1, 20
          swi     r26, r1, 24
          swi     r27, r1, 28

          /* Load the eight 16-bit digits, zero-extended */
          lhui    r20, r1, 44             /* A */
          lhui    r21, r1, 46             /* B */
          lhui    r22, r1, 48             /* C */
          lhui    r23, r1, 50             /* D */
          lhui    r24, r1, 52             /* E */
          lhui    r25, r1, 54             /* F */
          lhui    r26, r1, 56             /* G */
          lhui    r27, r1, 58             /* H */

          /* Store1 = D * H  ==> Pos2 and Pos3 */
          mul     r9, r23, r27
          swi     r9, r1, 36

          /*
           * Store2 = Hi(Store1) + C * H + D * G  ==> Pos1 and Pos2.
           * This sum can overflow 32 bits; the carry-out is collected in
           * r12 and added into Pos0 at the end.
           */
          lhui    r11, r1, 36             /* Hi(Store1), currently in Pos2 */
          mul     r9, r22, r27            /* C * H */
          mul     r10, r23, r26           /* D * G */
          add     r9, r9, r10
          addc    r12, r0, r0             /* r12 = carry of the first add */
          add     r9, r9, r11
          addc    r12, r12, r0            /* r12 += carry of the second add */
          shi     r9, r1, 36              /* Pos2 = low half of Store2 */
          swi     r9, r1, 32              /* bounce through memory to ... */
          lhui    r11, r1, 32             /* ... extract Hi(Store2) */
          shi     r11, r1, 34             /* Pos1 = Hi(Store2) */

          /*
           * Store3 = Hi(Store2) + B * H + C * G + D * F  ==> Pos0 and Pos1.
           * Any carry out of this sum lies above bit 63 and is dropped.
           */
          mul     r9, r21, r27            /* B * H */
          mul     r10, r22, r26           /* C * G */
          mul     r7, r23, r25            /* D * F */
          add     r9, r9, r11
          add     r9, r9, r10
          add     r9, r9, r7
          swi     r9, r1, 32

          /*
           * Pos0 = low 16 bits of
           *   Hi(Store3) + A * H + B * G + C * F + D * E + carry(Store2).
           * The halfword store keeps only the low 16 bits of r9, so no
           * masking or extension of the sum is needed beforehand.
           */
          lhui    r11, r1, 32             /* Hi(Store3), currently in Pos0 */
          mul     r9, r20, r27            /* A * H */
          mul     r10, r21, r26           /* B * G */
          mul     r7, r22, r25            /* C * F */
          mul     r8, r23, r24            /* D * E */
          add     r9, r9, r11
          add     r9, r9, r10
          add     r9, r9, r7
          add     r9, r9, r8
          add     r9, r9, r12             /* deferred carry from Store2 */
          shi     r9, r1, 32

          /* Fetch the assembled result */
          lwi     r3, r1, 32              /* Hi Part */
          lwi     r4, r1, 36              /* Lo Part */

          /* Restore the callee-saved registers */
          lwi     r20, r1, 0
          lwi     r21, r1, 4
          lwi     r22, r1, 8
          lwi     r23, r1, 12
          lwi     r24, r1, 16
          lwi     r25, r1, 20
          lwi     r26, r1, 24
          lwi     r27, r1, 28

          /* Return; the frame is released in the delay slot of rtsd */
          rtsd    r15, 8
          addi    r1, r1, 40

  .size __muldi3, . - __muldi3
  .end __muldi3