sdivsi3.S 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. .global __sdivsi3
  2. .section .text..SHmedia32,"ax"
  3. .align 2
  4. /* __sdivsi3: signed division r0 = r4 / r5 with no divide instruction: a reciprocal of the normalized divisor is seeded from __div_table, refined (11-bit, then 22-bit, per the comments below) and multiplied by the dividend. inputs: r4 (dividend), r5 (divisor) */
  5. /* clobbered: r1,r18,r19,r20,r21,r25,tr0 (r18 is copied into tr0 before it is reused as scratch; tr0 is the target of the final blink) */
  6. /* result in r0 */
  7. __sdivsi3:
  8. ptb __div_table,tr0 /* tr0 = address of __div_table; read back into r20 by gettr below */
  9. nsb r5, r1 /* r1 = normalization shift count for the divisor; used by shlld next and later to derive the final shift */
  10. shlld r5, r1, r25 /* normalize; [-2 ..1, 1..2) in s2.62 */
  11. shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */
  12. /* bubble */
  13. gettr tr0,r20 /* r20 = table base address taken from tr0 */
  14. ldx.ub r20, r21, r19 /* u0.8; byte "factor" loaded from __div_table + index */
  15. shari r25, 32, r25 /* normalize to s2.30 */
  16. shlli r21, 1, r21 /* index * 2 = byte offset of the 16-bit "constant" for the same index */
  17. muls.l r25, r19, r19 /* s2.38; normalized divisor * factor */
  18. ldx.w r20, r21, r21 /* s2.14; word "constant" loaded from __div_table + 2*index */
  19. ptabs r18, tr0 /* tr0 = r18 for the final blink; the table base already lives in r20 and r18 is reused as scratch below. NOTE(review): r18 presumably holds the caller's return address - confirm against the ABI */
  20. shari r19, 24, r19 /* truncate to s2.14 */
  21. sub r21, r19, r19 /* some 11 bit inverse in s1.14; constant - divisor*factor */
  22. muls.l r19, r19, r21 /* u0.28; square of the 11 bit inverse (r21 keeps this value until the xor near the end) */
  23. sub r63, r1, r1 /* r1 = r63 - r1, i.e. negate the shift count (r63 reads as zero on SHmedia) */
  24. addi r1, 92, r1 /* r1 = 92 - normalization count: shift amount for the final shard */
  25. muls.l r25, r21, r18 /* s2.58; divisor * inverse^2 */
  26. shlli r19, 45, r19 /* multiply by two and convert to s2.58 */
  27. /* bubble */
  28. sub r19, r18, r18 /* 2*inverse - divisor*inverse^2: one reciprocal refinement step */
  29. shari r18, 28, r18 /* some 22 bit inverse in s1.30 */
  30. muls.l r18, r25, r0 /* s2.60; refined inverse * divisor, input to the correction term below */
  31. muls.l r18, r4, r25 /* s32.30; refined inverse * dividend = quotient estimate (r25 stops holding the divisor here) */
  32. /* bubble */
  33. shari r0, 16, r19 /* s-16.44 */
  34. muls.l r19, r18, r19 /* s-16.74; (inverse * divisor) * inverse */
  35. shari r25, 63, r0 /* r0 = sign mask of the quotient estimate (0 or -1) */
  36. shari r4, 14, r18 /* s19.-14; scaled-down dividend (r18 stops holding the inverse here) */
  37. shari r19, 30, r19 /* s-16.44 */
  38. muls.l r19, r18, r19 /* s15.30; correction term scaled by the dividend */
  39. xor r21, r0, r21 /* You could also use the constant 1 << 27. r21 (the u0.28 square from above) is complemented when the sign mask is -1. NOTE(review): it appears to act as a rounding bias - confirm */
  40. add r21, r25, r21 /* bias + quotient estimate */
  41. sub r21, r19, r21 /* subtract the correction term */
  42. shard r21, r1, r21 /* shift right by the count in r1 to drop the fraction bits */
  43. sub r21, r0, r0 /* r0 = shifted value - sign mask (adds 1 when the mask is -1); this is the result */
  44. blink tr0, r63 /* branch to the address in tr0; the link value is written to r63, i.e. discarded */
  45. /* Reciprocal seed table read by __sdivsi3: byte "factors" via ldx.ub and 16-bit "constants" via ldx.w, both relative to the __div_table label. This table has been generated by divtab.c .
  46. Defects for bias -330:
  47. Max defect: 6.081536e-07 at -1.000000e+00
  48. Min defect: 2.849516e-08 at 1.030651e+00
  49. Max 2nd step defect: 9.606539e-12 at -1.000000e+00
  50. Min 2nd step defect: 0.000000e+00 at 0.000000e+00
  51. Defect at 1: 1.238659e-07
  52. Defect at -2: 1.061708e-07 */
  53. .balign 2 /* 2-byte alignment for the 16-bit .word entries below */
  54. .type __div_table,@object
  55. .size __div_table,128 /* 128 bytes in total: 64 emitted before the __div_table label and 64 after it */
  56. /* negative division constants: 16 words at __div_table-64 .. __div_table-33 */
  57. .word -16638
  58. .word -17135
  59. .word -17737
  60. .word -18433
  61. .word -19103
  62. .word -19751
  63. .word -20583
  64. .word -21383
  65. .word -22343
  66. .word -23353
  67. .word -24407
  68. .word -25582
  69. .word -26863
  70. .word -28382
  71. .word -29965
  72. .word -31800
  73. /* negative division factors: 16 bytes at __div_table-32 .. __div_table-17 */
  74. .byte 66
  75. .byte 70
  76. .byte 75
  77. .byte 81
  78. .byte 87
  79. .byte 93
  80. .byte 101
  81. .byte 109
  82. .byte 119
  83. .byte 130
  84. .byte 142
  85. .byte 156
  86. .byte 172
  87. .byte 192
  88. .byte 214
  89. .byte 241
  90. .skip 16 /* gap at __div_table-16 .. __div_table-1: no entries are stored here */
  91. .global __div_table
  92. __div_table: /* label sits mid-table: 64 bytes precede it and 64 follow, so entries are addressed with signed offsets */
  93. .skip 16 /* gap at __div_table+0 .. __div_table+15: no entries are stored here */
  94. /* positive division factors: 16 bytes at __div_table+16 .. __div_table+31 (same values as the negative factors, in reverse order) */
  95. .byte 241
  96. .byte 214
  97. .byte 192
  98. .byte 172
  99. .byte 156
  100. .byte 142
  101. .byte 130
  102. .byte 119
  103. .byte 109
  104. .byte 101
  105. .byte 93
  106. .byte 87
  107. .byte 81
  108. .byte 75
  109. .byte 70
  110. .byte 66
  111. /* positive division constants: 16 words at __div_table+32 .. __div_table+63 */
  112. .word 31801
  113. .word 29966
  114. .word 28383
  115. .word 26864
  116. .word 25583
  117. .word 24408
  118. .word 23354
  119. .word 22344
  120. .word 21384
  121. .word 20584
  122. .word 19752
  123. .word 19104
  124. .word 18434
  125. .word 17738
  126. .word 17136
  127. .word 16639