udivsi3-Os.S 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. /* Copyright (C) 2006 Free Software Foundation, Inc.
  2. This file is free software; you can redistribute it and/or modify it
  3. under the terms of the GNU General Public License as published by the
  4. Free Software Foundation; either version 2, or (at your option) any
  5. later version.
  6. In addition to the permissions in the GNU General Public License, the
  7. Free Software Foundation gives you unlimited permission to link the
  8. compiled version of this file into combinations with other programs,
  9. and to distribute those combinations without any restriction coming
  10. from the use of this file. (The General Public License restrictions
  11. do apply in other respects; for example, they cover modification of
  12. the file, and distribution when not linked into a combine
  13. executable.)
  14. This file is distributed in the hope that it will be useful, but
  15. WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program; see the file COPYING. If not, write to
  20. the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  21. Boston, MA 02110-1301, USA. */
  22. /* Moderately Space-optimized libgcc routines for the Renesas SH /
  23. STMicroelectronics ST40 CPUs.
  24. Contributed by J"orn Rennecke joern.rennecke@st.com. */
  25. /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
  26. sh4-200 run times:
  27. udiv small divisor: 55 cycles
  28. udiv large divisor: 52 cycles
  29. sdiv small divisor, positive result: 59 cycles
  30. sdiv large divisor, positive result: 56 cycles
  31. sdiv small divisor, negative result: 65 cycles (*)
  32. sdiv large divisor, negative result: 62 cycles (*)
  33. (*): r2 is restored in the rts delay slot and has a lingering latency
  34. of two more cycles. */
  /* Symbol setup for the unsigned divide entry point.  __udivsi3_i4 and
     __udivsi3 are defined as aliases of __udivsi3_i4i, so all three names
     resolve to the same code.  */
  35. .balign 4  /* place the entry point on a 4-byte boundary */
  36. .global __udivsi3_i4i
  37. .global __udivsi3_i4
  38. .global __udivsi3
  39. .set __udivsi3_i4, __udivsi3_i4i  /* alias of __udivsi3_i4i */
  40. .set __udivsi3, __udivsi3_i4i  /* alias of __udivsi3_i4i */
  41. .type __udivsi3_i4i, @function
  42. .type __sdivsi3_i4i, @function  /* type for the signed entry point defined later in this file */
  /* __udivsi3_i4i: unsigned 32-bit division built from single-bit div1 steps.
     As read from the code: r4 is the value being divided and r5 the divisor
     (every step is div1 r5,r4); the quotient is assembled in r0.
     r4 and r5 are pushed on entry and popped again before returning.
     r1 is overwritten with the return address and the return goes through
     it; the T/Q/M flags are modified by div0u/div1.
     A divisor that fits in 16 bits is handled here in two groups of 16 div1
     steps; anything larger branches to large_divisor.
     sdiv_small_divisor is the join point used by __sdivsi3_i4i.
     NOTE(review): no test for a zero divisor is visible in this routine.  */
  43. __udivsi3_i4i:
  44. sts pr,r1  /* r1 = return address; PR itself is overwritten by the bsr calls below */
  45. mov.l r4,@-r15  /* push r4 */
  46. extu.w r5,r0  /* r0 = low 16 bits of the divisor, zero-extended */
  47. cmp/eq r5,r0  /* T = 1 when the divisor fits in 16 bits */
  48. swap.w r4,r0  /* r0 = dividend with its two 16-bit halves exchanged */
  49. shlr16 r4  /* r4 = upper 16 bits of the dividend */
  50. bf/s large_divisor  /* T == 0: divisor needs more than 16 bits */
  51. div0u  /* delay slot, executed on both paths: start an unsigned division (clears M, Q, T) */
  52. mov.l r5,@-r15  /* push r5 */
  53. shll16 r5  /* divisor moved into the upper half of r5 */
  54. sdiv_small_divisor:  /* also entered from __sdivsi3_i4i with r4/r5/r0 prepared the same way */
  55. div1 r5,r4  /* first group, step 1 */
  56. bsr div6  /* steps 3-8 */
  57. div1 r5,r4  /* delay slot: step 2 */
  58. div1 r5,r4  /* step 9 */
  59. bsr div6  /* steps 11-16 */
  60. div1 r5,r4  /* delay slot: step 10 */
  61. xtrct r4,r0  /* r0 = (low half of r4) : (upper half of r0, i.e. the dividend's low 16 bits) */
  62. xtrct r0,r4  /* r4 = (low half of r0) : (upper half of r4) */
  63. bsr div7  /* second group, steps 1-7 */
  64. swap.w r4,r4  /* delay slot: exchange the halves of r4 before stepping */
  65. div1 r5,r4  /* step 8 */
  66. bsr div7  /* steps 10-16 */
  67. div1 r5,r4  /* delay slot: step 9 */
  68. xtrct r4,r0  /* r0 = (low half of r4) : (upper half of r0) */
  69. mov.l @r15+,r5  /* pop r5 */
  70. swap.w r0,r0  /* exchange the halves of r0 so the two quotient pieces are in order */
  71. mov.l @r15+,r4  /* pop r4 */
  72. jmp @r1  /* return through r1 (the signed entry may have pointed r1 at negate_result) */
  73. rotcl r0  /* delay slot: rotate r0 left, bringing the last T bit in at bit 0 */
  /* div7 / div6: helpers reached with bsr that run 7 or 6 consecutive
     div1 r5,r4 steps.  div7 is one step that falls through into div6; the
     last step of div6 sits in the rts delay slot.  */
  74. div7:
  75. div1 r5,r4  /* extra step, then fall through into div6 */
  76. div6:
  77. div1 r5,r4; div1 r5,r4; div1 r5,r4  /* steps 1-3 (";" separates statements) */
  78. div1 r5,r4; div1 r5,r4; rts; div1 r5,r4  /* steps 4-5, return, step 6 in the delay slot */
  /* divx3: helper reached with bsr from the large-divisor loop.  Executes
     three rotcl r0 / div1 r5,r4 pairs; the third div1 is in the rts delay
     slot.  Each rotcl rotates r0 left through T (old T into bit 0, old bit 31
     into T) and each div1 performs one division step on r4 by r5.  */
  79. divx3:
  80. rotcl r0
  81. div1 r5,r4
  82. rotcl r0
  83. div1 r5,r4
  84. rotcl r0
  85. rts
  86. div1 r5,r4  /* delay slot: executed before control returns to the caller */
  /* large_divisor: path taken when the divisor does not fit in 16 bits.
     On arrival r4 holds the upper 16 bits of the dividend, r0 holds the
     half-swapped dividend and div0u has already run (bf/s delay slot).
     sdiv_large_divisor is the join point used by __sdivsi3_i4i, which has
     already pushed r5 itself.  The loop body is 16 rotcl/div1 pairs in total,
     followed by one more rotcl in the return delay slot.  */
  87. large_divisor:
  88. mov.l r5,@-r15  /* push r5 (r4 was pushed at entry) */
  89. sdiv_large_divisor:
  90. xor r4,r0  /* r0 ^= r4: cancels r0's lower half, leaving the dividend's low 16 bits in r0's upper half */
  91. .rept 4  /* four copies, each doing 4 rotcl/div1 pairs (one here, three inside divx3) */
  92. rotcl r0  /* rotate r0 left through T: old T enters bit 0, old bit 31 becomes T */
  93. bsr divx3
  94. div1 r5,r4  /* delay slot: division step paired with the rotcl above */
  95. .endr
  96. mov.l @r15+,r5  /* pop r5 */
  97. mov.l @r15+,r4  /* pop r4 */
  98. jmp @r1  /* return through r1 (the signed entry may have pointed r1 at negate_result) */
  99. rotcl r0  /* delay slot: rotate the last T bit into r0 */
  /* Symbol setup for the signed divide entry point.  __sdivsi3_i4 and
     __sdivsi3 are defined as aliases of __sdivsi3_i4i.  */
  100. .global __sdivsi3_i4i
  101. .global __sdivsi3_i4
  102. .global __sdivsi3
  103. .set __sdivsi3_i4, __sdivsi3_i4i  /* alias of __sdivsi3_i4i */
  104. .set __sdivsi3, __sdivsi3_i4i  /* alias of __sdivsi3_i4i */
  /* __sdivsi3_i4i: signed 32-bit division implemented on top of the unsigned
     stepping code.  r4 and r5 are pushed, replaced by their magnitudes where
     negative, and control joins the unsigned code at sdiv_small_divisor or
     sdiv_large_divisor, which pops r5/r4 and leaves through jmp @r1.
       - Operands of equal sign (pos_result): r1 = caller's return address.
       - Operands of opposite sign (neg_result): r1 = address of
         negate_result, the caller's r2 is parked in MACL, and r2 carries the
         real return address; negate_result negates r0 and restores r2.
     MACL is therefore overwritten on the opposite-sign path.
     The T bit set by cmp/eq (divisor magnitude fits in 16 bits) is consumed
     by the bf/s at sdiv_check_divisor; none of the instructions in between
     are T-modifying ones.
     NOTE(review): no test for a zero divisor is visible in this routine.  */
  105. __sdivsi3_i4i:
  106. mov.l r4,@-r15  /* push r4 (same push order as __udivsi3_i4i) */
  107. cmp/pz r5  /* T = 1 when r5 >= 0 */
  108. mov.l r5,@-r15  /* push r5 */
  109. bt/s pos_divisor
  110. cmp/pz r4  /* delay slot, executed on both paths: T = 1 when r4 >= 0 */
  111. neg r5,r5  /* r5 was negative: replace it by its negation */
  112. extu.w r5,r0  /* r0 = low 16 bits of r5, zero-extended */
  113. bt/s neg_result  /* r4 >= 0 with r5 originally negative: quotient must be negated */
  114. cmp/eq r5,r0  /* delay slot: T = 1 when r5 now fits in 16 bits */
  115. neg r4,r4  /* both operands were negative: negate r4, quotient stays positive */
  116. pos_result:
  117. swap.w r4,r0  /* r0 = r4 with its 16-bit halves exchanged, as the unsigned code expects */
  118. bra sdiv_check_divisor
  119. sts pr,r1  /* delay slot: r1 = caller's return address */
  120. pos_divisor:
  121. extu.w r5,r0  /* r0 = low 16 bits of r5, zero-extended */
  122. bt/s pos_result  /* r4 >= 0 as well: no sign fix-up needed */
  123. cmp/eq r5,r0  /* delay slot: T = 1 when r5 fits in 16 bits */
  124. neg r4,r4  /* r4 was negative with r5 >= 0: negate r4, then fall through */
  125. neg_result:
  126. mova negate_result,r0  /* r0 = address of negate_result */
  127. ;  /* lone statement separator; assembles to nothing */
  128. mov r0,r1  /* the division code's final jmp @r1 will land on negate_result */
  129. swap.w r4,r0  /* r0 = r4 with its 16-bit halves exchanged */
  130. lds r2,macl  /* park the caller's r2 in MACL */
  131. sts pr,r2  /* r2 = real return address, used by negate_result */
  132. sdiv_check_divisor:
  133. shlr16 r4  /* r4 = upper 16 bits of the value being divided */
  134. bf/s sdiv_large_divisor  /* T == 0 from the earlier cmp/eq: divisor needs more than 16 bits */
  135. div0u  /* delay slot, executed on both paths: start an unsigned division (clears M, Q, T) */
  136. bra sdiv_small_divisor
  137. shll16 r5  /* delay slot: divisor moved into the upper half of r5 */
  /* negate_result: landing point for the division code's jmp @r1 when
     __sdivsi3_i4i took its neg_result path.  Negates the quotient in r0,
     returns to the address held in r2 and restores r2 from MACL in the delay
     slot.  The label is 4-byte aligned; its address is taken with mova, which
     forms longword-aligned addresses.  */
  138. .balign 4
  139. negate_result:
  140. neg r0,r0  /* r0 = -r0 */
  141. jmp @r2  /* r2 holds the return address saved by sts pr,r2 */
  142. sts macl,r2  /* delay slot: restore the r2 value parked in MACL */