udivsi3.S 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
  2. 2004, 2005, 2006
  3. Free Software Foundation, Inc.
  4. This file is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU General Public License as published by the
  6. Free Software Foundation; either version 2, or (at your option) any
  7. later version.
  8. In addition to the permissions in the GNU General Public License, the
  9. Free Software Foundation gives you unlimited permission to link the
  10. compiled version of this file into combinations with other programs,
  11. and to distribute those combinations without any restriction coming
  12. from the use of this file. (The General Public License restrictions
  13. do apply in other respects; for example, they cover modification of
  14. the file, and distribution when not linked into a combine
  15. executable.)
  16. This file is distributed in the hope that it will be useful, but
  17. WITHOUT ANY WARRANTY; without even the implied warranty of
  18. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. General Public License for more details.
  20. You should have received a copy of the GNU General Public License
  21. along with this program; see the file COPYING. If not, write to
  22. the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  23. Boston, MA 02110-1301, USA. */
  24. !! libgcc routines for the Renesas / SuperH SH CPUs.
  25. !! Contributed by Steve Chamberlain.
  26. !! sac@cygnus.com
  27. !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
  28. !! recoded in assembly by Toshiyasu Morita
  29. !! tm@netcom.com
  30. /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
  31. ELF local label prefixes by J"orn Rennecke
  32. amylaar@cygnus.com */
  33. /* This code uses shld, thus is not suitable for SH1 / SH2. */
  34. /* Signed / unsigned division without use of FPU, optimized for SH4.
  35. Uses a lookup table for divisors in the range -128 .. +128, and
  36. div1 with case distinction for larger divisors in three more ranges.
  37. The code is lumped together with the table to allow the use of mova. */
  /*
   * Byte offsets, inside a 32-bit word stored in memory, of selected bytes
   * of that word.  They are used as displacements from r15 when partial
   * quotient bytes are written into a scratch word on the stack.
   *   L_LSB    - least significant byte of the word
   *   L_LSWMSB - upper byte of the low 16-bit half
   *   L_MSWLSB - lower byte of the high 16-bit half
   */
#if defined(CONFIG_CPU_LITTLE_ENDIAN)
# define L_LSB		0
# define L_LSWMSB	1
# define L_MSWLSB	2
#else	/* big endian */
# define L_LSB		3
# define L_LSWMSB	2
# define L_MSWLSB	1
#endif
  /*
   * __udivsi3_i4i - unsigned 32-bit division.  The same entry point is
   * also exported under the names __udivsi3_i4 and __udivsi3.
   *
   *   In:   r4 = dividend, r5 = divisor
   *   Out:  r0 = quotient
   *   r4 and r5 are pushed on the stack and reloaded by every exit path;
   *   r1 is used as scratch.
   *
   * The entry code classifies the divisor and dispatches:
   *   divisor <= 128      -> udiv_le128 (table driven multiply)
   *   divisor >= 0x10000  -> udiv_ge64k
   *   anything else       -> starts a div1 sequence here and continues
   *                          at udiv_25
   */
	.balign	4
	.type	__udivsi3_i4i, @function
	.global	__udivsi3_i4i
	.global	__udivsi3_i4
	.set	__udivsi3_i4, __udivsi3_i4i
	.global	__udivsi3
	.set	__udivsi3, __udivsi3_i4i
__udivsi3_i4i:
	mov.w	c128_w, r1		! r1 = 128
	div0u				! clear M, Q and T for unsigned div1 steps
	mov	r4, r0
	shlr8	r0			! r0 = dividend >> 8
	cmp/hi	r1, r5			! T = (divisor > 128), unsigned compare
	extu.w	r5, r1			! r1 = low 16 bits of the divisor
	bf	udiv_le128		! divisor <= 128
	cmp/eq	r5, r1			! T = (divisor fits in 16 bits)
	bf	udiv_ge64k		! divisor >= 0x10000
	shlr	r0			! r0 = dividend >> 9, shifted-out bit lands in T
	mov	r5, r1			! keep the unshifted divisor so it can be saved
	shll16	r5			! move the divisor into the upper half word
	mov.l	r4, @-r15		! save caller r4
	div1	r5, r0
	mov.l	r1, @-r15		! save caller r5 (original value)
	div1	r5, r0
	div1	r5, r0
	bra	udiv_25
	 div1	r5, r0			! (delay slot) fourth div1 step
  /*
   * Small divisor path (divisor <= 128): multiply by a tabulated
   * reciprocal instead of stepping with div1.
   *
   *   div_le128   - entry used by the signed routine, which has already
   *                 pushed r4 and r5.
   *   udiv_le128  - entry used by __udivsi3_i4i; pushes r4 and r5 here.
   *   div_le128_2 - common part: r4 = dividend, r5 = divisor, r1 = byte
   *                 offset read from div_table_ix.
   *
   * The offset in r1 is relative to div_table_inv and may be negative.
   */
div_le128:
	mova	div_table_ix, r0
	bra	div_le128_2
	 mov.b	@(r0,r5), r1		! (delay slot) r1 = div_table_ix[divisor]
udiv_le128:
	mov.l	r4, @-r15		! save caller r4
	mova	div_table_ix, r0
	mov.b	@(r0,r5), r1		! r1 = div_table_ix[divisor]
	mov.l	r5, @-r15		! save caller r5
div_le128_2:
	mova	div_table_inv, r0
	mov.l	@(r0,r1), r1		! r1 = reciprocal word at div_table_inv + r1
	mov	r5, r0
	tst	#0xfe, r0		! T = ((divisor & 0xfe) == 0)
	mova	div_table_clz, r0
	dmulu.l	r1, r4			! MACH:MACL = reciprocal * dividend (unsigned)
	mov.b	@(r0,r5), r1		! r1 = shift count div_table_clz[divisor]
	bt/s	div_by_1		! T set: result is the dividend itself
	 mov	r4, r0			! (delay slot) r0 = dividend
	mov.l	@r15+, r5		! restore r5
	sts	mach, r0		! r0 = upper 32 bits of the product
	/* clrt - not needed, T is clear because bt/s fell through */
	addc	r4, r0			! add dividend for the implicit leading 1, carry to T
	mov.l	@r15+, r4		! restore r4
	rotcr	r0			! shift right by one, carry becomes the top bit
	rts
	 shld	r1, r0			! (delay slot) shift by the table count
  /*
   * Exits taken when the T test on (divisor & 0xfe) succeeded.
   *   div_by_1_neg - result is the negated r4; falls into div_by_1.
   *   div_by_1     - r0 already holds the result.
   * Both restore r5 and r4 from the stack before returning.
   */
div_by_1_neg:
	neg	r4, r0			! r0 = -r4
div_by_1:
	mov.l	@r15+, r5		! restore r5
	rts
	 mov.l	@r15+, r4		! (delay slot) restore r4
  /*
   * Divisor does not fit in 16 bits.
   *
   *   div_ge64k   - entry used by the signed routine (r4/r5 already saved,
   *                 T = (divisor > dividend >> 8) from the delay slot of
   *                 the branch that got here).
   *   udiv_ge64k  - entry used by __udivsi3_i4i; performs that compare
   *                 itself and saves r4/r5.
   *   div_ge64k_2 - common continuation.
   *
   * When T is set the work is handed to div_r8 / udiv_r8.  Otherwise eight
   * div1 steps are run here, the low byte of r0 is parked in a zeroed
   * scratch word on the stack, and div_ge64k_end runs the remaining steps.
   */
div_ge64k:
	bt/s	div_r8
	 div0u				! (delay slot) clear M, Q and T
	shll8	r5
	bra	div_ge64k_2
	 div1	r5, r0			! (delay slot) first div1 step
udiv_ge64k:
	cmp/hi	r0, r5			! T = (divisor > dividend >> 8)
	mov	r5, r1			! keep the unshifted divisor for saving
	bt	udiv_r8
	shll8	r5
	mov.l	r4, @-r15		! save caller r4
	div1	r5, r0			! first div1 step
	mov.l	r1, @-r15		! save caller r5 (original value)
div_ge64k_2:
	div1	r5, r0
	mov.l	zero_l, r1		! r1 = 0
	div1	r5, r0
	div1	r5, r0
	div1	r5, r0
	div1	r5, r0
	mov.l	r1, @-r15		! push a zeroed scratch word
	div1	r5, r0
	mov.w	m256_w, r1		! r1 = 0xff00 sign-extended (low byte clear)
	div1	r5, r0
	mov.b	r0, @(L_LSWMSB,r15)	! park low byte of r0 in the scratch word
	xor	r4, r0			! these three steps give
	and	r1, r0			!   r0 = (r0 & r1) | (r4 & ~r1)
	bra	div_ge64k_end
	 xor	r4, r0			! (delay slot) low byte now comes from r4
  /*
   * Path taken from div_ge64k / udiv_ge64k when the divisor is larger
   * than (dividend >> 8).
   *
   *   div_r8   - entry used by the signed routine (r4/r5 already saved).
   *   udiv_r8  - entry used by __udivsi3_i4i; saves r4/r5 first.
   *   div_r8_2 - common part: r4 holds the dividend shifted left by 24,
   *              r0 holds dividend >> 8.
   *
   * r1 is stepped with div1 while r0 is rotated left once per step; r0 is
   * the value returned.
   */
div_r8:
	shll16	r4
	bra	div_r8_2
	 shll8	r4			! (delay slot) r4 = dividend << 24
udiv_r8:
	mov.l	r4, @-r15		! save caller r4
	shll16	r4
	clrt
	shll8	r4			! r4 = dividend << 24
	mov.l	r5, @-r15		! save caller r5
div_r8_2:
	rotcl	r4
	mov	r0, r1			! r1 = dividend >> 8
	div1	r5, r1
	mov	r4, r0
	rotcl	r0
	mov	r5, r4			! copy divisor, r5 is reloaded before the last step
	div1	r5, r1
	.rept	5
	rotcl	r0
	div1	r5, r1
	.endr
	rotcl	r0
	mov.l	@r15+, r5		! restore r5
	div1	r4, r1			! last step uses the divisor copy in r4
	mov.l	@r15+, r4		! restore r4
	rts
	 rotcl	r0			! (delay slot) rotate the final T bit into r0
  /*
   * __sdivsi3_i4i - signed 32-bit division, also exported under the names
   * __sdivsi3_i4 and __sdivsi3.
   *
   *   In:   r4 = dividend, r5 = divisor
   *   Out:  r0 = quotient
   *
   * This is link-compatible with a __sdivsi3 call, but we effectively
   * clobber only r1.
   *
   * r4 and r5 are pushed, then replaced by their magnitudes where they
   * are negative.  Operands of equal sign continue at pos_result,
   * operands of different sign at neg_result.
   *
   * pos_result dispatches on the divisor magnitude in the same way as
   * __udivsi3_i4i; for 128 < divisor < 0x10000 it runs three div1 steps
   * and then falls through into udiv_25.
   */
	.type	__sdivsi3_i4i, @function
	.global	__sdivsi3_i4i
	.global	__sdivsi3_i4
	.set	__sdivsi3_i4, __sdivsi3_i4i
	.global	__sdivsi3
	.set	__sdivsi3, __sdivsi3_i4i
__sdivsi3_i4i:
	mov.l	r4, @-r15		! save caller r4
	cmp/pz	r5			! T = (divisor >= 0)
	mov.w	c128_w, r1		! r1 = 128
	bt/s	pos_divisor
	 cmp/pz	r4			! (delay slot) T = (dividend >= 0)
	mov.l	r5, @-r15		! save caller r5
	neg	r5, r5			! r5 = -divisor
	bt/s	neg_result		! dividend >= 0 with a negative divisor
	 cmp/hi	r1, r5			! (delay slot) T = (r5 > 128), unsigned
	neg	r4, r4			! both negative: r4 = -dividend
pos_result:
	extu.w	r5, r0			! r0 = low 16 bits of the divisor
	bf	div_le128		! divisor <= 128
	cmp/eq	r5, r0			! T = (divisor fits in 16 bits)
	mov	r4, r0
	shlr8	r0			! r0 = dividend >> 8
	bf/s	div_ge64k		! divisor >= 0x10000
	 cmp/hi	r0, r5			! (delay slot) T = (divisor > dividend >> 8)
	div0u				! clear M, Q and T
	shll16	r5			! move the divisor into the upper half word
	div1	r5, r0
	div1	r5, r0
	div1	r5, r0
	/* execution continues at udiv_25 */
  /*
   * Shared tail for divisors above 128 that fit in 16 bits.
   *
   *   udiv_25       - reached from __udivsi3_i4i after four div1 steps
   *                   and from pos_result after three.  r4/r5 are on the
   *                   stack, r5 = divisor << 16.
   *   div_ge64k_end - also the tail of the div_ge64k path: eight more
   *                   div1 steps, then the result is assembled.
   *
   * Partial quotient bytes are parked in a zeroed scratch word on the
   * stack and OR-ed back into r0 at the end.
   */
udiv_25:
	mov.l	zero_l, r1		! r1 = 0
	div1	r5, r0
	div1	r5, r0
	mov.l	r1, @-r15		! push a zeroed scratch word
	div1	r5, r0
	div1	r5, r0
	div1	r5, r0
	mov.b	r0, @(L_MSWLSB,r15)	! park low byte of r0 in the scratch word
	xtrct	r4, r0			! with the swap below: keep the upper half of
	swap.w	r0, r0			!   r0, take the lower half from r4
	.rept	8
	div1	r5, r0
	.endr
	mov.b	r0, @(L_LSWMSB,r15)	! park the next byte
div_ge64k_end:
	.rept	8
	div1	r5, r0
	.endr
	mov.l	@r15+, r4		! zero-extension and swap using LS unit.
	extu.b	r0, r0			! keep only the low byte of r0
	mov.l	@r15+, r5		! restore r5
	or	r4, r0			! merge the parked bytes (r4 = scratch word)
	mov.l	@r15+, r4		! restore r4
	rts
	 rotcl	r0			! (delay slot) rotate the final T bit into r0
  /*
   * Small divisor path (divisor magnitude <= 128) for a negative result.
   * On entry r4 and r5 hold the operand magnitudes, the caller values are
   * on the stack, and r0 holds the low 16 bits of r5.
   *
   * The same reciprocal multiply as div_le128_2 is used, with the
   * quotient negated on the way out.  When (divisor & 0xfe) == 0 the work
   * is handed to div_by_1_neg.
   */
div_le128_neg:
	tst	#0xfe, r0		! T = ((divisor & 0xfe) == 0)
	mova	div_table_ix, r0
	mov.b	@(r0,r5), r1		! r1 = div_table_ix[divisor]
	mova	div_table_inv, r0
	bt/s	div_by_1_neg
	 mov.l	@(r0,r1), r1		! (delay slot) r1 = reciprocal word
	mova	div_table_clz, r0
	dmulu.l	r1, r4			! MACH:MACL = reciprocal * dividend (unsigned)
	mov.b	@(r0,r5), r1		! r1 = shift count div_table_clz[divisor]
	mov.l	@r15+, r5		! restore r5
	sts	mach, r0		! r0 = upper 32 bits of the product
	/* clrt - not needed, T is clear because bt/s fell through */
	addc	r4, r0			! add dividend for the implicit leading 1, carry to T
	mov.l	@r15+, r4		! restore r4
	rotcr	r0			! shift right by one, carry becomes the top bit
	shld	r1, r0			! shift by the table count
	rts
	 neg	r0, r0			! (delay slot) negate the quotient
  /*
   *   pos_divisor - continuation of __sdivsi3_i4i for divisor >= 0.  T
   *                 holds (dividend >= 0) and r1 holds 128.
   *   neg_result  - operands of different sign; r4 and r5 now hold the
   *                 magnitudes and T holds (divisor > 128).
   *
   * neg_result mirrors pos_result but uses the *_neg helpers, which
   * negate the quotient before returning.  For 128 < divisor < 0x10000
   * it runs sixteen div1 steps here and then falls through into
   * div_ge64k_neg_end.
   */
pos_divisor:
	mov.l	r5, @-r15		! save caller r5
	bt/s	pos_result		! dividend >= 0 as well
	 cmp/hi	r1, r5			! (delay slot) T = (divisor > 128), unsigned
	neg	r4, r4			! r4 = -dividend
neg_result:
	extu.w	r5, r0			! r0 = low 16 bits of the divisor
	bf	div_le128_neg		! divisor <= 128
	cmp/eq	r5, r0			! T = (divisor fits in 16 bits)
	mov	r4, r0
	shlr8	r0			! r0 = dividend >> 8
	bf/s	div_ge64k_neg		! divisor >= 0x10000
	 cmp/hi	r0, r5			! (delay slot) T = (divisor > dividend >> 8)
	div0u				! clear M, Q and T
	mov.l	zero_l, r1		! r1 = 0
	shll16	r5			! move the divisor into the upper half word
	div1	r5, r0
	mov.l	r1, @-r15		! push a zeroed scratch word
	.rept	7
	div1	r5, r0
	.endr
	mov.b	r0, @(L_MSWLSB,r15)	! park low byte of r0 in the scratch word
	xtrct	r4, r0			! with the swap below: keep the upper half of
	swap.w	r0, r0			!   r0, take the lower half from r4
	.rept	8
	div1	r5, r0
	.endr
	mov.b	r0, @(L_LSWMSB,r15)	! park the next byte
	/* execution continues at div_ge64k_neg_end */
  /*
   * Tail of the negative-result div1 paths.
   *
   *   div_ge64k_neg_end - eight more div1 steps, then the quotient is
   *                       assembled in r1 from the low byte of r0 and the
   *                       scratch word on the stack; r5 is restored.
   *   div_r8_neg_end    - joined by div_r8_neg with the quotient in r1
   *                       and r5 already restored.
   *
   * The quotient is negated into r0 on return.
   */
div_ge64k_neg_end:
	.rept	8
	div1	r5, r0
	.endr
	mov.l	@r15+, r4		! zero-extension and swap using LS unit.
	extu.b	r0, r1			! r1 = low byte of r0
	mov.l	@r15+, r5		! restore r5
	or	r4, r1			! merge the parked bytes (r4 = scratch word)
div_r8_neg_end:
	mov.l	@r15+, r4		! restore r4
	rotcl	r1			! rotate the final T bit into r1
	rts
	 neg	r1, r0			! (delay slot) r0 = -r1
  /*
   * Negative-result counterpart of div_ge64k: the divisor magnitude does
   * not fit in 16 bits.  On entry T = (divisor > dividend >> 8); when set,
   * div_r8_neg takes over.  Otherwise eight div1 steps are run here, the
   * low byte of r0 is parked in a zeroed scratch word, and
   * div_ge64k_neg_end finishes the job.
   *
   * c128_w (the constant 128 loaded by both entry points) is placed
   * after the unconditional branch, where it is never executed.
   */
div_ge64k_neg:
	bt/s	div_r8_neg
	 div0u				! (delay slot) clear M, Q and T
	shll8	r5
	mov.l	zero_l, r1		! r1 = 0
	.rept	6
	div1	r5, r0
	.endr
	mov.l	r1, @-r15		! push a zeroed scratch word
	div1	r5, r0
	mov.w	m256_w, r1		! r1 = 0xff00 sign-extended (low byte clear)
	div1	r5, r0
	mov.b	r0, @(L_LSWMSB,r15)	! park low byte of r0 in the scratch word
	xor	r4, r0			! these three steps give
	and	r1, r0			!   r0 = (r0 & r1) | (r4 & ~r1)
	bra	div_ge64k_neg_end
	 xor	r4, r0			! (delay slot) low byte now comes from r4
c128_w:
	.word	128
  /*
   * Negative-result counterpart of div_r8.  On entry r0 = dividend >> 8
   * and r4 / r5 hold the operand magnitudes.  r0 is stepped with div1
   * while r1 is rotated left once per step; div_r8_neg_end then rotates
   * once more, negates r1 into r0 and returns.
   *
   * m256_w (the 0xff00 mask constant) is placed after the unconditional
   * branch, where it is never executed.
   */
div_r8_neg:
	clrt
	shll16	r4
	mov	r4, r1
	shll8	r1			! r1 = dividend << 24
	mov	r5, r4			! copy divisor, r5 is reloaded before the last step
	.rept	7
	rotcl	r1
	div1	r5, r0
	.endr
	mov.l	@r15+, r5		! restore r5
	rotcl	r1
	bra	div_r8_neg_end
	 div1	r4, r0			! (delay slot) last step uses the copy in r4
m256_w:
	.word	0xff00
  /*
   * This table has been generated by divtab-sh4.c.
   *
   * div_table_clz[d] is the count handed to shld after the reciprocal
   * multiply for divisor d; negative counts shift right.  Entries 0..127
   * are stored here; the entry for d = 128 is the first byte of
   * div_table_ix, which must therefore follow immediately.
   */
	.balign	4
div_table_clz:
	.byte	0, 1, 0, -1, -1				/* d = 0..4 */
	.byte	-2, -2, -2, -2				/* d = 5..8 */
	.byte	-3, -3, -3, -3, -3, -3, -3, -3		/* d = 9..16 */
	.byte	-4, -4, -4, -4, -4, -4, -4, -4		/* d = 17..24 */
	.byte	-4, -4, -4, -4, -4, -4, -4, -4		/* d = 25..32 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* d = 33..40 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* d = 41..48 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* d = 49..56 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* d = 57..64 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* d = 65..72 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* d = 73..80 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* d = 81..88 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* d = 89..96 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* d = 97..104 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* d = 105..112 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* d = 113..120 */
	.byte	-6, -6, -6, -6, -6, -6, -6		/* d = 121..127 */
  /*
   * Lookup table translating positive divisor to index into table of
   * normalized inverse.  Each entry is a signed byte offset that is added
   * to the address of div_table_inv.
   *
   * N.B. the '0' entry is also the last entry of the previous table, and
   * causes an unaligned access for division by zero.
   */
div_table_ix:
	.byte	-6						/* d = 0 (shared with div_table_clz) */
	.byte	-128						/* d = 1 */
	.byte	-128, 0						/* d = 2..3 */
	.byte	-128, -64, 0, 64				/* d = 4..7 */
	.byte	-128, -96, -64, -32, 0, 32, 64, 96		/* d = 8..15 */
	.byte	-128, -112, -96, -80, -64, -48, -32, -16	/* d = 16..23 */
	.byte	0, 16, 32, 48, 64, 80, 96, 112			/* d = 24..31 */
	.byte	-128, -120, -112, -104, -96, -88, -80, -72	/* d = 32..39 */
	.byte	-64, -56, -48, -40, -32, -24, -16, -8		/* d = 40..47 */
	.byte	0, 8, 16, 24, 32, 40, 48, 56			/* d = 48..55 */
	.byte	64, 72, 80, 88, 96, 104, 112, 120		/* d = 56..63 */
	.byte	-128, -124, -120, -116, -112, -108, -104, -100	/* d = 64..71 */
	.byte	-96, -92, -88, -84, -80, -76, -72, -68		/* d = 72..79 */
	.byte	-64, -60, -56, -52, -48, -44, -40, -36		/* d = 80..87 */
	.byte	-32, -28, -24, -20, -16, -12, -8, -4		/* d = 88..95 */
	.byte	0, 4, 8, 12, 16, 20, 24, 28			/* d = 96..103 */
	.byte	32, 36, 40, 44, 48, 52, 56, 60			/* d = 104..111 */
	.byte	64, 68, 72, 76, 80, 84, 88, 92			/* d = 112..119 */
	.byte	96, 100, 104, 108, 112, 116, 120, 124		/* d = 120..127 */
	.byte	-128						/* d = 128 */
  /*
   * 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.
   *
   * The table is addressed relative to div_table_inv, which labels the
   * entry for 1/96; the offsets stored in div_table_ix reach back to
   * zero_l (offset -128) and forward to the last entry (offset 124).
   * The first word doubles as the 32-bit zero constant zero_l.
   */
	.balign	4
zero_l:
	.long	0x00000000, 0xF81F81F9, 0xF07C1F08, 0xE9131AC0	/* 1/64  .. 1/67  */
	.long	0xE1E1E1E2, 0xDAE6076C, 0xD41D41D5, 0xCD856891	/* 1/68  .. 1/71  */
	.long	0xC71C71C8, 0xC0E07039, 0xBACF914D, 0xB4E81B4F	/* 1/72  .. 1/75  */
	.long	0xAF286BCB, 0xA98EF607, 0xA41A41A5, 0x9EC8E952	/* 1/76  .. 1/79  */
	.long	0x9999999A, 0x948B0FCE, 0x8F9C18FA, 0x8ACB90F7	/* 1/80  .. 1/83  */
	.long	0x86186187, 0x81818182, 0x7D05F418, 0x78A4C818	/* 1/84  .. 1/87  */
	.long	0x745D1746, 0x702E05C1, 0x6C16C16D, 0x68168169	/* 1/88  .. 1/91  */
	.long	0x642C8591, 0x60581606, 0x5C9882BA, 0x58ED2309	/* 1/92  .. 1/95  */
div_table_inv:
	.long	0x55555556, 0x51D07EAF, 0x4E5E0A73, 0x4AFD6A06	/* 1/96  .. 1/99  */
	.long	0x47AE147B, 0x446F8657, 0x41414142, 0x3E22CBCF	/* 1/100 .. 1/103 */
	.long	0x3B13B13C, 0x38138139, 0x3521CFB3, 0x323E34A3	/* 1/104 .. 1/107 */
	.long	0x2F684BDB, 0x2C9FB4D9, 0x29E4129F, 0x27350B89	/* 1/108 .. 1/111 */
	.long	0x24924925, 0x21FB7813, 0x1F7047DD, 0x1CF06ADB	/* 1/112 .. 1/115 */
	.long	0x1A7B9612, 0x18118119, 0x15B1E5F8, 0x135C8114	/* 1/116 .. 1/119 */
	.long	0x11111112, 0x0ECF56BF, 0x0C9714FC, 0x0A6810A7	/* 1/120 .. 1/123 */
	.long	0x08421085, 0x0624DD30, 0x04104105, 0x02040811	/* 1/124 .. 1/127 */
/* maximum error: 0.987342 scaled: 0.921875*/