udivsi3.S 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
  2. 2004, 2005, 2006
  3. Free Software Foundation, Inc.
  4. This file is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU General Public License as published by the
  6. Free Software Foundation; either version 2, or (at your option) any
  7. later version.
  8. In addition to the permissions in the GNU General Public License, the
  9. Free Software Foundation gives you unlimited permission to link the
  10. compiled version of this file into combinations with other programs,
  11. and to distribute those combinations without any restriction coming
  12. from the use of this file. (The General Public License restrictions
  13. do apply in other respects; for example, they cover modification of
  14. the file, and distribution when not linked into a combine
  15. executable.)
  16. This file is distributed in the hope that it will be useful, but
  17. WITHOUT ANY WARRANTY; without even the implied warranty of
  18. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. General Public License for more details.
  20. You should have received a copy of the GNU General Public License
  21. along with this program; see the file COPYING. If not, write to
  22. the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  23. Boston, MA 02110-1301, USA. */
  24. !! libgcc routines for the Renesas / SuperH SH CPUs.
  25. !! Contributed by Steve Chamberlain.
  26. !! sac@cygnus.com
  27. !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
  28. !! recoded in assembly by Toshiyasu Morita
  29. !! tm@netcom.com
  30. /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
  31. ELF local label prefixes by J"orn Rennecke
  32. amylaar@cygnus.com */
  33. /* This code uses shld, thus is not suitable for SH1 / SH2. */
  34. /* Signed / unsigned division without use of FPU, optimized for SH4.
  35. Uses a lookup table for divisors in the range -128 .. +128, and
  36. div1 with case distinction for larger divisors in three more ranges.
  37. The code is lumped together with the table to allow the use of mova. */
  /* Byte offsets of individual bytes inside a 32-bit word in memory,
     selected by endianness:
       L_LSB    - bits 0..7
       L_LSWMSB - bits 8..15  (most significant byte of the low 16-bit word)
       L_MSWLSB - bits 16..23 (least significant byte of the high 16-bit word)
     The division code uses L_LSWMSB and L_MSWLSB with mov.b to store single
     bytes into a scratch word on the stack.  L_LSB is not referenced by the
     code visible in this file. */
  38. #ifdef CONFIG_CPU_LITTLE_ENDIAN
  39. #define L_LSB 0
  40. #define L_LSWMSB 1
  41. #define L_MSWLSB 2
  42. #else
  43. #define L_LSB 3
  44. #define L_LSWMSB 2
  45. #define L_MSWLSB 1
  46. #endif
  /* __udivsi3_i4i (also exported as __udivsi3): unsigned 32-bit division.
     In:  r4 = dividend, r5 = divisor.
     Out: r0 = quotient.
     r4 and r5 are pushed on the stack and popped again before returning;
     r1 is scratch, and the table path also writes MACH/MACL via dmulu.l.
     Dispatch on the divisor:
       divisor <= 128            -> udiv_le128 (multiply by a tabled inverse)
       129 .. 0xffff             -> inline div1 steps, then joins udiv_25
       divisor >= 0x10000        -> udiv_ge64k, or udiv_r8 when the divisor
                                    is larger than dividend >> 8
     Labels without the "u" prefix (div_le128, div_le128_2, div_ge64k,
     div_r8, ...) are entered from __sdivsi3_i4i, which has already pushed
     r4 and r5.  The instruction after bra / rts / bt/s / bf/s sits in the
     branch delay slot. */
  47. .balign 4
  48. .global __udivsi3_i4i
  49. .global __udivsi3
  50. .set __udivsi3, __udivsi3_i4i /* __udivsi3 is an alias for the same entry point */
  51. .type __udivsi3_i4i, @function
  52. __udivsi3_i4i:
  53. mov.w c128_w, r1 /* r1 = 128, the largest divisor handled by the table path */
  54. div0u /* initialise the divide flags for unsigned div1 steps */
  55. mov r4,r0
  56. shlr8 r0 /* r0 = dividend >> 8 */
  57. cmp/hi r1,r5 /* T = (divisor > 128), unsigned */
  58. extu.w r5,r1 /* r1 = low 16 bits of the divisor */
  59. bf udiv_le128 /* divisor <= 128: table path */
  60. cmp/eq r5,r1 /* T = (divisor fits in 16 bits) */
  61. bf udiv_ge64k /* divisor >= 0x10000 */
  62. shlr r0 /* r0 = dividend >> 9; the bit shifted out goes to T */
  63. mov r5,r1 /* keep the unshifted divisor for the stack save below */
  64. shll16 r5 /* r5 = divisor << 16 */
  65. mov.l r4,@-r15 /* save caller's r4 */
  66. div1 r5,r0
  67. mov.l r1,@-r15 /* save caller's r5 (original divisor) */
  68. div1 r5,r0
  69. div1 r5,r0
  70. bra udiv_25 /* continue in the sequence shared with __sdivsi3_i4i */
  71. div1 r5,r0 /* (delay slot) */
  72. div_le128: /* signed entry: r4/r5 are already on the stack */
  73. mova div_table_ix,r0
  74. bra div_le128_2
  75. mov.b @(r0,r5),r1 /* (delay slot) r1 = div_table_ix[divisor] */
  76. udiv_le128:
  77. mov.l r4,@-r15 /* save caller's r4 */
  78. mova div_table_ix,r0
  79. mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor]: byte offset of the inverse */
  80. mov.l r5,@-r15 /* save caller's r5 */
  81. div_le128_2:
  82. mova div_table_inv,r0
  83. mov.l @(r0,r1),r1 /* r1 = tabled inverse; the offset is misaligned for divisor 0 */
  84. mov r5,r0
  85. tst #0xfe,r0 /* T = ((divisor & 0xfe) == 0) */
  86. mova div_table_clz,r0
  87. dmulu.l r1,r4 /* MACH:MACL = inverse * dividend (unsigned) */
  88. mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor]: final shift count */
  89. bt/s div_by_1 /* T set (divisor 1): the quotient is the dividend itself */
  90. mov r4,r0 /* (delay slot) r0 = dividend */
  91. mov.l @r15+,r5 /* restore r5 */
  92. sts mach,r0 /* r0 = high word of the product */
  93. /* clrt */
  94. addc r4,r0 /* add the dividend for the inverse's implicit leading 1; T is 0 here since bt/s fell through */
  95. mov.l @r15+,r4 /* restore r4 */
  96. rotcr r0 /* rotate the carry of the addition back in from the top */
  97. rts
  98. shld r1,r0 /* (delay slot) apply the tabled shift count */
  99. div_by_1_neg: /* entered from the signed code when the result must be negated */
  100. neg r4,r0 /* r0 = -r4 */
  101. div_by_1: /* r0 already holds the result; restore registers and return */
  102. mov.l @r15+,r5
  103. rts
  104. mov.l @r15+,r4 /* (delay slot) */
  105. div_ge64k: /* signed entry; T = (divisor > dividend >> 8) was set in the caller's delay slot */
  106. bt/s div_r8 /* quotient fits in 8 bits */
  107. div0u /* (delay slot) */
  108. shll8 r5
  109. bra div_ge64k_2
  110. div1 r5,r0 /* (delay slot) */
  111. udiv_ge64k:
  112. cmp/hi r0,r5 /* T = (divisor > dividend >> 8), unsigned */
  113. mov r5,r1 /* keep the unshifted divisor for the stack save below */
  114. bt udiv_r8 /* quotient fits in 8 bits */
  115. shll8 r5
  116. mov.l r4,@-r15 /* save caller's r4 */
  117. div1 r5,r0
  118. mov.l r1,@-r15 /* save caller's r5 (original divisor) */
  119. div_ge64k_2:
  120. div1 r5,r0
  121. mov.l zero_l,r1 /* r1 = 0 */
  122. .rept 4
  123. div1 r5,r0
  124. .endr
  125. mov.l r1,@-r15 /* push a zeroed scratch word */
  126. div1 r5,r0
  127. mov.w m256_w,r1 /* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */
  128. div1 r5,r0
  129. mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
  130. xor r4,r0
  131. and r1,r0
  132. bra div_ge64k_end /* the last 8 div1 steps and the epilogue are shared */
  133. xor r4,r0 /* (delay slot) xor/and/xor: low byte of r0 is replaced by the low byte of r4 */
  134. div_r8: /* signed entry: r4/r5 are already on the stack */
  135. shll16 r4
  136. bra div_r8_2
  137. shll8 r4 /* (delay slot) r4 = dividend << 24 */
  138. udiv_r8:
  139. mov.l r4,@-r15 /* save caller's r4 */
  140. shll16 r4
  141. clrt
  142. shll8 r4 /* r4 = dividend << 24 */
  143. mov.l r5,@-r15 /* save caller's r5 */
  144. div_r8_2:
  145. rotcl r4
  146. mov r0,r1 /* r1 = dividend >> 8, the register the div1 steps work on */
  147. div1 r5,r1
  148. mov r4,r0
  149. rotcl r0
  150. mov r5,r4 /* second copy of the divisor so that r5 can be restored before the last step */
  151. div1 r5,r1
  152. .rept 5
  153. rotcl r0; div1 r5,r1
  154. .endr
  155. rotcl r0
  156. mov.l @r15+,r5 /* restore r5 */
  157. div1 r4,r1 /* eighth div1 step, using the divisor copy in r4 */
  158. mov.l @r15+,r4 /* restore r4 */
  159. rts
  160. rotcl r0 /* (delay slot) rotate the final T bit into r0 */
  /* __sdivsi3_i4i (also exported as __sdivsi3): signed 32-bit division.
     In:  r4 = dividend, r5 = divisor.
     Out: r0 = quotient.
     Negative operands are replaced by their negation (neg) and the division
     is then carried out on the magnitudes.  Paths reached through pos_result
     return the magnitude unchanged; paths reached through neg_result negate
     it in the delay slot of the final rts.  The caller's r4 and r5 are pushed
     on entry and popped on every exit path.  The instruction after
     bra / rts / bt/s / bf/s sits in the branch delay slot. */
  161. .global __sdivsi3_i4i
  162. .global __sdivsi3
  163. .set __sdivsi3, __sdivsi3_i4i
  164. .type __sdivsi3_i4i, @function
  165. /* This is link-compatible with a __sdivsi3 call,
  166. but we effectively clobber only r1. */
  167. __sdivsi3_i4i:
  168. mov.l r4,@-r15 /* save caller's r4 */
  169. cmp/pz r5 /* T = (divisor >= 0) */
  170. mov.w c128_w, r1 /* r1 = 128 */
  171. bt/s pos_divisor
  172. cmp/pz r4 /* (delay slot) T = (dividend >= 0) */
  173. mov.l r5,@-r15 /* save caller's r5 */
  174. neg r5,r5 /* divisor was negative: r5 = -divisor */
  175. bt/s neg_result /* dividend >= 0, divisor < 0: result is negated */
  176. cmp/hi r1,r5 /* (delay slot) T = (r5 > 128), unsigned */
  177. neg r4,r4 /* both operands negative: r4 = -dividend, result is not negated */
  178. pos_result:
  179. extu.w r5,r0
  180. bf div_le128 /* r5 <= 128: table path */
  181. cmp/eq r5,r0 /* T = (r5 fits in 16 bits) */
  182. mov r4,r0
  183. shlr8 r0 /* r0 = r4 >> 8 */
  184. bf/s div_ge64k /* r5 >= 0x10000 */
  185. cmp/hi r0,r5 /* (delay slot) T = (r5 > r4 >> 8), unsigned */
  186. div0u
  187. shll16 r5
  188. div1 r5,r0
  189. div1 r5,r0
  190. div1 r5,r0
  191. udiv_25: /* __udivsi3_i4i joins here for divisors in 129 .. 0xffff */
  192. mov.l zero_l,r1 /* r1 = 0 */
  193. div1 r5,r0
  194. div1 r5,r0
  195. mov.l r1,@-r15 /* push a zeroed scratch word */
  196. .rept 3
  197. div1 r5,r0
  198. .endr
  199. mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 -> bits 16..23 of the scratch word */
  200. xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
  201. swap.w r0,r0 /* exchange the two 16-bit halves of r0 */
  202. .rept 8
  203. div1 r5,r0
  204. .endr
  205. mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
  206. div_ge64k_end:
  207. .rept 8
  208. div1 r5,r0
  209. .endr
  210. mov.l @r15+,r4 ! zero-extension and swap using LS unit.
  211. extu.b r0,r0 /* r0 = low byte of r0, zero-extended */
  212. mov.l @r15+,r5 /* restore r5 */
  213. or r4,r0 /* merge with the bytes stored in the scratch word (popped into r4 above) */
  214. mov.l @r15+,r4 /* restore r4 */
  215. rts
  216. rotcl r0 /* (delay slot) rotate the final T bit into r0 */
  217. div_le128_neg: /* r5 <= 128, negated result; r0 = low 16 bits of r5 on entry */
  218. tst #0xfe,r0 /* T = ((r5 & 0xfe) == 0) */
  219. mova div_table_ix,r0
  220. mov.b @(r0,r5),r1 /* r1 = div_table_ix[r5]: byte offset of the inverse */
  221. mova div_table_inv,r0
  222. bt/s div_by_1_neg /* T set: the result is just -r4 */
  223. mov.l @(r0,r1),r1 /* (delay slot) r1 = tabled inverse */
  224. mova div_table_clz,r0
  225. dmulu.l r1,r4 /* MACH:MACL = inverse * r4 (unsigned) */
  226. mov.b @(r0,r5),r1 /* r1 = div_table_clz[r5]: final shift count */
  227. mov.l @r15+,r5 /* restore r5 */
  228. sts mach,r0 /* r0 = high word of the product */
  229. /* clrt */
  230. addc r4,r0 /* add r4 for the inverse's implicit leading 1; T is 0 here since bt/s fell through */
  231. mov.l @r15+,r4 /* restore r4 */
  232. rotcr r0 /* rotate the carry of the addition back in from the top */
  233. shld r1,r0 /* apply the tabled shift count */
  234. rts
  235. neg r0,r0 /* (delay slot) negate the result */
  236. pos_divisor:
  237. mov.l r5,@-r15 /* save caller's r5 */
  238. bt/s pos_result /* dividend >= 0 as well: result is not negated */
  239. cmp/hi r1,r5 /* (delay slot) T = (divisor > 128), unsigned */
  240. neg r4,r4 /* dividend < 0: r4 = -dividend, result is negated */
  241. neg_result:
  242. extu.w r5,r0
  243. bf div_le128_neg /* r5 <= 128: table path */
  244. cmp/eq r5,r0 /* T = (r5 fits in 16 bits) */
  245. mov r4,r0
  246. shlr8 r0 /* r0 = r4 >> 8 */
  247. bf/s div_ge64k_neg /* r5 >= 0x10000 */
  248. cmp/hi r0,r5 /* (delay slot) T = (r5 > r4 >> 8), unsigned */
  249. div0u
  250. mov.l zero_l,r1 /* r1 = 0 */
  251. shll16 r5
  252. div1 r5,r0
  253. mov.l r1,@-r15 /* push a zeroed scratch word */
  254. .rept 7
  255. div1 r5,r0
  256. .endr
  257. mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 -> bits 16..23 of the scratch word */
  258. xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
  259. swap.w r0,r0 /* exchange the two 16-bit halves of r0 */
  260. .rept 8
  261. div1 r5,r0
  262. .endr
  263. mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
  264. div_ge64k_neg_end:
  265. .rept 8
  266. div1 r5,r0
  267. .endr
  268. mov.l @r15+,r4 ! zero-extension and swap using LS unit.
  269. extu.b r0,r1 /* r1 = low byte of r0, zero-extended */
  270. mov.l @r15+,r5 /* restore r5 */
  271. or r4,r1 /* merge with the bytes stored in the scratch word (popped into r4 above) */
  272. div_r8_neg_end:
  273. mov.l @r15+,r4 /* restore r4 */
  274. rotcl r1 /* rotate the final T bit into r1 */
  275. rts
  276. neg r1,r0 /* (delay slot) r0 = -r1 */
  277. div_ge64k_neg: /* T = (r5 > r4 >> 8) was set in the delay slot of the branch here */
  278. bt/s div_r8_neg /* quotient fits in 8 bits */
  279. div0u /* (delay slot) */
  280. shll8 r5
  281. mov.l zero_l,r1 /* r1 = 0 */
  282. .rept 6
  283. div1 r5,r0
  284. .endr
  285. mov.l r1,@-r15 /* push a zeroed scratch word */
  286. div1 r5,r0
  287. mov.w m256_w,r1 /* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */
  288. div1 r5,r0
  289. mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
  290. xor r4,r0
  291. and r1,r0
  292. bra div_ge64k_neg_end
  293. xor r4,r0 /* (delay slot) xor/and/xor: low byte of r0 is replaced by the low byte of r4 */
  294. c128_w: /* 16-bit literal for the mov.w loads; sits after an unconditional bra, so it is not executed */
  295. .word 128
  296. div_r8_neg:
  297. clrt
  298. shll16 r4
  299. mov r4,r1
  300. shll8 r1 /* r1 = r4 << 24 */
  301. mov r5,r4 /* second copy of the divisor so that r5 can be restored before the last step */
  302. .rept 7
  303. rotcl r1; div1 r5,r0
  304. .endr
  305. mov.l @r15+,r5 /* restore r5 */
  306. rotcl r1
  307. bra div_r8_neg_end
  308. div1 r4,r0 /* (delay slot) eighth div1 step, using the divisor copy in r4 */
  309. m256_w: /* 16-bit literal for the mov.w loads; sits after an unconditional bra, so it is not executed */
  310. .word 0xff00
  311. /* This table has been generated by divtab-sh4.c. */
  /* div_table_clz: one signed byte per divisor, indexed by the divisor
     itself.  128 entries are stored here (divisors 0..127); the entry for
     divisor 128 is the first byte of div_table_ix, which follows directly.
     The value is loaded with mov.b and handed to shld as the final shift
     count after the multiply-by-inverse step (a negative shld count shifts
     right). */
  312. .balign 4
  313. div_table_clz:
  314. .byte 0 /* divisor 0 */
  315. .byte 1 /* divisor 1 */
  316. .byte 0 /* divisor 2 */
  317. .byte -1 /* divisors 3..4 */
  318. .byte -1
  319. .byte -2 /* divisors 5..8 */
  320. .byte -2
  321. .byte -2
  322. .byte -2
  323. .byte -3 /* divisors 9..16 */
  324. .byte -3
  325. .byte -3
  326. .byte -3
  327. .byte -3
  328. .byte -3
  329. .byte -3
  330. .byte -3
  331. .byte -4 /* divisors 17..32 */
  332. .byte -4
  333. .byte -4
  334. .byte -4
  335. .byte -4
  336. .byte -4
  337. .byte -4
  338. .byte -4
  339. .byte -4
  340. .byte -4
  341. .byte -4
  342. .byte -4
  343. .byte -4
  344. .byte -4
  345. .byte -4
  346. .byte -4
  347. .byte -5 /* divisors 33..64 */
  348. .byte -5
  349. .byte -5
  350. .byte -5
  351. .byte -5
  352. .byte -5
  353. .byte -5
  354. .byte -5
  355. .byte -5
  356. .byte -5
  357. .byte -5
  358. .byte -5
  359. .byte -5
  360. .byte -5
  361. .byte -5
  362. .byte -5
  363. .byte -5
  364. .byte -5
  365. .byte -5
  366. .byte -5
  367. .byte -5
  368. .byte -5
  369. .byte -5
  370. .byte -5
  371. .byte -5
  372. .byte -5
  373. .byte -5
  374. .byte -5
  375. .byte -5
  376. .byte -5
  377. .byte -5
  378. .byte -5
  379. .byte -6 /* divisors 65..127 */
  380. .byte -6
  381. .byte -6
  382. .byte -6
  383. .byte -6
  384. .byte -6
  385. .byte -6
  386. .byte -6
  387. .byte -6
  388. .byte -6
  389. .byte -6
  390. .byte -6
  391. .byte -6
  392. .byte -6
  393. .byte -6
  394. .byte -6
  395. .byte -6
  396. .byte -6
  397. .byte -6
  398. .byte -6
  399. .byte -6
  400. .byte -6
  401. .byte -6
  402. .byte -6
  403. .byte -6
  404. .byte -6
  405. .byte -6
  406. .byte -6
  407. .byte -6
  408. .byte -6
  409. .byte -6
  410. .byte -6
  411. .byte -6
  412. .byte -6
  413. .byte -6
  414. .byte -6
  415. .byte -6
  416. .byte -6
  417. .byte -6
  418. .byte -6
  419. .byte -6
  420. .byte -6
  421. .byte -6
  422. .byte -6
  423. .byte -6
  424. .byte -6
  425. .byte -6
  426. .byte -6
  427. .byte -6
  428. .byte -6
  429. .byte -6
  430. .byte -6
  431. .byte -6
  432. .byte -6
  433. .byte -6
  434. .byte -6
  435. .byte -6
  436. .byte -6
  437. .byte -6
  438. .byte -6
  439. .byte -6
  440. .byte -6
  441. .byte -6
  442. /* Lookup table translating positive divisor to index into table of
  443. normalized inverse. N.B. the '0' entry is also the last entry of the
  444. previous table, and causes an unaligned access for division by zero. */
  /* 129 signed bytes, indexed by the divisor (0..128).  Each value is a byte
     offset that the code adds to the address of div_table_inv before a
     32-bit load, so valid entries are multiples of 4 in the range
     -128 .. 124.  Divisors that are a power of two map to -128, which is
     the zero_l entry. */
  445. div_table_ix:
  446. .byte -6 /* divisor 0 (and div_table_clz entry for divisor 128): not a multiple of 4 */
  447. .byte -128 /* divisor 1 */
  448. .byte -128 /* divisors 2..3 */
  449. .byte 0
  450. .byte -128 /* divisors 4..7 */
  451. .byte -64
  452. .byte 0
  453. .byte 64
  454. .byte -128 /* divisors 8..15 */
  455. .byte -96
  456. .byte -64
  457. .byte -32
  458. .byte 0
  459. .byte 32
  460. .byte 64
  461. .byte 96
  462. .byte -128 /* divisors 16..31 */
  463. .byte -112
  464. .byte -96
  465. .byte -80
  466. .byte -64
  467. .byte -48
  468. .byte -32
  469. .byte -16
  470. .byte 0
  471. .byte 16
  472. .byte 32
  473. .byte 48
  474. .byte 64
  475. .byte 80
  476. .byte 96
  477. .byte 112
  478. .byte -128 /* divisors 32..63 */
  479. .byte -120
  480. .byte -112
  481. .byte -104
  482. .byte -96
  483. .byte -88
  484. .byte -80
  485. .byte -72
  486. .byte -64
  487. .byte -56
  488. .byte -48
  489. .byte -40
  490. .byte -32
  491. .byte -24
  492. .byte -16
  493. .byte -8
  494. .byte 0
  495. .byte 8
  496. .byte 16
  497. .byte 24
  498. .byte 32
  499. .byte 40
  500. .byte 48
  501. .byte 56
  502. .byte 64
  503. .byte 72
  504. .byte 80
  505. .byte 88
  506. .byte 96
  507. .byte 104
  508. .byte 112
  509. .byte 120
  510. .byte -128 /* divisors 64..127 */
  511. .byte -124
  512. .byte -120
  513. .byte -116
  514. .byte -112
  515. .byte -108
  516. .byte -104
  517. .byte -100
  518. .byte -96
  519. .byte -92
  520. .byte -88
  521. .byte -84
  522. .byte -80
  523. .byte -76
  524. .byte -72
  525. .byte -68
  526. .byte -64
  527. .byte -60
  528. .byte -56
  529. .byte -52
  530. .byte -48
  531. .byte -44
  532. .byte -40
  533. .byte -36
  534. .byte -32
  535. .byte -28
  536. .byte -24
  537. .byte -20
  538. .byte -16
  539. .byte -12
  540. .byte -8
  541. .byte -4
  542. .byte 0
  543. .byte 4
  544. .byte 8
  545. .byte 12
  546. .byte 16
  547. .byte 20
  548. .byte 24
  549. .byte 28
  550. .byte 32
  551. .byte 36
  552. .byte 40
  553. .byte 44
  554. .byte 48
  555. .byte 52
  556. .byte 56
  557. .byte 60
  558. .byte 64
  559. .byte 68
  560. .byte 72
  561. .byte 76
  562. .byte 80
  563. .byte 84
  564. .byte 88
  565. .byte 92
  566. .byte 96
  567. .byte 100
  568. .byte 104
  569. .byte 108
  570. .byte 112
  571. .byte 116
  572. .byte 120
  573. .byte 124
  574. .byte -128 /* divisor 128 */
  575. /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
  /* 64 longwords in total: 32 starting at zero_l and 32 starting at
     div_table_inv.  The div_table_inv label marks the middle of the table
     so that the signed byte offsets stored in div_table_ix (-128 .. 124)
     can reach every entry.  zero_l, the first entry, is also loaded directly
     with mov.l by the div1 paths as a 32-bit constant 0. */
  576. .balign 4
  577. zero_l:
  578. .long 0x0
  579. .long 0xF81F81F9
  580. .long 0xF07C1F08
  581. .long 0xE9131AC0
  582. .long 0xE1E1E1E2
  583. .long 0xDAE6076C
  584. .long 0xD41D41D5
  585. .long 0xCD856891
  586. .long 0xC71C71C8
  587. .long 0xC0E07039
  588. .long 0xBACF914D
  589. .long 0xB4E81B4F
  590. .long 0xAF286BCB
  591. .long 0xA98EF607
  592. .long 0xA41A41A5
  593. .long 0x9EC8E952
  594. .long 0x9999999A
  595. .long 0x948B0FCE
  596. .long 0x8F9C18FA
  597. .long 0x8ACB90F7
  598. .long 0x86186187
  599. .long 0x81818182
  600. .long 0x7D05F418
  601. .long 0x78A4C818
  602. .long 0x745D1746
  603. .long 0x702E05C1
  604. .long 0x6C16C16D
  605. .long 0x68168169
  606. .long 0x642C8591
  607. .long 0x60581606
  608. .long 0x5C9882BA
  609. .long 0x58ED2309
  610. div_table_inv: /* offset 0 for div_table_ix; entries above are reached with negative offsets */
  611. .long 0x55555556
  612. .long 0x51D07EAF
  613. .long 0x4E5E0A73
  614. .long 0x4AFD6A06
  615. .long 0x47AE147B
  616. .long 0x446F8657
  617. .long 0x41414142
  618. .long 0x3E22CBCF
  619. .long 0x3B13B13C
  620. .long 0x38138139
  621. .long 0x3521CFB3
  622. .long 0x323E34A3
  623. .long 0x2F684BDB
  624. .long 0x2C9FB4D9
  625. .long 0x29E4129F
  626. .long 0x27350B89
  627. .long 0x24924925
  628. .long 0x21FB7813
  629. .long 0x1F7047DD
  630. .long 0x1CF06ADB
  631. .long 0x1A7B9612
  632. .long 0x18118119
  633. .long 0x15B1E5F8
  634. .long 0x135C8114
  635. .long 0x11111112
  636. .long 0xECF56BF
  637. .long 0xC9714FC
  638. .long 0xA6810A7
  639. .long 0x8421085
  640. .long 0x624DD30
  641. .long 0x4104105
  642. .long 0x2040811
  643. /* maximum error: 0.987342 scaled: 0.921875*/