
/*
 * Floating point emulation support for subnormalised numbers on SH4
 * architecture.  This file is derived from the SoftFloat IEC/IEEE
 * Floating-point Arithmetic Package, Release 2, the original license of
 * which is reproduced below.
 *
 * ========================================================================
 *
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 *
 * ========================================================================
 *
 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
 * and Kamel Khelifi <kamel.khelifi@st.com>
 */
#include <linux/kernel.h>
#include <cpu/fpu.h>

#define LIT64(a) a##LL

/*
 * SoftFloat-style type names.  Note that int16/uint16 only need to hold
 * values of at least 16 bits, so plain int is used for both here.
 */
typedef char flag;
typedef unsigned char uint8;
typedef signed char int8;
typedef int uint16;
typedef int int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned long long int bits64;
typedef signed long long int sbits64;
typedef unsigned char bits8;
typedef signed char sbits8;
typedef unsigned short int bits16;
typedef signed short int sbits16;
typedef unsigned int bits32;
typedef signed int sbits32;
typedef unsigned long long int uint64;
typedef signed long long int int64;
typedef unsigned long int float32;
typedef unsigned long long float64;

extern void float_raise(unsigned int flags);	/* in fpu.c */
extern int float_rounding_mode(void);	/* in fpu.c */

inline bits64 extractFloat64Frac(float64 a);
inline flag extractFloat64Sign(float64 a);
inline int16 extractFloat64Exp(float64 a);
inline int16 extractFloat32Exp(float32 a);
inline flag extractFloat32Sign(float32 a);
inline bits32 extractFloat32Frac(float32 a);
inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
inline void shift64RightJamming(bits64 a, int16 count, bits64 *zPtr);
inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
inline void shift32RightJamming(bits32 a, int16 count, bits32 *zPtr);
float64 float64_sub(float64 a, float64 b);
float32 float32_sub(float32 a, float32 b);
float32 float32_add(float32 a, float32 b);
float64 float64_add(float64 a, float64 b);
float64 float64_div(float64 a, float64 b);
float32 float32_div(float32 a, float32 b);
float32 float32_mul(float32 a, float32 b);
float64 float64_mul(float64 a, float64 b);
float32 float64_to_float32(float64 a);
inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1,
		   bits64 *z0Ptr, bits64 *z1Ptr);
inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1,
		   bits64 *z0Ptr, bits64 *z1Ptr);
inline void mul64To128(bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr);

static int8 countLeadingZeros32(bits32 a);
static int8 countLeadingZeros64(bits64 a);
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
					    bits64 zSig);
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
					    bits32 zSig);
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
static void normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr,
				      bits64 *zSigPtr);
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
static void normalizeFloat32Subnormal(bits32 aSig, int16 *zExpPtr,
				      bits32 *zSigPtr);
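
/*
 * Bit-field helpers: extract the fraction, exponent and sign fields of
 * IEEE single- and double-precision values, and pack them back together.
 */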
inline bits64 extractFloat64Frac(float64 a)
{
	return a & LIT64(0x000FFFFFFFFFFFFF);
}

inline flag extractFloat64Sign(float64 a)
{
	return a >> 63;
}

inline int16 extractFloat64Exp(float64 a)
{
	return (a >> 52) & 0x7FF;
}

inline int16 extractFloat32Exp(float32 a)
{
	return (a >> 23) & 0xFF;
}

inline flag extractFloat32Sign(float32 a)
{
	return a >> 31;
}

inline bits32 extractFloat32Frac(float32 a)
{
	return a & 0x007FFFFF;
}

inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
}
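
/*
 * Shift `a' right by `count' bits.  Any nonzero bits shifted off are
 * "jammed" into the least significant bit of the result, so the rounding
 * code can still tell that the discarded part was nonzero.
 */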
inline void shift64RightJamming(bits64 a, int16 count, bits64 *zPtr)
{
	bits64 z;

	if (count == 0) {
		z = a;
	} else if (count < 64) {
		z = (a >> count) | ((a << ((-count) & 63)) != 0);
	} else {
		z = (a != 0);
	}
	*zPtr = z;
}
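
/*
 * Count the number of leading zero bits in `a' (32- and 64-bit variants),
 * using an 8-bit lookup table for the final step.
 */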
static int8 countLeadingZeros32(bits32 a)
{
	static const int8 countLeadingZerosHigh[] = {
		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	};
	int8 shiftCount;

	shiftCount = 0;
	if (a < 0x10000) {
		shiftCount += 16;
		a <<= 16;
	}
	if (a < 0x1000000) {
		shiftCount += 8;
		a <<= 8;
	}
	shiftCount += countLeadingZerosHigh[a >> 24];
	return shiftCount;
}

static int8 countLeadingZeros64(bits64 a)
{
	int8 shiftCount;

	shiftCount = 0;
	if (a < ((bits64) 1) << 32) {
		shiftCount += 32;
	} else {
		a >>= 32;
	}
	shiftCount += countLeadingZeros32(a);
	return shiftCount;
}
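
/*
 * Like roundAndPackFloat64(), except that the significand `zSig' does not
 * have to be normalized on entry: it is shifted left first and `zExp'
 * adjusted to match.
 */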
static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros64(zSig) - 1;
	return roundAndPackFloat64(zSign, zExp - shiftCount,
				   zSig << shiftCount);
}
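
/*
 * Core of float64 subtraction: subtracts the absolute values of `a' and
 * `b'.  A nonzero result keeps sign `zSign' when |a| > |b| and takes the
 * opposite sign when |b| > |a|.
 */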
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 10;
	bSig <<= 10;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	if (aExp == 0) {
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	/* Exact zero result: negative zero when rounding toward zero */
	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
bExpBigger:
	if (bExp == 0x7FF) {
		return packFloat64(zSign ^ 1, 0x7FF, 0);
	}
	if (aExp == 0) {
		++expDiff;
	} else {
		aSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(aSig, -expDiff, &aSig);
	bSig |= LIT64(0x4000000000000000);
bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
aExpBigger:
	if (aExp == 0x7FF) {
		return a;
	}
	if (bExp == 0) {
		--expDiff;
	} else {
		bSig |= LIT64(0x4000000000000000);
	}
	shift64RightJamming(bSig, expDiff, &bSig);
	aSig |= LIT64(0x4000000000000000);
aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
normalizeRoundAndPack:
	--zExp;
	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
}
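
/*
 * Core of float64 addition: adds the absolute values of `a' and `b' and
 * gives the result the sign `zSign'.  Significands carry 9 extra low bits
 * of precision for rounding.
 */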
static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 9;
	bSig <<= 9;
	if (0 < expDiff) {
		if (aExp == 0x7FF) {
			return a;
		}
		if (bExp == 0) {
			--expDiff;
		} else {
			bSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		if (bExp == 0x7FF) {
			return packFloat64(zSign, 0x7FF, 0);
		}
		if (aExp == 0) {
			++expDiff;
		} else {
			aSig |= LIT64(0x2000000000000000);
		}
		shift64RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		if (aExp == 0x7FF) {
			return a;
		}
		if (aExp == 0)
			return packFloat64(zSign, 0, (aSig + bSig) >> 9);
		zSig = LIT64(0x4000000000000000) + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= LIT64(0x2000000000000000);
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits64) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
roundAndPack:
	return roundAndPackFloat64(zSign, zExp, zSig);
}

inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
}

inline void shift32RightJamming(bits32 a, int16 count, bits32 *zPtr)
{
	bits32 z;

	if (count == 0) {
		z = a;
	} else if (count < 32) {
		z = (a >> count) | ((a << ((-count) & 31)) != 0);
	} else {
		z = (a != 0);
	}
	*zPtr = z;
}
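
/*
 * Round the significand `zSig' (which carries 7 guard bits) and pack it
 * together with `zSign' and `zExp' into a single-precision value, raising
 * the FPSCR overflow/underflow/inexact causes as needed.
 */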
static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	flag roundNearestEven;
	int8 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x40;
	if (!roundNearestEven) {
		roundIncrement = 0;
	}
	roundBits = zSig & 0x7F;
	if (0xFD <= (bits16) zExp) {
		if ((0xFD < zExp)
		    || ((zExp == 0xFD)
			&& ((sbits32) (zSig + roundIncrement) < 0))) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			return packFloat32(zSign, 0xFF, 0) -
			    (roundIncrement == 0);
		}
		if (zExp < 0) {
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement < 0x80000000);
			shift32RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x7F;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 7;
	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat32(zSign, zExp, zSig);
}

static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros32(zSig) - 1;
	return roundAndPackFloat32(zSign, zExp - shiftCount,
				   zSig << shiftCount);
}
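
/*
 * Double-precision counterpart of roundAndPackFloat32(): here `zSig'
 * carries 10 guard bits.
 */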
static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
{
	flag roundNearestEven;
	int16 roundIncrement, roundBits;
	flag isTiny;

	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
	roundIncrement = 0x200;
	if (!roundNearestEven) {
		roundIncrement = 0;
	}
	roundBits = zSig & 0x3FF;
	if (0x7FD <= (bits16) zExp) {
		if ((0x7FD < zExp)
		    || ((zExp == 0x7FD)
			&& ((sbits64) (zSig + roundIncrement) < 0))) {
			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
			return packFloat64(zSign, 0x7FF, 0) -
			    (roundIncrement == 0);
		}
		if (zExp < 0) {
			isTiny = (zExp < -1)
			    || (zSig + roundIncrement <
				LIT64(0x8000000000000000));
			shift64RightJamming(zSig, -zExp, &zSig);
			zExp = 0;
			roundBits = zSig & 0x3FF;
			if (isTiny && roundBits)
				float_raise(FPSCR_CAUSE_UNDERFLOW);
		}
	}
	if (roundBits)
		float_raise(FPSCR_CAUSE_INEXACT);
	zSig = (zSig + roundIncrement) >> 10;
	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
	if (zSig == 0)
		zExp = 0;
	return packFloat64(zSign, zExp, zSig);
}
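
/*
 * Core of float32 subtraction; the single-precision analogue of
 * subFloat64Sigs() above.
 */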
static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 7;
	bSig <<= 7;
	if (0 < expDiff)
		goto aExpBigger;
	if (expDiff < 0)
		goto bExpBigger;
	if (aExp == 0) {
		aExp = 1;
		bExp = 1;
	}
	if (bSig < aSig)
		goto aBigger;
	if (aSig < bSig)
		goto bBigger;
	/* Exact zero result: negative zero when rounding toward zero */
	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
bExpBigger:
	if (bExp == 0xFF) {
		return packFloat32(zSign ^ 1, 0xFF, 0);
	}
	if (aExp == 0) {
		++expDiff;
	} else {
		aSig |= 0x40000000;
	}
	shift32RightJamming(aSig, -expDiff, &aSig);
	bSig |= 0x40000000;
bBigger:
	zSig = bSig - aSig;
	zExp = bExp;
	zSign ^= 1;
	goto normalizeRoundAndPack;
aExpBigger:
	if (aExp == 0xFF) {
		return a;
	}
	if (bExp == 0) {
		--expDiff;
	} else {
		bSig |= 0x40000000;
	}
	shift32RightJamming(bSig, expDiff, &bSig);
	aSig |= 0x40000000;
aBigger:
	zSig = aSig - bSig;
	zExp = aExp;
normalizeRoundAndPack:
	--zExp;
	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
}
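
/*
 * Core of float32 addition; the single-precision analogue of
 * addFloat64Sigs() above (6 guard bits instead of 9).
 */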
static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
{
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;
	int16 expDiff;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	expDiff = aExp - bExp;
	aSig <<= 6;
	bSig <<= 6;
	if (0 < expDiff) {
		if (aExp == 0xFF) {
			return a;
		}
		if (bExp == 0) {
			--expDiff;
		} else {
			bSig |= 0x20000000;
		}
		shift32RightJamming(bSig, expDiff, &bSig);
		zExp = aExp;
	} else if (expDiff < 0) {
		if (bExp == 0xFF) {
			return packFloat32(zSign, 0xFF, 0);
		}
		if (aExp == 0) {
			++expDiff;
		} else {
			aSig |= 0x20000000;
		}
		shift32RightJamming(aSig, -expDiff, &aSig);
		zExp = bExp;
	} else {
		if (aExp == 0xFF) {
			return a;
		}
		if (aExp == 0)
			return packFloat32(zSign, 0, (aSig + bSig) >> 6);
		zSig = 0x40000000 + aSig + bSig;
		zExp = aExp;
		goto roundAndPack;
	}
	aSig |= 0x20000000;
	zSig = (aSig + bSig) << 1;
	--zExp;
	if ((sbits32) zSig < 0) {
		zSig = aSig + bSig;
		++zExp;
	}
roundAndPack:
	return roundAndPackFloat32(zSign, zExp, zSig);
}
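
/*
 * Public add/sub entry points.  A subtraction of like-signed operands is
 * a magnitude subtraction, and an addition of opposite-signed operands
 * likewise, so each entry point dispatches on the two sign bits.
 */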
float64 float64_sub(float64 a, float64 b)
{
	flag aSign, bSign;

	aSign = extractFloat64Sign(a);
	bSign = extractFloat64Sign(b);
	if (aSign == bSign) {
		return subFloat64Sigs(a, b, aSign);
	} else {
		return addFloat64Sigs(a, b, aSign);
	}
}

float32 float32_sub(float32 a, float32 b)
{
	flag aSign, bSign;

	aSign = extractFloat32Sign(a);
	bSign = extractFloat32Sign(b);
	if (aSign == bSign) {
		return subFloat32Sigs(a, b, aSign);
	} else {
		return addFloat32Sigs(a, b, aSign);
	}
}

float32 float32_add(float32 a, float32 b)
{
	flag aSign, bSign;

	aSign = extractFloat32Sign(a);
	bSign = extractFloat32Sign(b);
	if (aSign == bSign) {
		return addFloat32Sigs(a, b, aSign);
	} else {
		return subFloat32Sigs(a, b, aSign);
	}
}

float64 float64_add(float64 a, float64 b)
{
	flag aSign, bSign;

	aSign = extractFloat64Sign(a);
	bSign = extractFloat64Sign(b);
	if (aSign == bSign) {
		return addFloat64Sigs(a, b, aSign);
	} else {
		return subFloat64Sigs(a, b, aSign);
	}
}
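
/*
 * Normalize a nonzero subnormal significand: shift the most significant
 * set bit up to the hidden-bit position and return the corresponding
 * (negative or zero) exponent adjustment.
 */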
static void
normalizeFloat64Subnormal(bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros64(aSig) - 11;
	*zSigPtr = aSig << shiftCount;
	*zExpPtr = 1 - shiftCount;
}
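
/*
 * 128-bit add/subtract on value pairs (a0:a1) and (b0:b1), propagating
 * the carry/borrow between the two 64-bit halves.
 */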
inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1,
		   bits64 *z0Ptr, bits64 *z1Ptr)
{
	bits64 z1;

	z1 = a1 + b1;
	*z1Ptr = z1;
	*z0Ptr = a0 + b0 + (z1 < a1);
}

inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1,
		   bits64 *z0Ptr, bits64 *z1Ptr)
{
	*z1Ptr = a1 - b1;
	*z0Ptr = a0 - b0 - (a1 < b1);
}
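
/*
 * Returns an approximation of the 64-bit quotient of the 128-bit value
 * a0:a1 divided by `b'.  In the SoftFloat sources this estimate is
 * documented to exceed the true quotient by at most 2; callers correct
 * it downward (see float64_div() below).
 */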
static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
{
	bits64 b0, b1;
	bits64 rem0, rem1, term0, term1;
	bits64 z;

	if (b <= a0)
		return LIT64(0xFFFFFFFFFFFFFFFF);
	b0 = b >> 32;
	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32;
	mul64To128(b, z, &term0, &term1);
	sub128(a0, a1, term0, term1, &rem0, &rem1);
	while (((sbits64) rem0) < 0) {
		z -= LIT64(0x100000000);
		b1 = b << 32;
		add128(rem0, rem1, b0, b1, &rem0, &rem1);
	}
	rem0 = (rem0 << 32) | (rem1 >> 32);
	z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
	return z;
}
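
/*
 * Full 64x64 -> 128-bit multiply, built from four 32x32 partial products.
 */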
inline void mul64To128(bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr)
{
	bits32 aHigh, aLow, bHigh, bLow;
	bits64 z0, zMiddleA, zMiddleB, z1;

	aLow = a;
	aHigh = a >> 32;
	bLow = b;
	bHigh = b >> 32;
	z1 = ((bits64) aLow) * bLow;
	zMiddleA = ((bits64) aLow) * bHigh;
	zMiddleB = ((bits64) aHigh) * bLow;
	z0 = ((bits64) aHigh) * bHigh;
	zMiddleA += zMiddleB;
	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
	zMiddleA <<= 32;
	z1 += zMiddleA;
	z0 += (z1 < zMiddleA);
	*z1Ptr = z1;
	*z0Ptr = z0;
}

static void normalizeFloat32Subnormal(bits32 aSig, int16 *zExpPtr,
				      bits32 *zSigPtr)
{
	int8 shiftCount;

	shiftCount = countLeadingZeros32(aSig) - 8;
	*zSigPtr = aSig << shiftCount;
	*zExpPtr = 1 - shiftCount;
}
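
/*
 * Division.  Note that only the double-precision path raises
 * FPSCR_CAUSE_INVALID for 0/0, and neither path produces a NaN for
 * inf/inf; both simply pack an infinity.
 */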
float64 float64_div(float64 a, float64 b)
{
	flag aSign, bSign, zSign;
	int16 aExp, bExp, zExp;
	bits64 aSig, bSig, zSig;
	bits64 rem0, rem1;
	bits64 term0, term1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0x7FF) {
		/* inf/inf is not special-cased; it also packs an infinity */
		if (bExp == 0x7FF) {
		}
		return packFloat64(zSign, 0x7FF, 0);
	}
	if (bExp == 0x7FF) {
		return packFloat64(zSign, 0, 0);
	}
	if (bExp == 0) {
		if (bSig == 0) {
			/* 0/0 raises invalid; any other x/0 yields infinity */
			if ((aExp | aSig) == 0) {
				float_raise(FPSCR_CAUSE_INVALID);
			}
			return packFloat64(zSign, 0x7FF, 0);
		}
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	zExp = aExp - bExp + 0x3FD;
	aSig = (aSig | LIT64(0x0010000000000000)) << 10;
	bSig = (bSig | LIT64(0x0010000000000000)) << 11;
	if (bSig <= (aSig + aSig)) {
		aSig >>= 1;
		++zExp;
	}
	zSig = estimateDiv128To64(aSig, 0, bSig);
	if ((zSig & 0x1FF) <= 2) {
		/* The quotient estimate may be up to 2 too large; fix it */
		mul64To128(bSig, zSig, &term0, &term1);
		sub128(aSig, 0, term0, term1, &rem0, &rem1);
		while ((sbits64) rem0 < 0) {
			--zSig;
			add128(rem0, rem1, 0, bSig, &rem0, &rem1);
		}
		zSig |= (rem1 != 0);
	}
	return roundAndPackFloat64(zSign, zExp, zSig);
}

float32 float32_div(float32 a, float32 b)
{
	flag aSign, bSign, zSign;
	int16 aExp, bExp, zExp;
	bits32 aSig, bSig, zSig;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	aSign = extractFloat32Sign(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	bSign = extractFloat32Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0xFF) {
		/* inf/inf is not special-cased; it also packs an infinity */
		if (bExp == 0xFF) {
		}
		return packFloat32(zSign, 0xFF, 0);
	}
	if (bExp == 0xFF) {
		return packFloat32(zSign, 0, 0);
	}
	if (bExp == 0) {
		if (bSig == 0) {
			return packFloat32(zSign, 0xFF, 0);
		}
		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
	}
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
	}
	zExp = aExp - bExp + 0x7D;
	aSig = (aSig | 0x00800000) << 7;
	bSig = (bSig | 0x00800000) << 8;
	if (bSig <= (aSig + aSig)) {
		aSig >>= 1;
		++zExp;
	}
	zSig = (((bits64) aSig) << 32) / bSig;
	if ((zSig & 0x3F) == 0) {
		zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
	}
	return roundAndPackFloat32(zSign, zExp, zSig);
}
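
/*
 * Multiplication.  Significands are widened and multiplied exactly (a
 * 64-bit product for float32, mul64To128() for float64); any nonzero low
 * bits are then jammed into the sticky bit before rounding.
 */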
float32 float32_mul(float32 a, float32 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned int aSig, bSig;
	unsigned long long zSig64;
	unsigned int zSig;

	aSig = extractFloat32Frac(a);
	aExp = extractFloat32Exp(a);
	aSign = extractFloat32Sign(a);
	bSig = extractFloat32Frac(b);
	bExp = extractFloat32Exp(b);
	bSign = extractFloat32Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat32(zSign, 0, 0);
		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
	}
	if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
		return roundAndPackFloat32(zSign, 0xff, 0);
	zExp = aExp + bExp - 0x7F;
	aSig = (aSig | 0x00800000) << 7;
	bSig = (bSig | 0x00800000) << 8;
	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
	zSig = zSig64;
	if (0 <= (signed int)(zSig << 1)) {
		zSig <<= 1;
		--zExp;
	}
	return roundAndPackFloat32(zSign, zExp, zSig);
}

float64 float64_mul(float64 a, float64 b)
{
	char aSign, bSign, zSign;
	int aExp, bExp, zExp;
	unsigned long long int aSig, bSig, zSig0, zSig1;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);
	bSig = extractFloat64Frac(b);
	bExp = extractFloat64Exp(b);
	bSign = extractFloat64Sign(b);
	zSign = aSign ^ bSign;
	if (aExp == 0) {
		if (aSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
	}
	if (bExp == 0) {
		if (bSig == 0)
			return packFloat64(zSign, 0, 0);
		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
	}
	if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
		return roundAndPackFloat64(zSign, 0x7ff, 0);
	zExp = aExp + bExp - 0x3FF;
	aSig = (aSig | 0x0010000000000000LL) << 10;
	bSig = (bSig | 0x0010000000000000LL) << 11;
	mul64To128(aSig, bSig, &zSig0, &zSig1);
	zSig0 |= (zSig1 != 0);
	if (0 <= (signed long long int)(zSig0 << 1)) {
		zSig0 <<= 1;
		--zExp;
	}
	return roundAndPackFloat64(zSign, zExp, zSig0);
}

/*
 * Returns the result of converting the double-precision floating-point
 * value `a' to the single-precision floating-point format.  The
 * conversion is performed according to the IEC/IEEE Standard for Binary
 * Floating-point Arithmetic.
 */
float32 float64_to_float32(float64 a)
{
	flag aSign;
	int16 aExp;
	bits64 aSig;
	bits32 zSig;

	aSig = extractFloat64Frac(a);
	aExp = extractFloat64Exp(a);
	aSign = extractFloat64Sign(a);

	shift64RightJamming(aSig, 22, &aSig);
	zSig = aSig;
	if (aExp || zSig) {
		zSig |= 0x40000000;
		aExp -= 0x381;
	}
	return roundAndPackFloat32(aSign, aExp, zSig);
}