op-2.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. * Basic two-word fraction declaration and manipulation.
  3. */
  4. #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
  5. #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
  6. #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
  7. #define _FP_FRAC_HIGH_2(X) (X##_f1)
  8. #define _FP_FRAC_LOW_2(X) (X##_f0)
  9. #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
  10. #define _FP_FRAC_SLL_2(X,N) \
  11. do { \
  12. if ((N) < _FP_W_TYPE_SIZE) \
  13. { \
  14. if (__builtin_constant_p(N) && (N) == 1) \
  15. { \
  16. X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
  17. X##_f0 += X##_f0; \
  18. } \
  19. else \
  20. { \
  21. X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  22. X##_f0 <<= (N); \
  23. } \
  24. } \
  25. else \
  26. { \
  27. X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
  28. X##_f0 = 0; \
  29. } \
  30. } while (0)
  31. #define _FP_FRAC_SRL_2(X,N) \
  32. do { \
  33. if ((N) < _FP_W_TYPE_SIZE) \
  34. { \
  35. X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
  36. X##_f1 >>= (N); \
  37. } \
  38. else \
  39. { \
  40. X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
  41. X##_f1 = 0; \
  42. } \
  43. } while (0)
  44. /* Right shift with sticky-lsb. */
  45. #define _FP_FRAC_SRS_2(X,N,sz) \
  46. do { \
  47. if ((N) < _FP_W_TYPE_SIZE) \
  48. { \
  49. X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
  50. (__builtin_constant_p(N) && (N) == 1 \
  51. ? X##_f0 & 1 \
  52. : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
  53. X##_f1 >>= (N); \
  54. } \
  55. else \
  56. { \
  57. X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
  58. (((X##_f1 << (2 * _FP_W_TYPE_SIZE - (N))) | \
  59. X##_f0) != 0)); \
  60. X##_f1 = 0; \
  61. } \
  62. } while (0)
  63. #define _FP_FRAC_ADDI_2(X,I) \
  64. __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
  65. #define _FP_FRAC_ADD_2(R,X,Y) \
  66. __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
  67. #define _FP_FRAC_SUB_2(R,X,Y) \
  68. __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
  69. #define _FP_FRAC_CLZ_2(R,X) \
  70. do { \
  71. if (X##_f1) \
  72. __FP_CLZ(R,X##_f1); \
  73. else \
  74. { \
  75. __FP_CLZ(R,X##_f0); \
  76. R += _FP_W_TYPE_SIZE; \
  77. } \
  78. } while(0)
  79. /* Predicates */
  80. #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
  81. #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
  82. #define _FP_FRAC_OVERP_2(fs,X) (X##_f1 & _FP_OVERFLOW_##fs)
  83. #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
  84. #define _FP_FRAC_GT_2(X, Y) \
  85. ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
  86. #define _FP_FRAC_GE_2(X, Y) \
  87. ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
  88. #define _FP_ZEROFRAC_2 0, 0
  89. #define _FP_MINFRAC_2 0, 1
  90. /*
  91. * Internals
  92. */
  93. #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
  94. #define __FP_CLZ_2(R, xh, xl) \
  95. do { \
  96. if (xh) \
  97. __FP_CLZ(R,xl); \
  98. else \
  99. { \
  100. __FP_CLZ(R,xl); \
  101. R += _FP_W_TYPE_SIZE; \
  102. } \
  103. } while(0)
  104. #if 0
  105. #ifndef __FP_FRAC_ADDI_2
  106. #define __FP_FRAC_ADDI_2(xh, xl, i) \
  107. (xh += ((xl += i) < i))
  108. #endif
  109. #ifndef __FP_FRAC_ADD_2
  110. #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
  111. (rh = xh + yh + ((rl = xl + yl) < xl))
  112. #endif
  113. #ifndef __FP_FRAC_SUB_2
  114. #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
  115. (rh = xh - yh - ((rl = xl - yl) > xl))
  116. #endif
  117. #else
  118. #undef __FP_FRAC_ADDI_2
  119. #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
  120. #undef __FP_FRAC_ADD_2
  121. #define __FP_FRAC_ADD_2 add_ssaaaa
  122. #undef __FP_FRAC_SUB_2
  123. #define __FP_FRAC_SUB_2 sub_ddmmss
  124. #endif
  125. /*
  126. * Unpack the raw bits of a native fp value. Do not classify or
  127. * normalize the data.
  128. */
  129. #define _FP_UNPACK_RAW_2(fs, X, val) \
  130. do { \
  131. union _FP_UNION_##fs _flo; _flo.flt = (val); \
  132. \
  133. X##_f0 = _flo.bits.frac0; \
  134. X##_f1 = _flo.bits.frac1; \
  135. X##_e = _flo.bits.exp; \
  136. X##_s = _flo.bits.sign; \
  137. } while (0)
  138. /*
  139. * Repack the raw bits of a native fp value.
  140. */
  141. #define _FP_PACK_RAW_2(fs, val, X) \
  142. do { \
  143. union _FP_UNION_##fs _flo; \
  144. \
  145. _flo.bits.frac0 = X##_f0; \
  146. _flo.bits.frac1 = X##_f1; \
  147. _flo.bits.exp = X##_e; \
  148. _flo.bits.sign = X##_s; \
  149. \
  150. (val) = _flo.flt; \
  151. } while (0)
  152. /*
  153. * Multiplication algorithms:
  154. */
  155. /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
  156. #define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit) \
  157. do { \
  158. _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
  159. \
  160. doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
  161. doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
  162. doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
  163. doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
  164. \
  165. __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
  166. _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \
  167. 0, _b_f1, _b_f0, 0, \
  168. _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
  169. _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \
  170. __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
  171. _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \
  172. 0, _c_f1, _c_f0, 0, \
  173. _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
  174. _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \
  175. \
  176. /* Normalize since we know where the msb of the multiplicands \
  177. were (bit B), we know that the msb of the of the product is \
  178. at either 2B or 2B-1. */ \
  179. _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \
  180. R##_f0 = _FP_FRAC_WORD_4(_z,0); \
  181. R##_f1 = _FP_FRAC_WORD_4(_z,1); \
  182. } while (0)
  183. /* This next macro appears to be totally broken. Fortunately nowhere
  184. * seems to use it :-> The problem is that we define _z[4] but
  185. * then use it in _FP_FRAC_SRS_4, which will attempt to access
  186. * _z_f[n] which will cause an error. The fix probably involves
  187. * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998
  188. */
  189. #define _FP_MUL_MEAT_2_gmp(fs, R, X, Y) \
  190. do { \
  191. _FP_W_TYPE _x[2], _y[2], _z[4]; \
  192. _x[0] = X##_f0; _x[1] = X##_f1; \
  193. _y[0] = Y##_f0; _y[1] = Y##_f1; \
  194. \
  195. mpn_mul_n(_z, _x, _y, 2); \
  196. \
  197. /* Normalize since we know where the msb of the multiplicands \
  198. were (bit B), we know that the msb of the of the product is \
  199. at either 2B or 2B-1. */ \
  200. _FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs); \
  201. R##_f0 = _z[0]; \
  202. R##_f1 = _z[1]; \
  203. } while (0)
  204. /*
  205. * Division algorithms:
  206. * This seems to be giving me difficulties -- PMM
  207. * Look, NetBSD seems to be able to comment algorithms. Can't you?
  208. * I've thrown printks at the problem.
  209. * This now appears to work, but I still don't really know why.
  210. * Also, I don't think the result is properly normalised...
  211. */
  212. #define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y) \
  213. do { \
  214. extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2], \
  215. _FP_W_TYPE n1, _FP_W_TYPE n0, \
  216. _FP_W_TYPE d1, _FP_W_TYPE d0); \
  217. _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0; \
  218. _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0; \
  219. _FP_W_TYPE _rmem[2], _qmem[2]; \
  220. /* I think this check is to ensure that the result is normalised. \
  221. * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in \
  222. * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y. \
  223. * In this case we tweak things. (this is based on comments in \
  224. * the NetBSD FPU emulation code. ) \
  225. * We know X,Y are normalised because we ensure this as part of \
  226. * the unpacking process. -- PMM \
  227. */ \
  228. if (_FP_FRAC_GT_2(X, Y)) \
  229. { \
  230. /* R##_e++; */ \
  231. _n_f3 = X##_f1 >> 1; \
  232. _n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
  233. _n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
  234. _n_f0 = 0; \
  235. } \
  236. else \
  237. { \
  238. R##_e--; \
  239. _n_f3 = X##_f1; \
  240. _n_f2 = X##_f0; \
  241. _n_f1 = _n_f0 = 0; \
  242. } \
  243. \
  244. /* Normalize, i.e. make the most significant bit of the \
  245. denominator set. CHANGED: - 1 to nothing -- PMM */ \
  246. _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */); \
  247. \
  248. /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 \
  249. primitive snagged from libgcc2.c. */ \
  250. \
  251. _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1); \
  252. _q_f1 = _qmem[0]; \
  253. umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0); \
  254. _r_f1 = _rmem[0]; \
  255. _r_f0 = _n_f1; \
  256. if (_FP_FRAC_GT_2(_m, _r)) \
  257. { \
  258. _q_f1--; \
  259. _FP_FRAC_ADD_2(_r, _r, Y); \
  260. if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
  261. { \
  262. _q_f1--; \
  263. _FP_FRAC_ADD_2(_r, _r, Y); \
  264. } \
  265. } \
  266. _FP_FRAC_SUB_2(_r, _r, _m); \
  267. \
  268. _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1); \
  269. _q_f0 = _qmem[0]; \
  270. umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0); \
  271. _r_f1 = _rmem[0]; \
  272. _r_f0 = _n_f0; \
  273. if (_FP_FRAC_GT_2(_m, _r)) \
  274. { \
  275. _q_f0--; \
  276. _FP_FRAC_ADD_2(_r, _r, Y); \
  277. if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
  278. { \
  279. _q_f0--; \
  280. _FP_FRAC_ADD_2(_r, _r, Y); \
  281. } \
  282. } \
  283. _FP_FRAC_SUB_2(_r, _r, _m); \
  284. \
  285. R##_f1 = _q_f1; \
  286. R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0); \
  287. /* adjust so answer is normalized again. I'm not sure what the \
  288. * final sz param should be. In practice it's never used since \
  289. * N is 1 which is always going to be < _FP_W_TYPE_SIZE... \
  290. */ \
  291. /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs); */ \
  292. } while (0)
  293. #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
  294. do { \
  295. _FP_W_TYPE _x[4], _y[2], _z[4]; \
  296. _y[0] = Y##_f0; _y[1] = Y##_f1; \
  297. _x[0] = _x[3] = 0; \
  298. if (_FP_FRAC_GT_2(X, Y)) \
  299. { \
  300. R##_e++; \
  301. _x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) | \
  302. X##_f1 >> (_FP_W_TYPE_SIZE - \
  303. (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE))); \
  304. _x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE); \
  305. } \
  306. else \
  307. { \
  308. _x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) | \
  309. X##_f1 >> (_FP_W_TYPE_SIZE - \
  310. (_FP_WFRACBITS - _FP_W_TYPE_SIZE))); \
  311. _x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE); \
  312. } \
  313. \
  314. (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
  315. R##_f1 = _z[1]; \
  316. R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
  317. } while (0)
  318. /*
  319. * Square root algorithms:
  320. * We have just one right now, maybe Newton approximation
  321. * should be added for those machines where division is fast.
  322. */
  323. #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
  324. do { \
  325. while (q) \
  326. { \
  327. T##_f1 = S##_f1 + q; \
  328. if (T##_f1 <= X##_f1) \
  329. { \
  330. S##_f1 = T##_f1 + q; \
  331. X##_f1 -= T##_f1; \
  332. R##_f1 += q; \
  333. } \
  334. _FP_FRAC_SLL_2(X, 1); \
  335. q >>= 1; \
  336. } \
  337. q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
  338. while (q) \
  339. { \
  340. T##_f0 = S##_f0 + q; \
  341. T##_f1 = S##_f1; \
  342. if (T##_f1 < X##_f1 || \
  343. (T##_f1 == X##_f1 && T##_f0 < X##_f0)) \
  344. { \
  345. S##_f0 = T##_f0 + q; \
  346. if (((_FP_WS_TYPE)T##_f0) < 0 && \
  347. ((_FP_WS_TYPE)S##_f0) >= 0) \
  348. S##_f1++; \
  349. _FP_FRAC_SUB_2(X, X, T); \
  350. R##_f0 += q; \
  351. } \
  352. _FP_FRAC_SLL_2(X, 1); \
  353. q >>= 1; \
  354. } \
  355. } while (0)
  356. /*
  357. * Assembly/disassembly for converting to/from integral types.
  358. * No shifting or overflow handled here.
  359. */
  360. #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
  361. do { \
  362. if (rsize <= _FP_W_TYPE_SIZE) \
  363. r = X##_f0; \
  364. else \
  365. { \
  366. r = X##_f1; \
  367. r <<= _FP_W_TYPE_SIZE; \
  368. r += X##_f0; \
  369. } \
  370. } while (0)
  371. #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
  372. do { \
  373. X##_f0 = r; \
  374. X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
  375. } while (0)
  376. /*
  377. * Convert FP values between word sizes
  378. */
  379. #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \
  380. do { \
  381. _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
  382. _FP_WFRACBITS_##sfs); \
  383. D##_f = S##_f0; \
  384. } while (0)
  385. #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \
  386. do { \
  387. D##_f0 = S##_f; \
  388. D##_f1 = 0; \
  389. _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
  390. } while (0)