/* op-4.h */
  /*
   * Basic four-word fraction declaration and manipulation.
   *
   * When adding quadword support for 32 bit machines, we need
   * to be a little careful as double multiply uses some of these
   * macros (in op-2.h):
   *   _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4,
   *   _FP_FRAC_ADD_4, _FP_FRAC_SRS_4
   *   _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use
   *   _FP_FRAC_DECL_4: it appears to be broken and is not used
   *   anywhere anyway.)
   *
   * I've now fixed all the macros that were here from the sparc64 code.
   * [*none* of the shift macros were correct!] -- PMM 02/1998
   *
   * The only quadword stuff that remains to be coded is:
   * 1) the conversion to/from ints, which requires
   *    that we check (in op-common.h) that the following do the right thing
   *    for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt)
   * 2) multiply, divide and sqrt, which require:
   *    _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q).
   *    This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to
   *    some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h.
   *    [We're free to choose whatever _FP_MUL_MEAT_4_* macros we need for
   *    these; they are used nowhere else.]
   */
  /* Declare the fraction of X as an array of four words, X_f[0] .. X_f[3].
   * The element type is _FP_W_TYPE, which must be defined by the includer.
   */
  #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]

  /* Copy all four fraction words of S into D.
   * Expands to a single parenthesized comma expression.
   */
  #define _FP_FRAC_COPY_4(D,S) \
    (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
     D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
  /* The _FP_FRAC_SET_n(X,I) macro is intended for use with another
   * macro such as _FP_ZEROFRAC_n which expands to n comma separated values.
   * Because the argument is expanded before substitution, we end up with
   * __FP_FRAC_SET_4(X,I3,I2,I1,I0), which assigns the values to X_f[3]
   * down to X_f[0] (i.e. the list is written most significant word first).
   * This is why the number of parameters doesn't appear to match
   * at first glance... -- PMM
   */
  #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I)

  /* Word accessors: X_f[3] is the high word, X_f[0] the low word, and
   * _FP_FRAC_WORD_4(X,w) is word number w.  All three are lvalues.
   */
  #define _FP_FRAC_HIGH_4(X) (X##_f[3])
  #define _FP_FRAC_LOW_4(X) (X##_f[0])
  #define _FP_FRAC_WORD_4(X,w) (X##_f[w])
  /* Shift the four-word fraction X left by N bits, filling with zeros.
   * N is expected to be in the range 0 <= N < 4*_FP_W_TYPE_SIZE.
   *
   * _skip is the number of whole words moved and _up the remaining bit
   * count.  When N is a multiple of the word size, _up is 0 and _down would
   * be a full word width; shifting by the full width is undefined in C, so
   * that case is done as a plain word move instead.
   */
  #define _FP_FRAC_SLL_4(X,N) \
    do { \
      _FP_I_TYPE _up, _down, _skip, _i; \
      _skip = (N) / _FP_W_TYPE_SIZE; \
      _up = (N) % _FP_W_TYPE_SIZE; \
      _down = _FP_W_TYPE_SIZE - _up; \
      if (!_up) \
        for (_i = 3; _i >= _skip; --_i) \
          X##_f[_i] = X##_f[_i-_skip]; \
      else \
        { \
          for (_i = 3; _i > _skip; --_i) \
            X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down; \
          /* bugfixed: was X##_f[_i] <<= _up; -- PMM 02/1998 */ \
          X##_f[_i--] = X##_f[0] << _up; \
        } \
      /* everything below the lowest moved word becomes zero */ \
      for (; _i >= 0; --_i) \
        X##_f[_i] = 0; \
    } while (0)
  /* Shift the four-word fraction X right by N bits (logical shift, zeros
   * enter from the left).  N is expected to be in the range
   * 0 <= N < 4*_FP_W_TYPE_SIZE.
   *
   * _skip is the number of whole words moved and _down the remaining bit
   * count.  When N is a multiple of the word size, _down is 0 and _up would
   * be a full word width; shifting by the full width is undefined in C, so
   * that case is done as a plain word move instead.
   */
  #define _FP_FRAC_SRL_4(X,N) \
    do { \
      _FP_I_TYPE _up, _down, _skip, _i; \
      _skip = (N) / _FP_W_TYPE_SIZE; \
      _down = (N) % _FP_W_TYPE_SIZE; \
      _up = _FP_W_TYPE_SIZE - _down; \
      if (!_down) \
        for (_i = 0; _i <= 3-_skip; ++_i) \
          X##_f[_i] = X##_f[_i+_skip]; \
      else \
        { \
          for (_i = 0; _i < 3-_skip; ++_i) \
            X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \
          X##_f[_i++] = X##_f[3] >> _down; \
        } \
      /* everything above the highest moved word becomes zero */ \
      for (; _i < 4; ++_i) \
        X##_f[_i] = 0; \
    } while (0)
  /* Right shift with sticky-lsb.
   * What this actually means is that we do a standard right-shift of X by
   * N bits, but that if any of the bits that fall off the right hand side
   * were one then we always set the LSbit of the result.
   *
   * N is expected to be in the range 0 <= N < 4*_FP_W_TYPE_SIZE.  The size
   * argument is accepted but not used by this implementation.
   *
   * When N is a multiple of the word size, _down is 0 and _up would be a
   * full word width.  Shifting by the full width is undefined in C (and
   * would also make the sticky computation pick up bits that are NOT
   * shifted out), so that case is handled as a plain word move.
   */
  #define _FP_FRAC_SRS_4(X,N,size) \
    do { \
      _FP_I_TYPE _up, _down, _skip, _i; \
      _FP_W_TYPE _s; \
      _skip = (N) / _FP_W_TYPE_SIZE; \
      _down = (N) % _FP_W_TYPE_SIZE; \
      _up = _FP_W_TYPE_SIZE - _down; \
      /* collect the whole words that are shifted out */ \
      for (_s = _i = 0; _i < _skip; ++_i) \
        _s |= X##_f[_i]; \
      if (!_down) \
        for (_i = 0; _i <= 3-_skip; ++_i) \
          X##_f[_i] = X##_f[_i+_skip]; \
      else \
        { \
          /* plus the low _down bits of the first surviving word */ \
          _s |= X##_f[_i] << _up; \
          /* s is now != 0 if we want to set the LSbit */ \
          for (_i = 0; _i < 3-_skip; ++_i) \
            X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \
          X##_f[_i++] = X##_f[3] >> _down; \
        } \
      for (; _i < 4; ++_i) \
        X##_f[_i] = 0; \
      /* don't fix the LSB until the very end when we're sure f[0] is stable */ \
      X##_f[0] |= (_s != 0); \
    } while (0)
  /* R = X + Y on four-word fractions.  Forwards the individual words,
   * most significant first, to __FP_FRAC_ADD_4.
   */
  #define _FP_FRAC_ADD_4(R,X,Y) \
    __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                    X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                    Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

  /* R = X - Y on four-word fractions.  Forwards the individual words,
   * most significant first, to __FP_FRAC_SUB_4.
   */
  #define _FP_FRAC_SUB_4(R,X,Y) \
    __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                    X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                    Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

  /* X += I, where I is a single value handed to __FP_FRAC_ADDI_4 together
   * with the four words of X (most significant first).
   */
  #define _FP_FRAC_ADDI_4(X,I) \
    __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
  /* Four-value lists for use with _FP_FRAC_SET_4: all-zero fraction and
   * the fraction whose last listed value is 1.
   */
  #define _FP_ZEROFRAC_4 0,0,0,0
  #define _FP_MINFRAC_4 0,0,0,1

  /* True if every word of the fraction X is zero. */
  #define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)

  /* True if the high word X_f[3], reinterpreted as the signed word type
   * _FP_WS_TYPE, is negative.
   */
  #define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0)
  /* Non-zero if the overflow bit of format fs is set in X.
   * The mask _FP_OVERFLOW_<fs> is applied to the most significant word,
   * X_f[3]; the previous version tested X_f[0], the least significant word.
   * NOTE(review): _FP_OVERFLOW_<fs> is defined outside this file and is
   * assumed to be a mask within the top word -- confirm against the format
   * header.
   */
  #define _FP_FRAC_OVERP_4(fs,X) (X##_f[3] & _FP_OVERFLOW_##fs)
  /* True if all four words of X and Y are equal. */
  #define _FP_FRAC_EQ_4(X,Y) \
    (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
     && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])

  /* True if X > Y.  Words are compared from X_f[3] down to X_f[0]; a lower
   * word is only consulted when all higher words are equal.
   */
  #define _FP_FRAC_GT_4(X,Y) \
    (X##_f[3] > Y##_f[3] || \
     (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
      (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
       (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \
      )) \
     )) \
    )

  /* True if X >= Y.  Same word order as _FP_FRAC_GT_4; only the comparison
   * of the lowest word admits equality.
   */
  #define _FP_FRAC_GE_4(X,Y) \
    (X##_f[3] > Y##_f[3] || \
     (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
      (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
       (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \
      )) \
     )) \
    )
  /* Store in R the number of leading zero bits of the four-word fraction X.
   * The highest non-zero word is handed to __FP_CLZ and one word width is
   * added for every all-zero word above it.  If X is entirely zero,
   * __FP_CLZ is applied to X_f[0] (a zero word) and 3 word widths are added.
   *
   * Fix: the X_f[1] branch used to count the zeros of X_f[2], which is
   * known to be zero at that point.
   */
  #define _FP_FRAC_CLZ_4(R,X) \
    do { \
      if (X##_f[3]) \
        { \
          __FP_CLZ(R,X##_f[3]); \
        } \
      else if (X##_f[2]) \
        { \
          __FP_CLZ(R,X##_f[2]); \
          R += _FP_W_TYPE_SIZE; \
        } \
      else if (X##_f[1]) \
        { \
          __FP_CLZ(R,X##_f[1]); \
          R += _FP_W_TYPE_SIZE*2; \
        } \
      else \
        { \
          __FP_CLZ(R,X##_f[0]); \
          R += _FP_W_TYPE_SIZE*3; \
        } \
    } while(0)
  /* Split the raw value val of format fs into X_f[0..3], X_e and X_s.
   * val is stored into the flt member of union _FP_UNION_<fs> and read back
   * through its bits member (fields frac0..frac3, exp, sign); the union
   * type is defined outside this file.
   */
  #define _FP_UNPACK_RAW_4(fs, X, val) \
    do { \
      union _FP_UNION_##fs _flo; _flo.flt = (val); \
      X##_f[0] = _flo.bits.frac0; \
      X##_f[1] = _flo.bits.frac1; \
      X##_f[2] = _flo.bits.frac2; \
      X##_f[3] = _flo.bits.frac3; \
      X##_e = _flo.bits.exp; \
      X##_s = _flo.bits.sign; \
    } while (0)

  /* Inverse of _FP_UNPACK_RAW_4: fill the bits member of a
   * union _FP_UNION_<fs> from X_f[0..3], X_e and X_s, then assign its flt
   * member to val.
   */
  #define _FP_PACK_RAW_4(fs, val, X) \
    do { \
      union _FP_UNION_##fs _flo; \
      _flo.bits.frac0 = X##_f[0]; \
      _flo.bits.frac1 = X##_f[1]; \
      _flo.bits.frac2 = X##_f[2]; \
      _flo.bits.frac3 = X##_f[3]; \
      _flo.bits.exp = X##_e; \
      _flo.bits.sign = X##_s; \
      (val) = _flo.flt; \
    } while (0)
  /*
   * Internals
   */

  /* Assign I3..I0 to X_f[3]..X_f[0].  The values are listed most
   * significant word first; the expression's value is that of the final
   * assignment to X_f[0].
   */
  #define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \
    (X##_f[3] = (I3), X##_f[2] = (I2), X##_f[1] = (I1), X##_f[0] = (I0))
  #ifndef __FP_FRAC_ADD_4
  /* (r3,r2,r1,r0) = (x3,x2,x1,x0) + (y3,y2,y1,y0), words listed most
   * significant first; the carry out of the top word is discarded.
   *
   * The carry out of the low k words is set exactly when the low k result
   * words, taken as one number, are smaller than the low k words of x.
   * Comparing a single word (r1 < x1) is not enough: when y1 is all ones
   * and a carry comes in, r1 wraps around to exactly x1 and the carry used
   * to be lost.
   *
   * All carries are derived from r and x only, so r may be the same
   * objects as y, but r must not alias x.  Arguments are evaluated more
   * than once.
   */
  #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
    ((r0) = (x0) + (y0), \
     (r1) = (x1) + (y1) + ((r0) < (x0)), \
     (r2) = (x2) + (y2) \
            + ((r1) < (x1) || ((r1) == (x1) && (r0) < (x0))), \
     (r3) = (x3) + (y3) \
            + ((r2) < (x2) \
               || ((r2) == (x2) \
                   && ((r1) < (x1) || ((r1) == (x1) && (r0) < (x0))))))
  #endif
  #ifndef __FP_FRAC_SUB_4
  /* (r3,r2,r1,r0) = (x3,x2,x1,x0) - (y3,y2,y1,y0), words listed most
   * significant first; the borrow out of the top word is discarded.
   *
   * The borrow out of the low k words is set exactly when the low k result
   * words, taken as one number, are larger than the low k words of x.
   * Comparing a single word (r1 > x1) is not enough: when y1 is all ones
   * and a borrow comes in, r1 wraps around to exactly x1 and the borrow
   * used to be lost.
   *
   * All borrows are derived from r and x only, so r may be the same
   * objects as y, but r must not alias x.  Arguments are evaluated more
   * than once.
   */
  #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
    ((r0) = (x0) - (y0), \
     (r1) = (x1) - (y1) - ((r0) > (x0)), \
     (r2) = (x2) - (y2) \
            - ((r1) > (x1) || ((r1) == (x1) && (r0) > (x0))), \
     (r3) = (x3) - (y3) \
            - ((r2) > (x2) \
               || ((r2) == (x2) \
                   && ((r1) > (x1) || ((r1) == (x1) && (r0) > (x0))))))
  #endif
  #ifndef __FP_FRAC_ADDI_4
  /* (x3,x2,x1,x0) += i, where i is a single-word unsigned value.
   *
   * The old form tested "(x0 += i) < x0", which reads and modifies x0
   * without an intervening sequence point (undefined behaviour, and in
   * practice the carry could be missed).  Here the carry out of the low
   * word is detected by comparing the new x0 against i, and it ripples
   * upwards only while each incremented word wraps to zero; the && chain
   * sequences every step.
   *
   * i is evaluated twice and must be free of side effects.  The value of
   * the expression is the updated x3, as before.
   */
  #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
    ((x3) += (((x0) += (i)) < (i) && ++(x1) == 0 && ++(x2) == 0))
  #endif
  /* Convert FP values between word sizes. This appears to be more
   * complicated than I'd have expected it to be, so these might be
   * wrong... These macros are in any case somewhat bogus because they
   * use information about what various FRAC_n variables look like
   * internally [eg, that 2 word vars are X_f0 and X_f1]. But so do
   * the ones in op-2.h and op-1.h.
   */

  /* Narrow the four-word fraction S (format sfs) to the one-word fraction
   * D (format dfs).  S is first shifted right IN PLACE, with sticky lsb,
   * by the difference of the two _FP_WFRACBITS_ values; its low word is
   * then copied to D_f.
   */
  #define _FP_FRAC_CONV_1_4(dfs, sfs, D, S) \
    do { \
      _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
                     _FP_WFRACBITS_##sfs); \
      D##_f = S##_f[0]; \
    } while (0)

  /* As _FP_FRAC_CONV_1_4, but the destination is a two-word fraction held
   * in D_f0 (low) and D_f1 (next word up).  S is modified in place.
   */
  #define _FP_FRAC_CONV_2_4(dfs, sfs, D, S) \
    do { \
      _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
                     _FP_WFRACBITS_##sfs); \
      D##_f0 = S##_f[0]; \
      D##_f1 = S##_f[1]; \
    } while (0)
  /* Assembly/disassembly for converting to/from integral types.
   * No shifting or overflow handled here.
   */

  /* Put the FP value X into r, which is an integer of size rsize bits.
   * Depending on rsize, the low one, two or all four words of X are
   * concatenated into r (X_f[0] ends up in the least significant bits).
   * r must be a modifiable lvalue; it is evaluated several times.
   */
  #define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
    do { \
      if ((rsize) <= _FP_W_TYPE_SIZE) \
        r = X##_f[0]; \
      else if ((rsize) <= 2*_FP_W_TYPE_SIZE) \
        { \
          r = X##_f[1]; \
          r <<= _FP_W_TYPE_SIZE; \
          r += X##_f[0]; \
        } \
      else \
        { \
          /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
          /* and int == 4words as a single case. */ \
          r = X##_f[3]; \
          r <<= _FP_W_TYPE_SIZE; \
          r += X##_f[2]; \
          r <<= _FP_W_TYPE_SIZE; \
          r += X##_f[1]; \
          r <<= _FP_W_TYPE_SIZE; \
          r += X##_f[0]; \
        } \
    } while (0)
  /* "No disassemble Number Five!" */
  /* Move an integer r of size rsize bits into X's fractional part. We rely
   * on the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
   * having to mask the values we store into it.  Words that lie entirely
   * above rsize bits are set to 0.
   *
   * r and rsize are parenthesized in the expansion so that an argument such
   * as "a | b" is shifted as a whole; both are evaluated several times.
   */
  #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
    do { \
      X##_f[0] = (r); \
      X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE ? 0 : (r) >> _FP_W_TYPE_SIZE); \
      X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE ? 0 : (r) >> 2*_FP_W_TYPE_SIZE); \
      X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE ? 0 : (r) >> 3*_FP_W_TYPE_SIZE); \
    } while (0)
  /* Widen the one-word fraction S_f (format sfs) to the four-word fraction
   * D (format dfs): S_f becomes the low word, the upper three words are
   * cleared, and D is then shifted left by the difference of the two
   * _FP_WFRACBITS_ values.
   */
  #define _FP_FRAC_CONV_4_1(dfs, sfs, D, S) \
    do { \
      D##_f[0] = S##_f; \
      D##_f[1] = D##_f[2] = D##_f[3] = 0; \
      _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
    } while (0)

  /* As _FP_FRAC_CONV_4_1, but the source is a two-word fraction held in
   * S_f0 (low) and S_f1 (next word up).
   */
  #define _FP_FRAC_CONV_4_2(dfs, sfs, D, S) \
    do { \
      D##_f[0] = S##_f0; \
      D##_f[1] = S##_f1; \
      D##_f[2] = D##_f[3] = 0; \
      _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
    } while (0)
  /* FIXME! This has to be written.
   * Until then the macro expands to nothing, so any code that invokes it
   * performs no square-root iteration at all and leaves R, S, T, X and q
   * untouched.
   */
  #define _FP_SQRT_MEAT_4(R, S, T, X, q)