op-1.h 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. /*
  2. * Basic one-word fraction declaration and manipulation.
  3. */
  4. #define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f
  5. #define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f)
  6. #define _FP_FRAC_SET_1(X,I) (X##_f = I)
  7. #define _FP_FRAC_HIGH_1(X) (X##_f)
  8. #define _FP_FRAC_LOW_1(X) (X##_f)
  9. #define _FP_FRAC_WORD_1(X,w) (X##_f)
  10. #define _FP_FRAC_ADDI_1(X,I) (X##_f += I)
  11. #define _FP_FRAC_SLL_1(X,N) \
  12. do { \
  13. if (__builtin_constant_p(N) && (N) == 1) \
  14. X##_f += X##_f; \
  15. else \
  16. X##_f <<= (N); \
  17. } while (0)
  18. #define _FP_FRAC_SRL_1(X,N) (X##_f >>= N)
  19. /* Right shift with sticky-lsb. */
  20. #define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz)
  21. #define __FP_FRAC_SRS_1(X,N,sz) \
  22. (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1 \
  23. ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
  24. #define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f)
  25. #define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f)
  26. #define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f)
  27. /* Predicates */
  28. #define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0)
  29. #define _FP_FRAC_ZEROP_1(X) (X##_f == 0)
  30. #define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs)
  31. #define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f)
  32. #define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f)
  33. #define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f)
  34. #define _FP_ZEROFRAC_1 0
  35. #define _FP_MINFRAC_1 1
  36. /*
  37. * Unpack the raw bits of a native fp value. Do not classify or
  38. * normalize the data.
  39. */
  40. #define _FP_UNPACK_RAW_1(fs, X, val) \
  41. do { \
  42. union _FP_UNION_##fs _flo; _flo.flt = (val); \
  43. \
  44. X##_f = _flo.bits.frac; \
  45. X##_e = _flo.bits.exp; \
  46. X##_s = _flo.bits.sign; \
  47. } while (0)
  48. /*
  49. * Repack the raw bits of a native fp value.
  50. */
  51. #define _FP_PACK_RAW_1(fs, val, X) \
  52. do { \
  53. union _FP_UNION_##fs _flo; \
  54. \
  55. _flo.bits.frac = X##_f; \
  56. _flo.bits.exp = X##_e; \
  57. _flo.bits.sign = X##_s; \
  58. \
  59. (val) = _flo.flt; \
  60. } while (0)
  61. /*
  62. * Multiplication algorithms:
  63. */
  64. /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
  65. multiplication immediately. */
  66. #define _FP_MUL_MEAT_1_imm(fs, R, X, Y) \
  67. do { \
  68. R##_f = X##_f * Y##_f; \
  69. /* Normalize since we know where the msb of the multiplicands \
  70. were (bit B), we know that the msb of the of the product is \
  71. at either 2B or 2B-1. */ \
  72. _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \
  73. } while (0)
  74. /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
  75. #define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit) \
  76. do { \
  77. _FP_W_TYPE _Z_f0, _Z_f1; \
  78. doit(_Z_f1, _Z_f0, X##_f, Y##_f); \
  79. /* Normalize since we know where the msb of the multiplicands \
  80. were (bit B), we know that the msb of the of the product is \
  81. at either 2B or 2B-1. */ \
  82. _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \
  83. R##_f = _Z_f0; \
  84. } while (0)
  85. /* Finally, a simple widening multiply algorithm. What fun! */
  86. #define _FP_MUL_MEAT_1_hard(fs, R, X, Y) \
  87. do { \
  88. _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \
  89. \
  90. /* split the words in half */ \
  91. _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
  92. _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
  93. _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
  94. _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
  95. \
  96. /* multiply the pieces */ \
  97. _z_f0 = _xl * _yl; \
  98. _a_f0 = _xh * _yl; \
  99. _a_f1 = _xl * _yh; \
  100. _z_f1 = _xh * _yh; \
  101. \
  102. /* reassemble into two full words */ \
  103. if ((_a_f0 += _a_f1) < _a_f1) \
  104. _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \
  105. _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \
  106. _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \
  107. _FP_FRAC_ADD_2(_z, _z, _a); \
  108. \
  109. /* normalize */ \
  110. _FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs); \
  111. R##_f = _z_f0; \
  112. } while (0)
  113. /*
  114. * Division algorithms:
  115. */
  116. /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
  117. division immediately. Give this macro either _FP_DIV_HELP_imm for
  118. C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you
  119. choose will depend on what the compiler does with divrem4. */
  120. #define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
  121. do { \
  122. _FP_W_TYPE _q, _r; \
  123. X##_f <<= (X##_f < Y##_f \
  124. ? R##_e--, _FP_WFRACBITS_##fs \
  125. : _FP_WFRACBITS_##fs - 1); \
  126. doit(_q, _r, X##_f, Y##_f); \
  127. R##_f = _q | (_r != 0); \
  128. } while (0)
  129. /* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
  130. that may be useful in this situation. This first is for a primitive
  131. that requires normalization, the second for one that does not. Look
  132. for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */
  133. #define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
  134. do { \
  135. _FP_W_TYPE _nh, _nl, _q, _r; \
  136. \
  137. /* Normalize Y -- i.e. make the most significant bit set. */ \
  138. Y##_f <<= _FP_WFRACXBITS_##fs - 1; \
  139. \
  140. /* Shift X op correspondingly high, that is, up one full word. */ \
  141. if (X##_f <= Y##_f) \
  142. { \
  143. _nl = 0; \
  144. _nh = X##_f; \
  145. } \
  146. else \
  147. { \
  148. R##_e++; \
  149. _nl = X##_f << (_FP_W_TYPE_SIZE-1); \
  150. _nh = X##_f >> 1; \
  151. } \
  152. \
  153. udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \
  154. R##_f = _q | (_r != 0); \
  155. } while (0)
  156. #define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
  157. do { \
  158. _FP_W_TYPE _nh, _nl, _q, _r; \
  159. if (X##_f < Y##_f) \
  160. { \
  161. R##_e--; \
  162. _nl = X##_f << _FP_WFRACBITS_##fs; \
  163. _nh = X##_f >> _FP_WFRACXBITS_##fs; \
  164. } \
  165. else \
  166. { \
  167. _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
  168. _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
  169. } \
  170. udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \
  171. R##_f = _q | (_r != 0); \
  172. } while (0)
  173. /*
  174. * Square root algorithms:
  175. * We have just one right now, maybe Newton approximation
  176. * should be added for those machines where division is fast.
  177. */
  178. #define _FP_SQRT_MEAT_1(R, S, T, X, q) \
  179. do { \
  180. while (q) \
  181. { \
  182. T##_f = S##_f + q; \
  183. if (T##_f <= X##_f) \
  184. { \
  185. S##_f = T##_f + q; \
  186. X##_f -= T##_f; \
  187. R##_f += q; \
  188. } \
  189. _FP_FRAC_SLL_1(X, 1); \
  190. q >>= 1; \
  191. } \
  192. } while (0)
  193. /*
  194. * Assembly/disassembly for converting to/from integral types.
  195. * No shifting or overflow handled here.
  196. */
  197. #define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f)
  198. #define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r)
  199. /*
  200. * Convert FP values between word sizes
  201. */
  202. #define _FP_FRAC_CONV_1_1(dfs, sfs, D, S) \
  203. do { \
  204. D##_f = S##_f; \
  205. if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs) \
  206. _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs), \
  207. _FP_WFRACBITS_##sfs); \
  208. else \
  209. D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs; \
  210. } while (0)