@@ -37,159 +37,6 @@
 #define UDIV_TIME UMUL_TIME
 #endif
 
-/* FIXME: We should be using invert_limb (or invert_normalized_limb)
- * here (not udiv_qrnnd).
- */
-
-mpi_limb_t
-mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
-	      mpi_limb_t divisor_limb)
-{
-	mpi_size_t i;
-	mpi_limb_t n1, n0, r;
-	int dummy;
-
-	/* Botch: Should this be handled at all? Rely on callers? */
-	if (!dividend_size)
-		return 0;
-
-	/* If multiplication is much faster than division, and the
-	 * dividend is large, pre-invert the divisor, and use
-	 * only multiplications in the inner loop.
-	 *
-	 * This test should be read:
-	 * Does it ever help to use udiv_qrnnd_preinv?
-	 * && Does what we save compensate for the inversion overhead?
-	 */
-	if (UDIV_TIME > (2 * UMUL_TIME + 6)
-	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
-		int normalization_steps;
-
-		count_leading_zeros(normalization_steps, divisor_limb);
-		if (normalization_steps) {
-			mpi_limb_t divisor_limb_inverted;
-
-			divisor_limb <<= normalization_steps;
-
-			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
-			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
-			 * most significant bit (with weight 2**N) implicit.
-			 *
-			 * Special case for DIVISOR_LIMB == 100...000.
-			 */
-			if (!(divisor_limb << 1))
-				divisor_limb_inverted = ~(mpi_limb_t) 0;
-			else
-				udiv_qrnnd(divisor_limb_inverted, dummy,
-					   -divisor_limb, 0, divisor_limb);
-
-			n1 = dividend_ptr[dividend_size - 1];
-			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
-
-			/* Possible optimization:
-			 * if (r == 0
-			 * && divisor_limb > ((n1 << normalization_steps)
-			 * | (dividend_ptr[dividend_size - 2] >> ...)))
-			 * ...one division less...
-			 */
-			for (i = dividend_size - 2; i >= 0; i--) {
-				n0 = dividend_ptr[i];
-				UDIV_QRNND_PREINV(dummy, r, r,
-						  ((n1 << normalization_steps)
-						   | (n0 >>
-						      (BITS_PER_MPI_LIMB -
-						       normalization_steps))),
-						  divisor_limb,
-						  divisor_limb_inverted);
-				n1 = n0;
-			}
-			UDIV_QRNND_PREINV(dummy, r, r,
-					  n1 << normalization_steps,
-					  divisor_limb, divisor_limb_inverted);
-			return r >> normalization_steps;
-		} else {
-			mpi_limb_t divisor_limb_inverted;
-
-			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
-			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
-			 * most significant bit (with weight 2**N) implicit.
-			 *
-			 * Special case for DIVISOR_LIMB == 100...000.
-			 */
-			if (!(divisor_limb << 1))
-				divisor_limb_inverted = ~(mpi_limb_t) 0;
-			else
-				udiv_qrnnd(divisor_limb_inverted, dummy,
-					   -divisor_limb, 0, divisor_limb);
-
-			i = dividend_size - 1;
-			r = dividend_ptr[i];
-
-			if (r >= divisor_limb)
-				r = 0;
-			else
-				i--;
-
-			for (; i >= 0; i--) {
-				n0 = dividend_ptr[i];
-				UDIV_QRNND_PREINV(dummy, r, r,
-						  n0, divisor_limb,
-						  divisor_limb_inverted);
-			}
-			return r;
-		}
-	} else {
-		if (UDIV_NEEDS_NORMALIZATION) {
-			int normalization_steps;
-
-			count_leading_zeros(normalization_steps, divisor_limb);
-			if (normalization_steps) {
-				divisor_limb <<= normalization_steps;
-
-				n1 = dividend_ptr[dividend_size - 1];
-				r = n1 >> (BITS_PER_MPI_LIMB -
-					   normalization_steps);
-
-				/* Possible optimization:
-				 * if (r == 0
-				 * && divisor_limb > ((n1 << normalization_steps)
-				 * | (dividend_ptr[dividend_size - 2] >> ...)))
-				 * ...one division less...
-				 */
-				for (i = dividend_size - 2; i >= 0; i--) {
-					n0 = dividend_ptr[i];
-					udiv_qrnnd(dummy, r, r,
-						   ((n1 << normalization_steps)
-						    | (n0 >>
-						       (BITS_PER_MPI_LIMB -
-							normalization_steps))),
-						   divisor_limb);
-					n1 = n0;
-				}
-				udiv_qrnnd(dummy, r, r,
-					   n1 << normalization_steps,
-					   divisor_limb);
-				return r >> normalization_steps;
-			}
-		}
-		/* No normalization needed, either because udiv_qrnnd doesn't require
-		 * it, or because DIVISOR_LIMB is already normalized. */
-		i = dividend_size - 1;
-		r = dividend_ptr[i];
-
-		if (r >= divisor_limb)
-			r = 0;
-		else
-			i--;
-
-		for (; i >= 0; i--) {
-			n0 = dividend_ptr[i];
-			udiv_qrnnd(dummy, r, r, n0, divisor_limb);
-		}
-		return r;
-	}
-}
-
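For reviewers: the fast branch being deleted above replaces each per-limb division by two multiplications with a precomputed reciprocal of the divisor. Below is a minimal standalone model of that trick, assuming 32-bit limbs and using uint64_t in place of the umul_ppmm/udiv_qrnnd primitives; all names are hypothetical, not kernel API.

/*
 * Sketch of the pre-inverted single-limb modulus, for reference
 * only: 32-bit limbs, uint64_t doing the double-limb arithmetic.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t limb_t;

/* di = floor((2^64 - 2^32*d) / d), the low 32 bits of a 33-bit
 * approximation of 1/d whose top bit is implicit; d must be
 * normalized (top bit set).  d == 0x80000000 would need 33 bits,
 * hence the special case, exactly as in the removed code.  The
 * extra -1 below is harmless: a normalized d other than 0x80000000
 * is not a power of two, so d never divides 2^64 exactly. */
static limb_t invert_limb32(limb_t d)
{
	if ((limb_t)(d << 1) == 0)
		return ~(limb_t)0;
	return (limb_t)((~(uint64_t)0 - ((uint64_t)d << 32)) / d);
}

/* One step of (nh:nl) mod d with nh < d: estimate the quotient by
 * one multiplication with di; the estimate is never too high and at
 * most three too low, so a short correction loop finishes the job. */
static limb_t mod_step(limb_t nh, limb_t nl, limb_t d, limb_t di)
{
	limb_t q = (limb_t)(((uint64_t)nh * di) >> 32) + nh;
	uint64_t rem = (((uint64_t)nh << 32) | nl) - (uint64_t)q * d;

	while (rem >= d)
		rem -= d;
	return (limb_t)rem;
}

int main(void)
{
	limb_t x[3] = { 0xdeadbeef, 0x12345678, 0x9abcdef0 }; /* LS limb first */
	limb_t d = 0x87654321;	/* already normalized */
	limb_t di = invert_limb32(d);
	limb_t r = 0;
	uint64_t ref = 0;
	int i;

	for (i = 2; i >= 0; i--) {
		r = mod_step(r, x[i], d, di);
		ref = ((ref << 32) | x[i]) % d;	/* plain-division check */
	}
	printf("preinv %08x, reference %08x\n", r, (limb_t)ref);
	return 0;
}

The bounded error of the estimate is why the removed UDIV_QRNND_PREINV macro could follow its two multiplications with the same cheap fix-up steps instead of a real division.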
 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
  * the NSIZE-DSIZE least significant quotient limbs at QP
  * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
@@ -387,159 +234,3 @@ q_test:
 
 	return most_significant_q_limb;
 }
-
-/****************
- * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
- * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
- * Return the single-limb remainder.
- * There are no constraints on the value of the divisor.
- *
- * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
- */
-
-mpi_limb_t
-mpihelp_divmod_1(mpi_ptr_t quot_ptr,
-		 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
-		 mpi_limb_t divisor_limb)
-{
-	mpi_size_t i;
-	mpi_limb_t n1, n0, r;
-	int dummy;
-
-	if (!dividend_size)
-		return 0;
-
-	/* If multiplication is much faster than division, and the
-	 * dividend is large, pre-invert the divisor, and use
-	 * only multiplications in the inner loop.
-	 *
-	 * This test should be read:
-	 * Does it ever help to use udiv_qrnnd_preinv?
-	 * && Does what we save compensate for the inversion overhead?
-	 */
-	if (UDIV_TIME > (2 * UMUL_TIME + 6)
-	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
-		int normalization_steps;
-
-		count_leading_zeros(normalization_steps, divisor_limb);
-		if (normalization_steps) {
-			mpi_limb_t divisor_limb_inverted;
-
-			divisor_limb <<= normalization_steps;
-
-			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
-			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
-			 * most significant bit (with weight 2**N) implicit.
-			 */
-			/* Special case for DIVISOR_LIMB == 100...000. */
-			if (!(divisor_limb << 1))
-				divisor_limb_inverted = ~(mpi_limb_t) 0;
-			else
-				udiv_qrnnd(divisor_limb_inverted, dummy,
-					   -divisor_limb, 0, divisor_limb);
-
-			n1 = dividend_ptr[dividend_size - 1];
-			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
-
-			/* Possible optimization:
-			 * if (r == 0
-			 * && divisor_limb > ((n1 << normalization_steps)
-			 * | (dividend_ptr[dividend_size - 2] >> ...)))
-			 * ...one division less...
-			 */
-			for (i = dividend_size - 2; i >= 0; i--) {
-				n0 = dividend_ptr[i];
-				UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
-						  ((n1 << normalization_steps)
-						   | (n0 >>
-						      (BITS_PER_MPI_LIMB -
-						       normalization_steps))),
-						  divisor_limb,
-						  divisor_limb_inverted);
-				n1 = n0;
-			}
-			UDIV_QRNND_PREINV(quot_ptr[0], r, r,
-					  n1 << normalization_steps,
-					  divisor_limb, divisor_limb_inverted);
-			return r >> normalization_steps;
-		} else {
-			mpi_limb_t divisor_limb_inverted;
-
-			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
-			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
-			 * most significant bit (with weight 2**N) implicit.
-			 */
-			/* Special case for DIVISOR_LIMB == 100...000. */
-			if (!(divisor_limb << 1))
-				divisor_limb_inverted = ~(mpi_limb_t) 0;
-			else
-				udiv_qrnnd(divisor_limb_inverted, dummy,
-					   -divisor_limb, 0, divisor_limb);
-
-			i = dividend_size - 1;
-			r = dividend_ptr[i];
-
-			if (r >= divisor_limb)
-				r = 0;
-			else
-				quot_ptr[i--] = 0;
-
-			for (; i >= 0; i--) {
-				n0 = dividend_ptr[i];
-				UDIV_QRNND_PREINV(quot_ptr[i], r, r,
-						  n0, divisor_limb,
-						  divisor_limb_inverted);
-			}
-			return r;
-		}
-	} else {
-		if (UDIV_NEEDS_NORMALIZATION) {
-			int normalization_steps;
-
-			count_leading_zeros(normalization_steps, divisor_limb);
-			if (normalization_steps) {
-				divisor_limb <<= normalization_steps;
-
-				n1 = dividend_ptr[dividend_size - 1];
-				r = n1 >> (BITS_PER_MPI_LIMB -
-					   normalization_steps);
-
-				/* Possible optimization:
-				 * if (r == 0
-				 * && divisor_limb > ((n1 << normalization_steps)
-				 * | (dividend_ptr[dividend_size - 2] >> ...)))
-				 * ...one division less...
-				 */
-				for (i = dividend_size - 2; i >= 0; i--) {
-					n0 = dividend_ptr[i];
-					udiv_qrnnd(quot_ptr[i + 1], r, r,
-						   ((n1 << normalization_steps)
-						    | (n0 >>
-						       (BITS_PER_MPI_LIMB -
-							normalization_steps))),
-						   divisor_limb);
-					n1 = n0;
-				}
-				udiv_qrnnd(quot_ptr[0], r, r,
-					   n1 << normalization_steps,
-					   divisor_limb);
-				return r >> normalization_steps;
-			}
-		}
-		/* No normalization needed, either because udiv_qrnnd doesn't require
-		 * it, or because DIVISOR_LIMB is already normalized. */
-		i = dividend_size - 1;
-		r = dividend_ptr[i];
-
-		if (r >= divisor_limb)
-			r = 0;
-		else
-			quot_ptr[i--] = 0;
-
-		for (; i >= 0; i--) {
-			n0 = dividend_ptr[i];
-			udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
-		}
-		return r;
-	}
-}
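Both deleted functions also share a fallback branch built on plain udiv_qrnnd, normalizing the divisor first when that primitive requires a set top bit. Below is a companion sketch of the normalization path, again standalone and hypothetical: 32-bit limbs, __builtin_clz standing in for count_leading_zeros, and a 64/32 divide standing in for udiv_qrnnd; it writes quotient limbs the way mpihelp_divmod_1 did and assumes a nonzero shift (the removed code kept a separate branch for already-normalized divisors).

/*
 * Sketch of the normalization path: to divide by an unnormalized d
 * with a primitive that wants d's top bit set, shift dividend and
 * divisor left by s and shift the remainder back down, since
 * n = q*d + r  iff  (n << s) = q*(d << s) + (r << s).
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t limb_t;

int main(void)
{
	limb_t np[3] = { 0x33221100, 0x77665544, 0xbbaa9988 }; /* LS limb first */
	limb_t qp[3];
	limb_t d = 0x00012345;		/* unnormalized, nonzero divisor */
	int s = __builtin_clz(d);	/* count_leading_zeros stand-in, s != 0 */
	limb_t dn = d << s;		/* normalized divisor */
	limb_t n1 = np[2], n0;
	limb_t r = n1 >> (32 - s);	/* top s bits of the shifted dividend */
	uint64_t acc, ref = 0;
	int i;

	/* Schoolbook loop over the shifted dividend, whose limbs are
	 * formed on the fly from adjacent original limbs; each step is
	 * one udiv_qrnnd, modeled here by a 64/32 divide. */
	for (i = 1; i >= 0; i--) {
		n0 = np[i];
		acc = ((uint64_t)r << 32) | ((n1 << s) | (n0 >> (32 - s)));
		qp[i + 1] = (limb_t)(acc / dn);
		r = (limb_t)(acc % dn);
		n1 = n0;
	}
	acc = ((uint64_t)r << 32) | (n1 << s);	/* last, fully shifted limb */
	qp[0] = (limb_t)(acc / dn);
	r = (limb_t)(acc % dn) >> s;		/* un-normalize the remainder */

	for (i = 2; i >= 0; i--)		/* plain-division check */
		ref = ((ref << 32) | np[i]) % d;
	printf("q = %08x %08x %08x, r = %08x (expect r = %08x)\n",
	       qp[2], qp[1], qp[0], r, (limb_t)ref);
	return 0;
}

The quotient is unchanged by the shift, so only the final remainder needs the shift undone, which is why the removed code could return r >> normalization_steps directly.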