diff options
author | Kevin B Kenny <kennykb@acm.org> | 2005-01-19 22:41:26 (GMT) |
---|---|---|
committer | Kevin B Kenny <kennykb@acm.org> | 2005-01-19 22:41:26 (GMT) |
commit | b9cf65a08e6a59e434685e894e3189c201ac6791 (patch) | |
tree | 1f0868ef44c9f17d83d10dc94343df7b8cfe1842 /libtommath/bn_fast_s_mp_sqr.c | |
parent | c6a259aeeca4814a97cf6694814c63e74e4e18fa (diff) | |
download | tcl-b9cf65a08e6a59e434685e894e3189c201ac6791.zip tcl-b9cf65a08e6a59e434685e894e3189c201ac6791.tar.gz tcl-b9cf65a08e6a59e434685e894e3189c201ac6791.tar.bz2 |
Import of libtommath 0.33
Diffstat (limited to 'libtommath/bn_fast_s_mp_sqr.c')
-rw-r--r-- | libtommath/bn_fast_s_mp_sqr.c | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/libtommath/bn_fast_s_mp_sqr.c b/libtommath/bn_fast_s_mp_sqr.c new file mode 100644 index 0000000..d6014ab --- /dev/null +++ b/libtommath/bn_fast_s_mp_sqr.c @@ -0,0 +1,129 @@ +#include <tommath.h> +#ifdef BN_FAST_S_MP_SQR_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org + */ + +/* fast squaring + * + * This is the comba method where the columns of the product + * are computed first then the carries are computed. This + * has the effect of making a very simple inner loop that + * is executed the most + * + * W2 represents the outer products and W the inner. + * + * A further optimizations is made because the inner + * products are of the form "A * B * 2". The *2 part does + * not need to be computed until the end which is good + * because 64-bit shifts are slow! + * + * Based on Algorithm 14.16 on pp.597 of HAC. + * + */ +/* the jist of squaring... + +you do like mult except the offset of the tmpx [one that starts closer to zero] +can't equal the offset of tmpy. So basically you set up iy like before then you min it with +(ty-tx) so that it never happens. You double all those you add in the inner loop + +After that loop you do the squares and add them in. + +Remove W2 and don't memset W + +*/ + +int fast_s_mp_sqr (mp_int * a, mp_int * b) +{ + int olduse, res, pa, ix, iz; + mp_digit W[MP_WARRAY], *tmpx; + mp_word W1; + + /* grow the destination as required */ + pa = a->used + a->used; + if (b->alloc < pa) { + if ((res = mp_grow (b, pa)) != MP_OKAY) { + return res; + } + } + + /* number of output digits to produce */ + W1 = 0; + for (ix = 0; ix < pa; ix++) { + int tx, ty, iy; + mp_word _W; + mp_digit *tmpy; + + /* clear counter */ + _W = 0; + + /* get offsets into the two bignums */ + ty = MIN(a->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = a->dp + ty; + + /* this is the number of times the loop will iterrate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(a->used-tx, ty+1); + + /* now for squaring tx can never equal ty + * we halve the distance since they approach at a rate of 2x + * and we have to round because odd cases need to be executed + */ + iy = MIN(iy, (ty-tx+1)>>1); + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } + + /* double the inner product and add carry */ + _W = _W + _W + W1; + + /* even columns have the square term in them */ + if ((ix&1) == 0) { + _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); + } + + /* store it */ + W[ix] = _W; + + /* make next carry */ + W1 = _W >> ((mp_word)DIGIT_BIT); + } + + /* setup dest */ + olduse = b->used; + b->used = a->used+a->used; + + { + mp_digit *tmpb; + tmpb = b->dp; + for (ix = 0; ix < pa; ix++) { + *tmpb++ = W[ix] & MP_MASK; + } + + /* clear unused digits [that existed in the old copy of c] */ + for (; ix < olduse; ix++) { + *tmpb++ = 0; + } + } + mp_clamp (b); + return MP_OKAY; +} +#endif |