diff options
author | Stefan Krah <skrah@bytereef.org> | 2012-03-21 17:25:23 (GMT) |
---|---|---|
committer | Stefan Krah <skrah@bytereef.org> | 2012-03-21 17:25:23 (GMT) |
commit | 1919b7e72bc43315b32f38a6f5f01e8c717907f4 (patch) | |
tree | ef7490b66425fd2c622740ef9adfb03d806b7517 /Modules/_decimal/libmpdec/convolute.c | |
parent | 8bfccd852e2bceb04664b4832fc80eb3b7584918 (diff) | |
download | cpython-1919b7e72bc43315b32f38a6f5f01e8c717907f4.zip cpython-1919b7e72bc43315b32f38a6f5f01e8c717907f4.tar.gz cpython-1919b7e72bc43315b32f38a6f5f01e8c717907f4.tar.bz2 |
Issue #7652: Integrate the decimal floating point libmpdec library to speed
up the decimal module. Performance gains of the new C implementation are
between 12x and 80x, depending on the application.
Diffstat (limited to 'Modules/_decimal/libmpdec/convolute.c')
-rw-r--r-- | Modules/_decimal/libmpdec/convolute.c | 174 |
1 files changed, 174 insertions, 0 deletions
diff --git a/Modules/_decimal/libmpdec/convolute.c b/Modules/_decimal/libmpdec/convolute.c new file mode 100644 index 0000000..b5fe131 --- /dev/null +++ b/Modules/_decimal/libmpdec/convolute.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2008-2012 Stefan Krah. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#include "mpdecimal.h" +#include <stdio.h> +#include "bits.h" +#include "constants.h" +#include "fnt.h" +#include "fourstep.h" +#include "numbertheory.h" +#include "sixstep.h" +#include "umodarith.h" +#include "convolute.h" + + +/* Bignum: Fast convolution using the Number Theoretic Transform. Used for + the multiplication of very large coefficients. */ + + +/* Convolute the data in c1 and c2. Result is in c1. */ +int +fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum) +{ + int (*fnt)(mpd_uint_t *, mpd_size_t, int); + int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); +#ifdef PPRO + double dmod; + uint32_t dinvmod[3]; +#endif + mpd_uint_t n_inv, umod; + mpd_size_t i; + + + SETMODULUS(modnum); + n_inv = POWMOD(n, (umod-2)); + + if (ispower2(n)) { + if (n > SIX_STEP_THRESHOLD) { + fnt = six_step_fnt; + inv_fnt = inv_six_step_fnt; + } + else { + fnt = std_fnt; + inv_fnt = std_inv_fnt; + } + } + else { + fnt = four_step_fnt; + inv_fnt = inv_four_step_fnt; + } + + if (!fnt(c1, n, modnum)) { + return 0; + } + if (!fnt(c2, n, modnum)) { + return 0; + } + for (i = 0; i < n-1; i += 2) { + mpd_uint_t x0 = c1[i]; + mpd_uint_t y0 = c2[i]; + mpd_uint_t x1 = c1[i+1]; + mpd_uint_t y1 = c2[i+1]; + MULMOD2(&x0, y0, &x1, y1); + c1[i] = x0; + c1[i+1] = x1; + } + + if (!inv_fnt(c1, n, modnum)) { + return 0; + } + for (i = 0; i < n-3; i += 4) { + mpd_uint_t x0 = c1[i]; + mpd_uint_t x1 = c1[i+1]; + mpd_uint_t x2 = c1[i+2]; + mpd_uint_t x3 = c1[i+3]; + MULMOD2C(&x0, &x1, n_inv); + MULMOD2C(&x2, &x3, n_inv); + c1[i] = x0; + c1[i+1] = x1; + c1[i+2] = x2; + c1[i+3] = x3; + } + + return 1; +} + +/* Autoconvolute the data in c1. Result is in c1. */ +int +fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum) +{ + int (*fnt)(mpd_uint_t *, mpd_size_t, int); + int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); +#ifdef PPRO + double dmod; + uint32_t dinvmod[3]; +#endif + mpd_uint_t n_inv, umod; + mpd_size_t i; + + + SETMODULUS(modnum); + n_inv = POWMOD(n, (umod-2)); + + if (ispower2(n)) { + if (n > SIX_STEP_THRESHOLD) { + fnt = six_step_fnt; + inv_fnt = inv_six_step_fnt; + } + else { + fnt = std_fnt; + inv_fnt = std_inv_fnt; + } + } + else { + fnt = four_step_fnt; + inv_fnt = inv_four_step_fnt; + } + + if (!fnt(c1, n, modnum)) { + return 0; + } + for (i = 0; i < n-1; i += 2) { + mpd_uint_t x0 = c1[i]; + mpd_uint_t x1 = c1[i+1]; + MULMOD2(&x0, x0, &x1, x1); + c1[i] = x0; + c1[i+1] = x1; + } + + if (!inv_fnt(c1, n, modnum)) { + return 0; + } + for (i = 0; i < n-3; i += 4) { + mpd_uint_t x0 = c1[i]; + mpd_uint_t x1 = c1[i+1]; + mpd_uint_t x2 = c1[i+2]; + mpd_uint_t x3 = c1[i+3]; + MULMOD2C(&x0, &x1, n_inv); + MULMOD2C(&x2, &x3, n_inv); + c1[i] = x0; + c1[i+1] = x1; + c1[i+2] = x2; + c1[i+3] = x3; + } + + return 1; +} + + |