diff options
author | Tim Peters <tim.peters@gmail.com> | 2003-02-02 07:51:32 (GMT) |
---|---|---|
committer | Tim Peters <tim.peters@gmail.com> | 2003-02-02 07:51:32 (GMT) |
commit | bf2674be0e95787cdeb154091b7377e30b2827bf (patch) | |
tree | 05ccb3b212ff3027587b948678959f7b6ca61546 | |
parent | 06dd8cf5e4100303714f9e3f72b3c52330fdd51c (diff) | |
download | cpython-bf2674be0e95787cdeb154091b7377e30b2827bf.zip cpython-bf2674be0e95787cdeb154091b7377e30b2827bf.tar.gz cpython-bf2674be0e95787cdeb154091b7377e30b2827bf.tar.bz2 |
long(string, base) now takes time linear in len(string) when base is a
power of 2. Enabled the tail end of test_long() in pickletester.py
because it no longer takes forever when run from test_pickle.py.
-rw-r--r-- | Lib/pickle.py | 5 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 11 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Objects/longobject.c | 123 |
4 files changed, 113 insertions, 29 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py index ba0e38b..4e80cca 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1427,9 +1427,6 @@ def encode_long(x): binary = _binascii.unhexlify(ashex) return binary[::-1] -# XXX OOPS! This is still quadratic-time. While hex(n) is linear-time -# XXX in the # of digits in n, long(s, 16) is still quadratic-time -# XXX in len(s). def decode_long(data): r"""Decode a long from a two's complement little-endian binary string. @@ -1453,7 +1450,7 @@ def decode_long(data): if nbytes == 0: return 0L ashex = _binascii.hexlify(data[::-1]) - n = long(ashex, 16) # quadratic time + n = long(ashex, 16) # quadratic time before Python 2.3; linear now if data[-1] >= '\x80': n -= 1L << (nbytes * 8) return n diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 6615307..2a1ca17 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -247,7 +247,7 @@ class AbstractPickleTests(unittest.TestCase): def test_long(self): for proto in protocols: - # 256 bytes is where LONG4 begins + # 256 bytes is where LONG4 begins. for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257: nbase = 1L << nbits for npos in nbase-1, nbase, nbase+1: @@ -257,20 +257,11 @@ class AbstractPickleTests(unittest.TestCase): self.assertEqual(n, got) # Try a monster. This is quadratic-time in protos 0 & 1, so don't # bother with those. - # XXX Damn. pickle.py is still quadratic-time here, due to - # XXX long(string, 16). cPickle runs this in an eyeblink, but I - # XXX gave up waiting for pickle.py to get beyond "loading". Giving - # XXX up for now. - return - print "building long" nbase = long("deadbeeffeedface", 16) nbase += nbase << 1000000 for n in nbase, -nbase: - print "dumping" p = self.dumps(n, 2) - print "loading" got = self.loads(p) - print "checking" self.assertEqual(n, got) def test_reduce(self): @@ -12,6 +12,9 @@ What's New in Python 2.3 alpha 2? Core and builtins ----------------- +- long(string, base) takes time linear in len(string) when base is a power + of 2 now. It used to take time quadratic in len(string). + - filter returns now Unicode results for Unicode arguments. - raw_input can now return Unicode objects. diff --git a/Objects/longobject.c b/Objects/longobject.c index 9d7243f..7ca8244 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1090,6 +1090,95 @@ long_format(PyObject *aa, int base, int addL) return (PyObject *)str; } +/* *str points to the first digit in a string of base base digits. base + * is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first + * non-digit (which may be *str!). A normalized long is returned. + * The point to this routine is that it takes time linear in the number of + * string characters. + */ +static PyLongObject * +long_from_binary_base(char **str, int base) +{ + char *p = *str; + char *start = p; + int bits_per_char; + int n; + PyLongObject *z; + twodigits accum; + int bits_in_accum; + digit *pdigit; + + assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0); + n = base; + for (bits_per_char = -1; n; ++bits_per_char) + n >>= 1; + /* n <- total # of bits needed, while setting p to end-of-string */ + n = 0; + for (;;) { + int k = -1; + char ch = *p; + + if (ch <= '9') + k = ch - '0'; + else if (ch >= 'a') + k = ch - 'a' + 10; + else if (ch >= 'A') + k = ch - 'A' + 10; + if (k < 0 || k >= base) + break; + n += bits_per_char; + if (n < 0) { + PyErr_SetString(PyExc_ValueError, + "long string too large to convert"); + return NULL; + } + ++p; + } + *str = p; + /* n <- # of Python digits needed, = ceiling(n/SHIFT). */ + n = (n + SHIFT - 1) / SHIFT; + z = _PyLong_New(n); + if (z == NULL) + return NULL; + /* Read string from right, and fill in long from left; i.e., + * from least to most significant in both. + */ + accum = 0; + bits_in_accum = 0; + pdigit = z->ob_digit; + while (--p >= start) { + unsigned char ch = (unsigned char)*p; + digit k; + + if (ch <= '9') + k = ch - '0'; + else if (ch >= 'a') + k = ch - 'a' + 10; + else { + assert(ch >= 'A'); + k = ch - 'A' + 10; + } + assert(k >= 0 && k <= base); + accum |= k << bits_in_accum; + bits_in_accum += bits_per_char; + if (bits_in_accum >= SHIFT) { + *pdigit++ = (digit)(accum & MASK); + assert(pdigit - z->ob_digit <= n); + accum >>= SHIFT; + bits_in_accum -= SHIFT; + assert(bits_in_accum < SHIFT); + } + } + if (bits_in_accum) { + assert(bits_in_accum <= SHIFT); + *pdigit++ = (digit)accum; + assert(pdigit - z->ob_digit <= n); + } + while (pdigit - z->ob_digit < n) + *pdigit++ = 0; + return long_normalize(z); +} + PyObject * PyLong_FromString(char *str, char **pend, int base) { @@ -1122,23 +1211,27 @@ PyLong_FromString(char *str, char **pend, int base) } if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) str += 2; - z = _PyLong_New(0); start = str; - for ( ; z != NULL; ++str) { - int k = -1; - PyLongObject *temp; + if ((base & (base - 1)) == 0) + z = long_from_binary_base(&str, base); + else { + z = _PyLong_New(0); + for ( ; z != NULL; ++str) { + int k = -1; + PyLongObject *temp; - if (*str <= '9') - k = *str - '0'; - else if (*str >= 'a') - k = *str - 'a' + 10; - else if (*str >= 'A') - k = *str - 'A' + 10; - if (k < 0 || k >= base) - break; - temp = muladd1(z, (digit)base, (digit)k); - Py_DECREF(z); - z = temp; + if (*str <= '9') + k = *str - '0'; + else if (*str >= 'a') + k = *str - 'a' + 10; + else if (*str >= 'A') + k = *str - 'A' + 10; + if (k < 0 || k >= base) + break; + temp = muladd1(z, (digit)base, (digit)k); + Py_DECREF(z); + z = temp; + } } if (z == NULL) return NULL; |