summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/pickle.py5
-rw-r--r--Lib/test/pickletester.py11
-rw-r--r--Misc/NEWS3
-rw-r--r--Objects/longobject.c123
4 files changed, 113 insertions, 29 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index ba0e38b..4e80cca 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -1427,9 +1427,6 @@ def encode_long(x):
binary = _binascii.unhexlify(ashex)
return binary[::-1]
-# XXX OOPS! This is still quadratic-time. While hex(n) is linear-time
-# XXX in the # of digits in n, long(s, 16) is still quadratic-time
-# XXX in len(s).
def decode_long(data):
r"""Decode a long from a two's complement little-endian binary string.
@@ -1453,7 +1450,7 @@ def decode_long(data):
if nbytes == 0:
return 0L
ashex = _binascii.hexlify(data[::-1])
- n = long(ashex, 16) # quadratic time
+ n = long(ashex, 16) # quadratic time before Python 2.3; linear now
if data[-1] >= '\x80':
n -= 1L << (nbytes * 8)
return n
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 6615307..2a1ca17 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -247,7 +247,7 @@ class AbstractPickleTests(unittest.TestCase):
def test_long(self):
for proto in protocols:
- # 256 bytes is where LONG4 begins
+ # 256 bytes is where LONG4 begins.
for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
nbase = 1L << nbits
for npos in nbase-1, nbase, nbase+1:
@@ -257,20 +257,11 @@ class AbstractPickleTests(unittest.TestCase):
self.assertEqual(n, got)
# Try a monster. This is quadratic-time in protos 0 & 1, so don't
# bother with those.
- # XXX Damn. pickle.py is still quadratic-time here, due to
- # XXX long(string, 16). cPickle runs this in an eyeblink, but I
- # XXX gave up waiting for pickle.py to get beyond "loading". Giving
- # XXX up for now.
- return
- print "building long"
nbase = long("deadbeeffeedface", 16)
nbase += nbase << 1000000
for n in nbase, -nbase:
- print "dumping"
p = self.dumps(n, 2)
- print "loading"
got = self.loads(p)
- print "checking"
self.assertEqual(n, got)
def test_reduce(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 5d3f57c..e9f105b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.3 alpha 2?
Core and builtins
-----------------
+- long(string, base) takes time linear in len(string) when base is a power
+ of 2 now. It used to take time quadratic in len(string).
+
- filter returns now Unicode results for Unicode arguments.
- raw_input can now return Unicode objects.
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 9d7243f..7ca8244 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1090,6 +1090,95 @@ long_format(PyObject *aa, int base, int addL)
return (PyObject *)str;
}
+/* *str points to the first digit in a string of base base digits. base
+ * is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first
+ * non-digit (which may be *str!). A normalized long is returned.
+ * The point to this routine is that it takes time linear in the number of
+ * string characters.
+ */
+static PyLongObject *
+long_from_binary_base(char **str, int base)
+{
+ char *p = *str;
+ char *start = p;
+ int bits_per_char;
+ int n;
+ PyLongObject *z;
+ twodigits accum;
+ int bits_in_accum;
+ digit *pdigit;
+
+ assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0);
+ n = base;
+ for (bits_per_char = -1; n; ++bits_per_char)
+ n >>= 1;
+ /* n <- total # of bits needed, while setting p to end-of-string */
+ n = 0;
+ for (;;) {
+ int k = -1;
+ char ch = *p;
+
+ if (ch <= '9')
+ k = ch - '0';
+ else if (ch >= 'a')
+ k = ch - 'a' + 10;
+ else if (ch >= 'A')
+ k = ch - 'A' + 10;
+ if (k < 0 || k >= base)
+ break;
+ n += bits_per_char;
+ if (n < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "long string too large to convert");
+ return NULL;
+ }
+ ++p;
+ }
+ *str = p;
+ /* n <- # of Python digits needed, = ceiling(n/SHIFT). */
+ n = (n + SHIFT - 1) / SHIFT;
+ z = _PyLong_New(n);
+ if (z == NULL)
+ return NULL;
+ /* Read string from right, and fill in long from left; i.e.,
+ * from least to most significant in both.
+ */
+ accum = 0;
+ bits_in_accum = 0;
+ pdigit = z->ob_digit;
+ while (--p >= start) {
+ unsigned char ch = (unsigned char)*p;
+ digit k;
+
+ if (ch <= '9')
+ k = ch - '0';
+ else if (ch >= 'a')
+ k = ch - 'a' + 10;
+ else {
+ assert(ch >= 'A');
+ k = ch - 'A' + 10;
+ }
+ assert(k >= 0 && k <= base);
+ accum |= k << bits_in_accum;
+ bits_in_accum += bits_per_char;
+ if (bits_in_accum >= SHIFT) {
+ *pdigit++ = (digit)(accum & MASK);
+ assert(pdigit - z->ob_digit <= n);
+ accum >>= SHIFT;
+ bits_in_accum -= SHIFT;
+ assert(bits_in_accum < SHIFT);
+ }
+ }
+ if (bits_in_accum) {
+ assert(bits_in_accum <= SHIFT);
+ *pdigit++ = (digit)accum;
+ assert(pdigit - z->ob_digit <= n);
+ }
+ while (pdigit - z->ob_digit < n)
+ *pdigit++ = 0;
+ return long_normalize(z);
+}
+
PyObject *
PyLong_FromString(char *str, char **pend, int base)
{
@@ -1122,23 +1211,27 @@ PyLong_FromString(char *str, char **pend, int base)
}
if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
str += 2;
- z = _PyLong_New(0);
start = str;
- for ( ; z != NULL; ++str) {
- int k = -1;
- PyLongObject *temp;
+ if ((base & (base - 1)) == 0)
+ z = long_from_binary_base(&str, base);
+ else {
+ z = _PyLong_New(0);
+ for ( ; z != NULL; ++str) {
+ int k = -1;
+ PyLongObject *temp;
- if (*str <= '9')
- k = *str - '0';
- else if (*str >= 'a')
- k = *str - 'a' + 10;
- else if (*str >= 'A')
- k = *str - 'A' + 10;
- if (k < 0 || k >= base)
- break;
- temp = muladd1(z, (digit)base, (digit)k);
- Py_DECREF(z);
- z = temp;
+ if (*str <= '9')
+ k = *str - '0';
+ else if (*str >= 'a')
+ k = *str - 'a' + 10;
+ else if (*str >= 'A')
+ k = *str - 'A' + 10;
+ if (k < 0 || k >= base)
+ break;
+ temp = muladd1(z, (digit)base, (digit)k);
+ Py_DECREF(z);
+ z = temp;
+ }
}
if (z == NULL)
return NULL;