summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Dickinson <dickinsm@gmail.com>2009-09-16 22:10:56 (GMT)
committerMark Dickinson <dickinsm@gmail.com>2009-09-16 22:10:56 (GMT)
commitaa2adc828a0583ee472d42a3b6a8964c822c7ee2 (patch)
tree48a90588c2477d91722480efaae8325b4c5919c7
parent2920adb4df51e7c3c10f9671087037888f010483 (diff)
downloadcpython-aa2adc828a0583ee472d42a3b6a8964c822c7ee2.zip
cpython-aa2adc828a0583ee472d42a3b6a8964c822c7ee2.tar.gz
cpython-aa2adc828a0583ee472d42a3b6a8964c822c7ee2.tar.bz2
Issue #6713: Improve performance of str(n) and repr(n) for integers n
(up to 3.1 times faster in tests), by special-casing base 10 in _PyLong_Format. (Backport of r74851 from py3k.)
-rw-r--r--Include/longintrepr.h4
-rw-r--r--Misc/NEWS2
-rw-r--r--Objects/longobject.c119
3 files changed, 125 insertions, 0 deletions
diff --git a/Include/longintrepr.h b/Include/longintrepr.h
index d4fcc7e..6425c30 100644
--- a/Include/longintrepr.h
+++ b/Include/longintrepr.h
@@ -47,12 +47,16 @@ typedef PY_INT32_T sdigit; /* signed variant of digit */
typedef PY_UINT64_T twodigits;
typedef PY_INT64_T stwodigits; /* signed variant of twodigits */
#define PyLong_SHIFT 30
+#define _PyLong_DECIMAL_SHIFT 9 /* max(e such that 10**e fits in a digit) */
+#define _PyLong_DECIMAL_BASE ((digit)1000000000) /* 10 ** DECIMAL_SHIFT */
#elif PYLONG_BITS_IN_DIGIT == 15
typedef unsigned short digit;
typedef short sdigit; /* signed variant of digit */
typedef unsigned long twodigits;
typedef long stwodigits; /* signed variant of twodigits */
#define PyLong_SHIFT 15
+#define _PyLong_DECIMAL_SHIFT 4 /* max(e such that 10**e fits in a digit) */
+#define _PyLong_DECIMAL_BASE ((digit)10000) /* 10 ** DECIMAL_SHIFT */
#else
#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"
#endif
diff --git a/Misc/NEWS b/Misc/NEWS
index f4b076e..7796957 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.7 alpha 1
Core and Builtins
-----------------
+- Issue #6713: Improve performance of integer -> string conversions.
+
- Issue #1590864: Fix potential deadlock when mixing threads and fork().
- Issue #6844: Do not emit DeprecationWarnings when accessing a "message"
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 5e85e05..23d2f13 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1361,6 +1361,122 @@ divrem1(PyLongObject *a, digit n, digit *prem)
return long_normalize(z);
}
+/* Convert a long integer to a base 10 string. Returns a new non-shared
+ string. (Return value is non-shared so that callers can modify the
+ returned value if necessary.) */
+
+static PyObject *
+long_to_decimal_string(PyObject *aa, int addL)
+{
+ PyLongObject *scratch, *a;
+ PyObject *str;
+ Py_ssize_t size, strlen, size_a, i, j;
+ digit *pout, *pin, rem, tenpow;
+ char *p;
+ int negative;
+
+ a = (PyLongObject *)aa;
+ if (a == NULL || !PyLong_Check(a)) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+ size_a = ABS(Py_SIZE(a));
+ negative = Py_SIZE(a) < 0;
+
+ /* quick and dirty upper bound for the number of digits
+ required to express a in base _PyLong_DECIMAL_BASE:
+
+ #digits = 1 + floor(log2(a) / log2(_PyLong_DECIMAL_BASE))
+
+ But log2(a) < size_a * PyLong_SHIFT, and
+ log2(_PyLong_DECIMAL_BASE) = log2(10) * _PyLong_DECIMAL_SHIFT
+ > 3 * _PyLong_DECIMAL_SHIFT
+ */
+ if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) {
+ PyErr_SetString(PyExc_OverflowError,
+ "long is too large to format");
+ return NULL;
+ }
+ /* the expression size_a * PyLong_SHIFT is now safe from overflow */
+ size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT);
+ scratch = _PyLong_New(size);
+ if (scratch == NULL)
+ return NULL;
+
+ /* convert array of base _PyLong_BASE digits in pin to an array of
+ base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP,
+ Volume 2 (3rd edn), section 4.4, Method 1b). */
+ pin = a->ob_digit;
+ pout = scratch->ob_digit;
+ size = 0;
+ for (i = size_a; --i >= 0; ) {
+ digit hi = pin[i];
+ for (j = 0; j < size; j++) {
+ twodigits z = (twodigits)pout[j] << PyLong_SHIFT | hi;
+ hi = z / _PyLong_DECIMAL_BASE;
+ pout[j] = z - (twodigits)hi * _PyLong_DECIMAL_BASE;
+ }
+ while (hi) {
+ pout[size++] = hi % _PyLong_DECIMAL_BASE;
+ hi /= _PyLong_DECIMAL_BASE;
+ }
+ /* check for keyboard interrupt */
+ SIGCHECK({
+ Py_DECREF(scratch);
+ return NULL;
+ })
+ }
+ /* pout should have at least one digit, so that the case when a = 0
+ works correctly */
+ if (size == 0)
+ pout[size++] = 0;
+
+ /* calculate exact length of output string, and allocate */
+ strlen = (addL != 0) + negative +
+ 1 + (size - 1) * _PyLong_DECIMAL_SHIFT;
+ tenpow = 10;
+ rem = pout[size-1];
+ while (rem >= tenpow) {
+ tenpow *= 10;
+ strlen++;
+ }
+ str = PyString_FromStringAndSize(NULL, strlen);
+ if (str == NULL) {
+ Py_DECREF(scratch);
+ return NULL;
+ }
+
+ /* fill the string right-to-left */
+ p = PyString_AS_STRING(str) + strlen;
+ *p = '\0';
+ if (addL)
+ *--p = 'L';
+ /* pout[0] through pout[size-2] contribute exactly
+ _PyLong_DECIMAL_SHIFT digits each */
+ for (i=0; i < size - 1; i++) {
+ rem = pout[i];
+ for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) {
+ *--p = '0' + rem % 10;
+ rem /= 10;
+ }
+ }
+ /* pout[size-1]: always produce at least one decimal digit */
+ rem = pout[i];
+ do {
+ *--p = '0' + rem % 10;
+ rem /= 10;
+ } while (rem != 0);
+
+ /* and sign */
+ if (negative)
+ *--p = '-';
+
+ /* check we've counted correctly */
+ assert(p == PyString_AS_STRING(str));
+ Py_DECREF(scratch);
+ return (PyObject *)str;
+}
+
/* Convert the long to a string object with given base,
appending a base prefix of 0[box] if base is 2, 8 or 16.
Add a trailing "L" if addL is non-zero.
@@ -1377,6 +1493,9 @@ _PyLong_Format(PyObject *aa, int base, int addL, int newstyle)
int bits;
char sign = '\0';
+ if (base == 10)
+ return long_to_decimal_string((PyObject *)a, addL);
+
if (a == NULL || !PyLong_Check(a)) {
PyErr_BadInternalCall();
return NULL;