From bd7926478de92a2a0ef4440e1a9ae61b706a80d2 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 18 Mar 2009 20:06:12 +0000 Subject: Issue #4258: Make it possible to use 30-bit digits for PyLongs: - new configure option --enable-big-digits - new structseq sys.int_info giving information about the internal format By default, 30-bit digits are enabled on 64-bit machines but disabled on 32-bit machines. --- Doc/library/sys.rst | 17 +++ Doc/whatsnew/3.1.rst | 23 +++ Include/longintrepr.h | 68 +++++++-- Include/longobject.h | 1 + Include/pyport.h | 51 +++++++ Lib/test/test_long.py | 31 +++- Lib/test/test_sys.py | 23 +-- Misc/NEWS | 7 + Objects/longobject.c | 62 +++++++- PC/pyconfig.h | 36 +++++ Python/marshal.c | 146 +++++++++++++----- Python/sysmodule.c | 3 + configure | 413 ++++++++++++++++++++++++++++++++++++++++++++++++++ configure.in | 23 +++ pyconfig.h.in | 29 ++++ 15 files changed, 865 insertions(+), 68 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index a00c516..091b4d6 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -413,6 +413,23 @@ always available. same information. +.. data:: int_info + + A struct sequence that holds information about Python's + internal representation of integers. The attributes are read only. + + +-------------------------+----------------------------------------------+ + | attribute | explanation | + +=========================+==============================================+ + | :const:`bits_per_digit` | number of bits held in each digit. Python | + | | integers are stored internally in base | + | | ``2**int_info.bits_per_digit`` | + +-------------------------+----------------------------------------------+ + | :const:`sizeof_digit` | size in bytes of the C type used to | + | | represent a digit | + +-------------------------+----------------------------------------------+ + + .. 
function:: intern(string) Enter *string* in the table of "interned" strings and return the interned string diff --git a/Doc/whatsnew/3.1.rst b/Doc/whatsnew/3.1.rst index 68a76f3..56e5869 100644 --- a/Doc/whatsnew/3.1.rst +++ b/Doc/whatsnew/3.1.rst @@ -87,5 +87,28 @@ Some smaller changes made to the core Python language are: (Contributed by Fredrik Johansson and Victor Stinner; :issue:`3439`.) +* Integers are now stored internally either in base 2**15 or in base + 2**30, the base being determined at build time. Previously, they + were always stored in base 2**15. Using base 2**30 gives + significant performance improvements on 64-bit machines, but + benchmark results on 32-bit machines have been mixed. Therefore, + the default is to use base 2**30 on 64-bit machines and base 2**15 + on 32-bit machines; on Unix, there's a new configure option + --enable-big-digits that can be used to override this default. + + Apart from the performance improvements this change should be + invisible to end users, with one exception: for testing and + debugging purposes there's a new structseq ``sys.int_info`` that + provides information about the internal format, giving the number of + bits per digit and the size in bytes of the C type used to store + each digit:: + + >>> import sys + >>> sys.int_info + sys.int_info(bits_per_digit=30, sizeof_digit=4) + + + (Contributed by Mark Dickinson; :issue:`4258`.) + .. ====================================================================== diff --git a/Include/longintrepr.h b/Include/longintrepr.h index 2dbb3f5..144d04b 100644 --- a/Include/longintrepr.h +++ b/Include/longintrepr.h @@ -7,24 +7,62 @@ extern "C" { /* This is published for the benefit of "friend" marshal.c only. */ -/* Parameters of the long integer representation. - These shouldn't have to be changed as C should guarantee that a short - contains at least 16 bits, but it's made changeable anyway. 
- Note: 'digit' should be able to hold 2*MASK+1, and 'twodigits' - should be able to hold the intermediate results in 'mul' - (at most (BASE-1)*(2*BASE+1) == MASK*(2*MASK+3)). - Also, x_sub assumes that 'digit' is an unsigned type, and overflow - is handled by taking the result mod 2**N for some N > SHIFT. - And, at some places it is assumed that MASK fits in an int, as well. - long_pow() requires that SHIFT be divisible by 5. */ +/* Parameters of the long integer representation. There are two different + sets of parameters: one set for 30-bit digits, stored in an unsigned 32-bit + integer type, and one set for 15-bit digits with each digit stored in an + unsigned short. The value of PYLONG_BITS_IN_DIGIT, defined either at + configure time or in pyport.h, is used to decide which digit size to use. -typedef unsigned short digit; -typedef short sdigit; /* signed variant of digit */ -#define BASE_TWODIGITS_TYPE long -typedef unsigned BASE_TWODIGITS_TYPE twodigits; -typedef BASE_TWODIGITS_TYPE stwodigits; /* signed variant of twodigits */ + Type 'digit' should be able to hold 2*PyLong_BASE-1, and type 'twodigits' + should be an unsigned integer type able to hold all integers up to + PyLong_BASE*PyLong_BASE-1. x_sub assumes that 'digit' is an unsigned type, + and that overflow is handled by taking the result modulo 2**N for some N > + PyLong_SHIFT. 
The majority of the code doesn't care about the precise + value of PyLong_SHIFT, but there are some notable exceptions: + + - long_pow() requires that PyLong_SHIFT be divisible by 5 + + - PyLong_{As,From}ByteArray require that PyLong_SHIFT be at least 8 + + - long_hash() requires that PyLong_SHIFT is *strictly* less than the number + of bits in an unsigned long, as do the PyLong <-> long (or unsigned long) + conversion functions + + - the long <-> size_t/Py_ssize_t conversion functions expect that + PyLong_SHIFT is strictly less than the number of bits in a size_t + + - the marshal code currently expects that PyLong_SHIFT is a multiple of 15 + + - NSMALLNEGINTS and NSMALLPOSINTS should be small enough to fit in a single + digit; with the current values this forces PyLong_SHIFT >= 9 + The values 15 and 30 should fit all of the above requirements, on any + platform. +*/ + +#if HAVE_STDINT_H +#include <stdint.h> +#endif + +#if PYLONG_BITS_IN_DIGIT == 30 +#if !(defined HAVE_UINT64_T && defined HAVE_UINT32_T && \ + defined HAVE_INT64_T && defined HAVE_INT32_T) +#error "30-bit long digits requested, but the necessary types are not available on this platform" +#endif +typedef PY_UINT32_T digit; +typedef PY_INT32_T sdigit; /* signed variant of digit */ +typedef PY_UINT64_T twodigits; +typedef PY_INT64_T stwodigits; /* signed variant of twodigits */ +#define PyLong_SHIFT 30 +#elif PYLONG_BITS_IN_DIGIT == 15 +typedef unsigned short digit; +typedef short sdigit; /* signed variant of digit */ +typedef unsigned long twodigits; +typedef long stwodigits; /* signed variant of twodigits */ #define PyLong_SHIFT 15 +#else +#error "PYLONG_BITS_IN_DIGIT should be 15 or 30" +#endif #define PyLong_BASE ((digit)1 << PyLong_SHIFT) #define PyLong_MASK ((digit)(PyLong_BASE - 1)) diff --git a/Include/longobject.h b/Include/longobject.h index 7adf9c7..28fb707 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -26,6 +26,7 @@ PyAPI_FUNC(Py_ssize_t) PyLong_AsSsize_t(PyObject *); 
PyAPI_FUNC(size_t) PyLong_AsSize_t(PyObject *); PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *); PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *); +PyAPI_FUNC(PyObject *) PyLong_GetInfo(void); /* It may be useful in the future. I've added it in the PyInt -> PyLong cleanup to keep the extra information. [CH] */ diff --git a/Include/pyport.h b/Include/pyport.h index 97cc68d..9449b5f 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -69,6 +69,57 @@ Used in: PY_LONG_LONG #endif #endif /* HAVE_LONG_LONG */ +/* a build with 30-bit digits for Python long integers needs an exact-width + * 32-bit unsigned integer type to store those digits. (We could just use + * type 'unsigned long', but that would be wasteful on a system where longs + * are 64-bits.) On Unix systems, the autoconf macro AC_TYPE_UINT32_T defines + * uint32_t to be such a type unless stdint.h or inttypes.h defines uint32_t. + * However, it doesn't set HAVE_UINT32_T, so we do that here. + */ +#if (defined UINT32_MAX || defined uint32_t) +#ifndef PY_UINT32_T +#define HAVE_UINT32_T 1 +#define PY_UINT32_T uint32_t +#endif +#endif + +/* Macros for a 64-bit unsigned integer type; used for type 'twodigits' in the + * long integer implementation, when 30-bit digits are enabled. + */ +#if (defined UINT64_MAX || defined uint64_t) +#ifndef PY_UINT64_T +#define HAVE_UINT64_T 1 +#define PY_UINT64_T uint64_t +#endif +#endif + +/* Signed variants of the above */ +#if (defined INT32_MAX || defined int32_t) +#ifndef PY_INT32_T +#define HAVE_INT32_T 1 +#define PY_INT32_T int32_t +#endif +#endif +#if (defined INT64_MAX || defined int64_t) +#ifndef PY_INT64_T +#define HAVE_INT64_T 1 +#define PY_INT64_T int64_t +#endif +#endif + +/* If PYLONG_BITS_IN_DIGIT is not defined then we'll use 30-bit digits if all + the necessary integer types are available, and we're on a 64-bit platform + (as determined by SIZEOF_VOID_P); otherwise we use 15-bit digits. 
*/ + +#ifndef PYLONG_BITS_IN_DIGIT +#if (defined HAVE_UINT64_T && defined HAVE_INT64_T && \ + defined HAVE_UINT32_T && defined HAVE_INT32_T && SIZEOF_VOID_P >= 8) +#define PYLONG_BITS_IN_DIGIT 30 +#else +#define PYLONG_BITS_IN_DIGIT 15 +#endif +#endif + /* uintptr_t is the C9X name for an unsigned integral type such that a * legitimate void* can be cast to uintptr_t and then back to void* again * without loss of information. Similarly for intptr_t, wrt a signed diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 0e07090..92285b2 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -15,7 +15,7 @@ class Frm(object): return self.format % self.args # SHIFT should match the value in longintrepr.h for best testing. -SHIFT = 15 +SHIFT = sys.int_info.bits_per_digit BASE = 2 ** SHIFT MASK = BASE - 1 KARATSUBA_CUTOFF = 70 # from longobject.c @@ -120,6 +120,35 @@ class LongTest(unittest.TestCase): y = self.getran(leny) or 1 self.check_division(x, y) + # specific numbers chosen to exercise corner cases of the + # current long division implementation + + # 30-bit cases involving a quotient digit estimate of BASE+1 + self.check_division(1231948412290879395966702881, + 1147341367131428698) + self.check_division(815427756481275430342312021515587883, + 707270836069027745) + self.check_division(627976073697012820849443363563599041, + 643588798496057020) + self.check_division(1115141373653752303710932756325578065, + 1038556335171453937726882627) + # 30-bit cases that require the post-subtraction correction step + self.check_division(922498905405436751940989320930368494, + 949985870686786135626943396) + self.check_division(768235853328091167204009652174031844, + 1091555541180371554426545266) + + # 15-bit cases involving a quotient digit estimate of BASE+1 + self.check_division(20172188947443, 615611397) + self.check_division(1020908530270155025, 950795710) + self.check_division(128589565723112408, 736393718) + self.check_division(609919780285761575, 
18613274546784) + # 15-bit cases that require the post-subtraction correction step + self.check_division(710031681576388032, 26769404391308) + self.check_division(1933622614268221, 30212853348836) + + + def test_karatsuba(self): digits = list(range(1, 5)) + list(range(KARATSUBA_CUTOFF, KARATSUBA_CUTOFF + 10)) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 9f0c139..427b721 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -333,6 +333,9 @@ class SysModuleTest(unittest.TestCase): self.assert_(isinstance(sys.executable, str)) self.assertEqual(len(sys.float_info), 11) self.assertEqual(sys.float_info.radix, 2) + self.assertEqual(len(sys.int_info), 2) + self.assert_(sys.int_info.bits_per_digit % 5 == 0) + self.assert_(sys.int_info.sizeof_digit >= 1) self.assert_(isinstance(sys.hexversion, int)) self.assert_(isinstance(sys.maxsize, int)) self.assert_(isinstance(sys.maxunicode, int)) @@ -437,6 +440,7 @@ class SizeofTest(unittest.TestCase): if hasattr(sys, "gettotalrefcount"): self.header += '2P' self.vheader += '2P' + self.longdigit = sys.int_info.sizeof_digit import _testcapi self.gc_headsize = _testcapi.SIZEOF_PYGC_HEAD self.file = open(test.support.TESTFN, 'wb') @@ -471,7 +475,7 @@ class SizeofTest(unittest.TestCase): size = self.calcsize gc_header_size = self.gc_headsize # bool objects are not gc tracked - self.assertEqual(sys.getsizeof(True), size(vh) + self.H) + self.assertEqual(sys.getsizeof(True), size(vh) + self.longdigit) # but lists are self.assertEqual(sys.getsizeof([]), size(vh + 'PP') + gc_header_size) @@ -479,8 +483,8 @@ class SizeofTest(unittest.TestCase): h = self.header vh = self.vheader size = self.calcsize - self.assertEqual(sys.getsizeof(True), size(vh) + self.H) - self.assertEqual(sys.getsizeof(True, -1), size(vh) + self.H) + self.assertEqual(sys.getsizeof(True), size(vh) + self.longdigit) + self.assertEqual(sys.getsizeof(True, -1), size(vh) + self.longdigit) def test_objecttypes(self): # check all types defined in 
Objects/ @@ -489,7 +493,7 @@ class SizeofTest(unittest.TestCase): size = self.calcsize check = self.check_sizeof # bool - check(True, size(vh) + self.H) + check(True, size(vh) + self.longdigit) # buffer # XXX # builtin_function_or_method @@ -607,11 +611,12 @@ class SizeofTest(unittest.TestCase): check(reversed([]), size(h + 'lP')) # long check(0, size(vh)) - check(1, size(vh) + self.H) - check(-1, size(vh) + self.H) - check(32768, size(vh) + 2*self.H) - check(32768*32768-1, size(vh) + 2*self.H) - check(32768*32768, size(vh) + 3*self.H) + check(1, size(vh) + self.longdigit) + check(-1, size(vh) + self.longdigit) + PyLong_BASE = 2**sys.int_info.bits_per_digit + check(PyLong_BASE, size(vh) + 2*self.longdigit) + check(PyLong_BASE**2-1, size(vh) + 2*self.longdigit) + check(PyLong_BASE**2, size(vh) + 3*self.longdigit) # memory check(memoryview(b''), size(h + 'P PP2P2i7P')) # module diff --git a/Misc/NEWS b/Misc/NEWS index 90b6091..3e890f3 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,13 @@ What's New in Python 3.1 alpha 2? Core and Builtins ----------------- +- Issue #4258: Make it possible to use base 2**30 instead of base + 2**15 for the internal representation of integers, for performance + reasons. Base 2**30 is enabled by default on 64-bit machines. Add + --enable-big-digits option to configure, which overrides the + default. Add sys.int_info structseq to provide information about + the internal format. + - Issue #4474: PyUnicode_FromWideChar now converts characters outside the BMP to surrogate pairs, on systems with sizeof(wchar_t) == 4 and sizeof(Py_UNICODE) == 2. 
diff --git a/Objects/longobject.c b/Objects/longobject.c index b758c40..e1df9d9 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4,6 +4,7 @@ #include "Python.h" #include "longintrepr.h" +#include "structseq.h" #include <ctype.h> #include <stddef.h> @@ -204,6 +205,7 @@ PyLong_FromLong(long ival) return (PyObject*)v; } +#if PyLong_SHIFT==15 /* 2 digits */ if (!(abs_ival >> 2*PyLong_SHIFT)) { v = _PyLong_New(2); @@ -216,6 +218,7 @@ PyLong_FromLong(long ival) } return (PyObject*)v; } +#endif /* Larger numbers: loop to determine number of digits */ t = abs_ival; @@ -2864,10 +2867,20 @@ long_mul(PyLongObject *a, PyLongObject *b) CHECK_BINOP(a, b); + /* fast path for single-digit multiplication */ if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) { - PyObject *r; - r = PyLong_FromLong(MEDIUM_VALUE(a)*MEDIUM_VALUE(b)); - return r; + stwodigits v = (stwodigits)(MEDIUM_VALUE(a)) * MEDIUM_VALUE(b); +#ifdef HAVE_LONG_LONG + return PyLong_FromLongLong((PY_LONG_LONG)v); +#else + /* if we don't have long long then we're almost certainly + using 15-bit digits, so v will fit in a long. In the + unlikely event that we're using 30-bit digits on a platform + without long long, a large v will just cause us to fall + through to the general multiplication code below. */ + if (v >= LONG_MIN && v <= LONG_MAX) + return PyLong_FromLong((long)v); +#endif } z = k_mul(a, b); @@ -3991,6 +4004,45 @@ PyTypeObject PyLong_Type = { PyObject_Del, /* tp_free */ }; +static PyTypeObject Int_InfoType; + +PyDoc_STRVAR(int_info__doc__, +"sys.int_info\n\ +\n\ +A struct sequence that holds information about Python's\n\ +internal representation of integers. 
The attributes are read only."); + +static PyStructSequence_Field int_info_fields[] = { + {"bits_per_digit", "size of a digit in bits"}, + {"sizeof_digit", "size in bytes of the C type used to " + "represent a digit"}, + {NULL, NULL} +}; + +static PyStructSequence_Desc int_info_desc = { + "sys.int_info", /* name */ + int_info__doc__, /* doc */ + int_info_fields, /* fields */ + 2 /* number of fields */ +}; + +PyObject * +PyLong_GetInfo(void) +{ + PyObject* int_info; + int field = 0; + int_info = PyStructSequence_New(&Int_InfoType); + if (int_info == NULL) + return NULL; + PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(PyLong_SHIFT)); + PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + if (PyErr_Occurred()) { + Py_CLEAR(int_info); + return NULL; + } + return int_info; +} + int _PyLong_Init(void) { @@ -4023,6 +4075,10 @@ _PyLong_Init(void) v->ob_digit[0] = abs(ival); } #endif + /* initialize int_info */ + if (Int_InfoType.tp_name == 0) + PyStructSequence_InitType(&Int_InfoType, &int_info_desc); + return 1; } diff --git a/PC/pyconfig.h b/PC/pyconfig.h index d110476..04abc1f 100644 --- a/PC/pyconfig.h +++ b/PC/pyconfig.h @@ -396,6 +396,42 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */ #endif +/* define signed and unsigned exact-width 32-bit and 64-bit types, used in the + implementation of Python long integers. 
*/ +#ifndef PY_UINT32_T +#if SIZEOF_INT == 4 +#define HAVE_UINT32_T 1 +#define PY_UINT32_T unsigned int +#elif SIZEOF_LONG == 4 +#define HAVE_UINT32_T 1 +#define PY_UINT32_T unsigned long +#endif +#endif + +#ifndef PY_UINT64_T +#if SIZEOF_LONG_LONG == 8 +#define HAVE_UINT64_T 1 +#define PY_UINT64_T unsigned PY_LONG_LONG +#endif +#endif + +#ifndef PY_INT32_T +#if SIZEOF_INT == 4 +#define HAVE_INT32_T 1 +#define PY_INT32_T int +#elif SIZEOF_LONG == 4 +#define HAVE_INT32_T 1 +#define PY_INT32_T long +#endif +#endif + +#ifndef PY_INT64_T +#if SIZEOF_LONG_LONG == 8 +#define HAVE_INT64_T 1 +#define PY_INT64_T PY_LONG_LONG +#endif +#endif + /* Fairly standard from here! */ /* Define to 1 if you have the `copysign' function. */ diff --git a/Python/marshal.c b/Python/marshal.c index 8fe34d4..e5e5ce4 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -11,6 +11,8 @@ #include "code.h" #include "marshal.h" +#define ABS(x) ((x) < 0 ? -(x) : (x)) + /* High water mark to determine when the marshalled object is dangerously deep * and risks coring the interpreter. When the object stack gets this deep, * raise an exception instead of continuing. @@ -122,6 +124,56 @@ w_long64(long x, WFILE *p) } #endif +/* We assume that Python longs are stored internally in base some power of + 2**15; for the sake of portability we'll always read and write them in base + exactly 2**15. 
*/ + +#define PyLong_MARSHAL_SHIFT 15 +#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT) +#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1) +#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0 +#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT" +#endif +#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) + +static void +w_PyLong(const PyLongObject *ob, WFILE *p) +{ + Py_ssize_t i, j, n, l; + digit d; + + w_byte(TYPE_LONG, p); + if (Py_SIZE(ob) == 0) { + w_long((long)0, p); + return; + } + + /* set l to number of base PyLong_MARSHAL_BASE digits */ + n = ABS(Py_SIZE(ob)); + l = (n-1) * PyLong_MARSHAL_RATIO; + d = ob->ob_digit[n-1]; + assert(d != 0); /* a PyLong is always normalized */ + do { + d >>= PyLong_MARSHAL_SHIFT; + l++; + } while (d != 0); + w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p); + + for (i=0; i < n-1; i++) { + d = ob->ob_digit[i]; + for (j=0; j < PyLong_MARSHAL_RATIO; j++) { + w_short(d & PyLong_MARSHAL_MASK, p); + d >>= PyLong_MARSHAL_SHIFT; + } + assert (d == 0); + } + d = ob->ob_digit[n-1]; + do { + w_short(d & PyLong_MARSHAL_MASK, p); + d >>= PyLong_MARSHAL_SHIFT; + } while (d != 0); +} + static void w_object(PyObject *v, WFILE *p) { @@ -155,14 +207,8 @@ w_object(PyObject *v, WFILE *p) if ((x == -1) && PyErr_Occurred()) { PyLongObject *ob = (PyLongObject *)v; PyErr_Clear(); - w_byte(TYPE_LONG, p); - n = Py_SIZE(ob); - w_long((long)n, p); - if (n < 0) - n = -n; - for (i = 0; i < n; i++) - w_short(ob->ob_digit[i], p); - } + w_PyLong(ob, p); + } else { #if SIZEOF_LONG > 4 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31); @@ -481,6 +527,56 @@ r_long64(RFILE *p) } static PyObject * +r_PyLong(RFILE *p) +{ + PyLongObject *ob; + int size, i, j, md; + long n; + digit d; + + n = r_long(p); + if (n == 0) + return (PyObject *)_PyLong_New(0); + if (n < -INT_MAX || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, + "bad marshal data (long size out of range)"); + return NULL; + } + + size = 1 + (ABS(n)-1) / 
PyLong_MARSHAL_RATIO; + ob = _PyLong_New(size); + if (ob == NULL) + return NULL; + Py_SIZE(ob) = n > 0 ? size : -size; + + for (i = 0; i < size-1; i++) { + d = 0; + for (j=0; j < PyLong_MARSHAL_RATIO; j++) { + md = r_short(p); + if (md < 0 || md > PyLong_MARSHAL_BASE) + goto bad_digit; + d += (digit)md << j*PyLong_MARSHAL_SHIFT; + } + ob->ob_digit[i] = d; + } + d = 0; + for (j=0; j < (ABS(n)-1)%PyLong_MARSHAL_RATIO + 1; j++) { + md = r_short(p); + if (md < 0 || md > PyLong_MARSHAL_BASE) + goto bad_digit; + d += (digit)md << j*PyLong_MARSHAL_SHIFT; + } + ob->ob_digit[size-1] = d; + return (PyObject *)ob; + bad_digit: + Py_DECREF(ob); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (digit out of range in long)"); + return NULL; +} + + +static PyObject * r_object(RFILE *p) { /* NULL is a valid return value, it does not necessarily means that @@ -544,38 +640,8 @@ r_object(RFILE *p) break; case TYPE_LONG: - { - int size; - PyLongObject *ob; - n = r_long(p); - if (n < -INT_MAX || n > INT_MAX) { - PyErr_SetString(PyExc_ValueError, - "bad marshal data (long size out of range)"); - retval = NULL; - break; - } - size = n<0 ? 
-n : n; - ob = _PyLong_New(size); - if (ob == NULL) { - retval = NULL; - break; - } - Py_SIZE(ob) = n; - for (i = 0; i < size; i++) { - int digit = r_short(p); - if (digit < 0) { - Py_DECREF(ob); - PyErr_SetString(PyExc_ValueError, - "bad marshal data (negative digit in long)"); - ob = NULL; - break; - } - if (ob != NULL) - ob->ob_digit[i] = digit; - } - retval = (PyObject *)ob; - break; - } + retval = r_PyLong(p); + break; case TYPE_FLOAT: { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b47a25b..443d8eb 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1012,6 +1012,7 @@ PyDoc_STR( Static objects:\n\ \n\ float_info -- a dict with information about the float implementation.\n\ +int_info -- a struct sequence with information about the int implementation.\n\ maxsize -- the largest supported length of containers.\n\ maxunicode -- the largest supported character\n\ builtin_module_names -- tuple of module names built into this interpreter\n\ @@ -1375,6 +1376,8 @@ _PySys_Init(void) PyLong_FromSsize_t(PY_SSIZE_T_MAX)); SET_SYS_FROM_STRING("float_info", PyFloat_GetInfo()); + SET_SYS_FROM_STRING("int_info", + PyLong_GetInfo()); SET_SYS_FROM_STRING("maxunicode", PyLong_FromLong(PyUnicode_GetMax())); SET_SYS_FROM_STRING("builtin_module_names", diff --git a/configure b/configure index c83ae88..85db6d6 100755 --- a/configure +++ b/configure @@ -1320,6 +1320,8 @@ Optional Features: --enable-profiling enable C-level code profiling --enable-ipv6 Enable ipv6 (with ipv4) support --disable-ipv6 Disable ipv6 support + --enable-big-digits[=BITS] + use big digits for Python longs [BITS=30] Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -6965,6 +6967,386 @@ _ACEOF fi + + { echo "$as_me:$LINENO: checking for uint32_t" >&5 +echo $ECHO_N "checking for uint32_t... 
$ECHO_C" >&6; } +if test "${ac_cv_c_uint32_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_c_uint32_t=no + for ac_type in 'uint32_t' 'unsigned int' 'unsigned long int' \ + 'unsigned long long int' 'unsigned short int' 'unsigned char'; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(($ac_type) -1 >> (32 - 1) == 1)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + case $ac_type in + uint32_t) ac_cv_c_uint32_t=yes ;; + *) ac_cv_c_uint32_t=$ac_type ;; +esac + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_uint32_t" != no && break + done +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_uint32_t" >&5 +echo "${ECHO_T}$ac_cv_c_uint32_t" >&6; } + case $ac_cv_c_uint32_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<\_ACEOF +#define _UINT32_T 1 +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define uint32_t $ac_cv_c_uint32_t +_ACEOF +;; + esac + + + { echo "$as_me:$LINENO: checking for uint64_t" >&5 +echo $ECHO_N "checking for uint64_t... 
$ECHO_C" >&6; } +if test "${ac_cv_c_uint64_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_c_uint64_t=no + for ac_type in 'uint64_t' 'unsigned int' 'unsigned long int' \ + 'unsigned long long int' 'unsigned short int' 'unsigned char'; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(($ac_type) -1 >> (64 - 1) == 1)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + case $ac_type in + uint64_t) ac_cv_c_uint64_t=yes ;; + *) ac_cv_c_uint64_t=$ac_type ;; +esac + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_uint64_t" != no && break + done +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_uint64_t" >&5 +echo "${ECHO_T}$ac_cv_c_uint64_t" >&6; } + case $ac_cv_c_uint64_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<\_ACEOF +#define _UINT64_T 1 +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define uint64_t $ac_cv_c_uint64_t +_ACEOF +;; + esac + + + { echo "$as_me:$LINENO: checking for int32_t" >&5 +echo $ECHO_N "checking for int32_t... 
$ECHO_C" >&6; } +if test "${ac_cv_c_int32_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_c_int32_t=no + for ac_type in 'int32_t' 'int' 'long int' \ + 'long long int' 'short int' 'signed char'; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(0 < ($ac_type) (((($ac_type) 1 << (32 - 2)) - 1) * 2 + 1))]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(($ac_type) (((($ac_type) 1 << (32 - 2)) - 1) * 2 + 1) + < ($ac_type) (((($ac_type) 1 << (32 - 2)) - 1) * 2 + 2))]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + case $ac_type in + int32_t) ac_cv_c_int32_t=yes ;; + *) ac_cv_c_int32_t=$ac_type ;; +esac + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_int32_t" != no && break + done +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_int32_t" >&5 +echo "${ECHO_T}$ac_cv_c_int32_t" >&6; } + case $ac_cv_c_int32_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<_ACEOF +#define int32_t $ac_cv_c_int32_t +_ACEOF +;; + esac + + + { echo "$as_me:$LINENO: checking for int64_t" >&5 +echo $ECHO_N "checking for int64_t... $ECHO_C" >&6; } +if test "${ac_cv_c_int64_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_c_int64_t=no + for ac_type in 'int64_t' 'int' 'long int' \ + 'long long int' 'short int' 'signed char'; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(0 < ($ac_type) (((($ac_type) 1 << (64 - 2)) - 1) * 2 + 1))]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !(($ac_type) (((($ac_type) 1 << (64 - 2)) - 1) * 2 + 1) + < ($ac_type) (((($ac_type) 1 << (64 - 2)) - 1) * 2 + 2))]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + case $ac_type in + int64_t) ac_cv_c_int64_t=yes ;; + *) ac_cv_c_int64_t=$ac_type ;; +esac + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_int64_t" != no && break + done +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_int64_t" >&5 +echo "${ECHO_T}$ac_cv_c_int64_t" >&6; } + case $ac_cv_c_int64_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<_ACEOF +#define int64_t $ac_cv_c_int64_t +_ACEOF +;; + esac + { echo "$as_me:$LINENO: checking for ssize_t" >&5 echo $ECHO_N "checking for ssize_t... $ECHO_C" >&6; } if test "${ac_cv_type_ssize_t+set}" = set; then @@ -21687,6 +22069,37 @@ fi LIBS=$LIBS_SAVE +# determine what size digit to use for Python's longs +{ echo "$as_me:$LINENO: checking digit size for Python's longs" >&5 +echo $ECHO_N "checking digit size for Python's longs... $ECHO_C" >&6; } +# Check whether --enable-big-digits was given. 
+if test "${enable_big_digits+set}" = set; then + enableval=$enable_big_digits; case $enable_big_digits in +yes) + enable_big_digits=30 ;; +no) + enable_big_digits=15 ;; +15|30) + ;; +*) + { { echo "$as_me:$LINENO: error: bad value $enable_big_digits for --enable-big-digits; value should be 15 or 30" >&5 +echo "$as_me: error: bad value $enable_big_digits for --enable-big-digits; value should be 15 or 30" >&2;} + { (exit 1); exit 1; }; } ;; +esac +{ echo "$as_me:$LINENO: result: $enable_big_digits" >&5 +echo "${ECHO_T}$enable_big_digits" >&6; } + +cat >>confdefs.h <<_ACEOF +#define PYLONG_BITS_IN_DIGIT $enable_big_digits +_ACEOF + + +else + { echo "$as_me:$LINENO: result: no value specified" >&5 +echo "${ECHO_T}no value specified" >&6; } +fi + + # check for wchar.h if test "${ac_cv_header_wchar_h+set}" = set; then { echo "$as_me:$LINENO: checking for wchar.h" >&5 diff --git a/configure.in b/configure.in index a29a4c5..f57c8ae 100644 --- a/configure.in +++ b/configure.in @@ -1288,6 +1288,10 @@ AC_TYPE_PID_T AC_TYPE_SIGNAL AC_TYPE_SIZE_T AC_TYPE_UID_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_TYPE_INT32_T +AC_TYPE_INT64_T AC_CHECK_TYPE(ssize_t, AC_DEFINE(HAVE_SSIZE_T, 1, Define if your compiler provides ssize_t),,) @@ -3125,6 +3129,25 @@ AC_CHECK_DECLS([isinf, isnan, isfinite], [], [], [[#include <math.h>]]) LIBS=$LIBS_SAVE +# determine what size digit to use for Python's longs +AC_MSG_CHECKING([digit size for Python's longs]) +AC_ARG_ENABLE(big-digits, +AC_HELP_STRING([--enable-big-digits@<:@=BITS@:>@],[use big digits for Python longs [[BITS=30]]]), +[case $enable_big_digits in +yes) + enable_big_digits=30 ;; +no) + enable_big_digits=15 ;; +[15|30]) + ;; +*) + AC_MSG_ERROR([bad value $enable_big_digits for --enable-big-digits; value should be 15 or 30]) ;; +esac +AC_MSG_RESULT($enable_big_digits) +AC_DEFINE_UNQUOTED(PYLONG_BITS_IN_DIGIT, $enable_big_digits, [Define as the preferred size in bits of long digits]) +], +[AC_MSG_RESULT(no value specified)]) + # check for wchar.h 
AC_CHECK_HEADER(wchar.h, [ AC_DEFINE(HAVE_WCHAR_H, 1, diff --git a/pyconfig.h.in b/pyconfig.h.in index adc422f..db7f9dd 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -855,6 +855,9 @@ /* Defined if PTHREAD_SCOPE_SYSTEM supported. */ #undef PTHREAD_SYSTEM_SCHED_SUPPORTED +/* Define as the preferred size in bits of long digits */ +#undef PYLONG_BITS_IN_DIGIT + /* Define to printf format modifier for Py_ssize_t */ #undef PY_FORMAT_SIZE_T @@ -1036,6 +1039,16 @@ /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT +/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef was allowed, the + #define below would cause a syntax error. */ +#undef _UINT32_T + +/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef was allowed, the + #define below would cause a syntax error. */ +#undef _UINT64_T + /* Define to the level of X/Open that your system supports */ #undef _XOPEN_SOURCE @@ -1062,6 +1075,14 @@ /* Define to `int' if <sys/types.h> doesn't define. */ #undef gid_t +/* Define to the type of a signed integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +#undef int32_t + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef int64_t + /* Define to `int' if <sys/types.h> does not define. */ #undef mode_t @@ -1083,6 +1104,14 @@ /* Define to `int' if <sys/types.h> doesn't define. */ #undef uid_t +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +#undef uint32_t + +/* Define to the type of an unsigned integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef uint64_t + /* Define to empty if the keyword does not work. */ #undef volatile -- cgit v0.12