diff options
-rw-r--r-- | Doc/lib/libstruct.tex | 17 | ||||
-rw-r--r-- | Lib/test/test_struct.py | 171 | ||||
-rw-r--r-- | Misc/NEWS | 9 | ||||
-rw-r--r-- | Modules/structmodule.c | 192 | ||||
-rw-r--r-- | Objects/longobject.c | 25 |
5 files changed, 337 insertions, 77 deletions
diff --git a/Doc/lib/libstruct.tex b/Doc/lib/libstruct.tex index 9a1942d..f8056a2 100644 --- a/Doc/lib/libstruct.tex +++ b/Doc/lib/libstruct.tex @@ -72,7 +72,8 @@ Notes: \item[(1)] The \character{q} and \character{Q} conversion codes are available in native mode only if the platform C compiler supports C \ctype{long long}, - or, on Windows, \ctype{__int64}. + or, on Windows, \ctype{__int64}. They're always available in standard + modes. \end{description} @@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation is truncated. If the string is too short, padding is used to ensure that exactly enough bytes are used to satisfy the count. -For the \character{I} and \character{L} format characters, the return -value is a Python long integer. +For the \character{I}, \character{L}, \character{q} and \character{Q} +format characters, the return value is a Python long integer. For the \character{P} format character, the return value is a Python integer or long integer, depending on the size needed to hold a @@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's order. Standard size and alignment are as follows: no alignment is required -for any type (so you have to use pad bytes); \ctype{short} is 2 bytes; -\ctype{int} and \ctype{long} are 4 bytes. \ctype{float} and -\ctype{double} are 32-bit and 64-bit IEEE floating point numbers, -respectively. +for any type (so you have to use pad bytes); +\ctype{short} is 2 bytes; +\ctype{int} and \ctype{long} are 4 bytes; +\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes; +\ctype{float} and \ctype{double} are 32-bit and 64-bit +IEEE floating point numbers, respectively. Note the difference between \character{@} and \character{=}: both use native byte order, but the size and alignment of the latter is diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index c977913..e6c8bb2 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -12,6 +12,16 @@ def simple_err(func, *args): func.__name__, args) ## pdb.set_trace() +def any_err(func, *args): + try: + apply(func, args) + except (struct.error, OverflowError, TypeError): + pass + else: + raise TestFailed, "%s%s did not raise error" % ( + func.__name__, args) +## pdb.set_trace() + simple_err(struct.calcsize, 'Z') sz = struct.calcsize('i') @@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests: raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % ( `fmt`, `res`, `rev`, `arg`) -# Some q/Q sanity checks. +########################################################################### +# q/Q tests. has_native_qQ = 1 try: @@ -124,18 +135,22 @@ except struct.error: if verbose: print "Platform has native q/Q?", has_native_qQ and "Yes." or "No." -simple_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless +any_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless simple_err(struct.pack, "Q", "a") # ditto, but 'Q' +def string_reverse(s): + chars = list(s) + chars.reverse() + return "".join(chars) + def bigendian_to_native(value): if isbigendian: return value - chars = list(value) - chars.reverse() - return "".join(chars) + else: + return string_reverse(value) -if has_native_qQ: +def test_native_qQ(): bytes = struct.calcsize('q') # The expected values here are in big-endian format, primarily because # I'm on a little-endian machine and so this is the clearest way (for @@ -156,3 +171,147 @@ if has_native_qQ: verify(retrieved == input, "%r-unpack of %r gave %r, not %r" % (format, got, retrieved, input)) + +if has_native_qQ: + test_native_qQ() + +# Standard q/Q (8 bytes; should work on all platforms). + +MIN_Q, MAX_Q = 0, 2L**64 - 1 +MIN_q, MAX_q = -(2L**63), 2L**63 - 1 + +import binascii +def test_one_qQ(x, pack=struct.pack, + unpack=struct.unpack, + unhexlify=binascii.unhexlify): + if verbose: + print "trying std q/Q on", x, "==", hex(x) + + # Try 'q'. + if MIN_q <= x <= MAX_q: + # Try '>q'. + expected = long(x) + if x < 0: + expected += 1L << 64 + assert expected > 0 + expected = hex(expected)[2:-1] # chop "0x" and trailing 'L' + if len(expected) & 1: + expected = "0" + expected + expected = unhexlify(expected) + expected = "\x00" * (8 - len(expected)) + expected + + # >q pack work? + got = pack(">q", x) + verify(got == expected, + "'>q'-pack of %r gave %r, not %r" % + (x, got, expected)) + + # >q unpack work? + retrieved = unpack(">q", got)[0] + verify(x == retrieved, + "'>q'-unpack of %r gave %r, not %r" % + (got, retrieved, x)) + + # Adding any byte should cause a "too big" error. + any_err(unpack, ">q", '\x01' + got) + + # Try '<q'. + expected = string_reverse(expected) + + # <q pack work? + got = pack("<q", x) + verify(got == expected, + "'<q'-pack of %r gave %r, not %r" % + (x, got, expected)) + + # <q unpack work? + retrieved = unpack("<q", got)[0] + verify(x == retrieved, + "'<q'-unpack of %r gave %r, not %r" % + (got, retrieved, x)) + + # Adding any byte should cause a "too big" error. + any_err(unpack, "<q", '\x01' + got) + + else: + # x is out of q's range -- verify pack realizes that. + any_err(pack, '>q', x) + any_err(pack, '<q', x) + + # Much the same for 'Q'. + if MIN_Q <= x <= MAX_Q: + # Try '>Q'. + expected = long(x) + expected = hex(expected)[2:-1] # chop "0x" and trailing 'L' + if len(expected) & 1: + expected = "0" + expected + expected = unhexlify(expected) + expected = "\x00" * (8 - len(expected)) + expected + + # >Q pack work? + got = pack(">Q", x) + verify(got == expected, + "'>Q'-pack of %r gave %r, not %r" % + (x, got, expected)) + + # >Q unpack work? + retrieved = unpack(">Q", got)[0] + verify(x == retrieved, + "'>Q'-unpack of %r gave %r, not %r" % + (got, retrieved, x)) + + # Adding any byte should cause a "too big" error. + any_err(unpack, ">Q", '\x01' + got) + + # Try '<Q'. + expected = string_reverse(expected) + + # <Q pack work? + got = pack("<Q", x) + verify(got == expected, + "'<Q'-pack of %r gave %r, not %r" % + (x, got, expected)) + + # <Q unpack work? + retrieved = unpack("<Q", got)[0] + verify(x == retrieved, + "'<Q'-unpack of %r gave %r, not %r" % + (got, retrieved, x)) + + # Adding any byte should cause a "too big" error. + any_err(unpack, "<Q", '\x01' + got) + + else: + # x is out of Q's range -- verify pack realizes that. + any_err(pack, '>Q', x) + any_err(pack, '<Q', x) + +def test_std_qQ(): + from random import randrange + + # Create all interesting powers of 2. + values = [] + for exp in range(70): + values.append(1L << exp) + + # Add some random 64-bit values. + for i in range(50): + val = 0L + for j in range(8): + val = (val << 8) | randrange(256) + values.append(val) + + # Try all those, and their negations, and +-1 from them. Note + # that this tests all power-of-2 boundaries in range, and a few out + # of range, plus +-(2**n +- 1). + for base in values: + for val in -base, base: + for incr in -1, 0, 1: + x = val + incr + try: + x = int(x) + except OverflowError: + pass + test_one_qQ(x) + +test_std_qQ() @@ -84,6 +84,9 @@ Core sortdict(dict) function for a simple way to display a dict in sorted order. +- Many other small changes to dicts were made, resulting in faster + operation along the most common code paths. + - Dictionary objects now support the "in" operator: "x in dict" means the same as dict.has_key(x). @@ -119,7 +122,7 @@ Core - Collisions in dicts are resolved via a new approach, which can help dramatically in bad cases. For example, looking up every key in a dict - d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x + d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x faster now. Thanks to Christian Tismer for pointing out the cause and the nature of an effective cure (last December! better late than never). @@ -145,8 +148,8 @@ Library native mode, these can be used only when the platform C compiler supports these types (when HAVE_LONG_LONG is #define'd by the Python config process), and then they inherit the sizes and alignments of the C types. - XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and - XXX TODO are 8-byte integral types. + In standard mode, 'q' and 'Q' are supported on all platforms, and are + 8-byte integral types. Tests diff --git a/Modules/structmodule.c b/Modules/structmodule.c index 9b79978..4a8886f 100644 --- a/Modules/structmodule.c +++ b/Modules/structmodule.c @@ -80,6 +80,34 @@ typedef struct { char c; LONG_LONG x; } s_long_long; #pragma options align=reset #endif +/* Helper to get a PyLongObject by hook or by crook. Caller should decref. */ + +static PyObject * +get_pylong(PyObject *v) +{ + PyNumberMethods *m; + + assert(v != NULL); + if (PyInt_Check(v)) + return PyLong_FromLong(PyInt_AS_LONG(v)); + if (PyLong_Check(v)) { + Py_INCREF(v); + return v; + } + m = v->ob_type->tp_as_number; + if (m != NULL && m->nb_long != NULL) { + v = m->nb_long(v); + if (v == NULL) + return NULL; + if (PyLong_Check(v)) + return v; + Py_DECREF(v); + } + PyErr_SetString(StructError, + "cannot convert argument to long"); + return NULL; +} + /* Helper routine to get a Python integer and raise the appropriate error if it isn't one */ @@ -123,33 +151,13 @@ static int get_longlong(PyObject *v, LONG_LONG *p) { LONG_LONG x; - int v_needs_decref = 0; - if (PyInt_Check(v)) { - x = (LONG_LONG)PyInt_AS_LONG(v); - *p = x; - return 0; - } - if (!PyLong_Check(v)) { - PyNumberMethods *m = v->ob_type->tp_as_number; - if (m != NULL && m->nb_long != NULL) { - v = m->nb_long(v); - if (v == NULL) - return -1; - v_needs_decref = 1; - } - if (!PyLong_Check(v)) { - PyErr_SetString(StructError, - "cannot convert argument to long"); - if (v_needs_decref) - Py_DECREF(v); - return -1; - } - } + v = get_pylong(v); + if (v == NULL) + return -1; assert(PyLong_Check(v)); x = PyLong_AsLongLong(v); - if (v_needs_decref) - Py_DECREF(v); + Py_DECREF(v); if (x == (LONG_LONG)-1 && PyErr_Occurred()) return -1; *p = x; @@ -162,39 +170,13 @@ static int get_ulonglong(PyObject *v, unsigned LONG_LONG *p) { unsigned LONG_LONG x; - int v_needs_decref = 0; - if (PyInt_Check(v)) { - long i = PyInt_AS_LONG(v); - if (i < 0) { - PyErr_SetString(StructError, "can't convert negative " - "int to unsigned"); - return -1; - } - x = (unsigned LONG_LONG)i; - *p = x; - return 0; - } - if (!PyLong_Check(v)) { - PyNumberMethods *m = v->ob_type->tp_as_number; - if (m != NULL && m->nb_long != NULL) { - v = m->nb_long(v); - if (v == NULL) - return -1; - v_needs_decref = 1; - } - if (!PyLong_Check(v)) { - PyErr_SetString(StructError, - "cannot convert argument to long"); - if (v_needs_decref) - Py_DECREF(v); - return -1; - } - } + v = get_pylong(v); + if (v == NULL) + return -1; assert(PyLong_Check(v)); x = PyLong_AsUnsignedLongLong(v); - if (v_needs_decref) - Py_DECREF(v); + Py_DECREF(v); if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred()) return -1; *p = x; @@ -500,7 +482,7 @@ typedef struct _formatdef { TYPE is one of char, byte, ubyte, etc. */ -/* Native mode routines. */ +/* Native mode routines. ****************************************************/ static PyObject * nu_char(const char *p, const formatdef *f) @@ -797,6 +779,8 @@ static formatdef native_table[] = { {0} }; +/* Big-endian routines. *****************************************************/ + static PyObject * bu_int(const char *p, const formatdef *f) { @@ -826,6 +810,24 @@ bu_uint(const char *p, const formatdef *f) } static PyObject * +bu_longlong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 0, /* little-endian */ + 1 /* signed */); +} + +static PyObject * +bu_ulonglong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 0, /* little-endian */ + 0 /* signed */); +} + +static PyObject * bu_float(const char *p, const formatdef *f) { return unpack_float(p, 1); @@ -868,6 +870,34 @@ bp_uint(char *p, PyObject *v, const formatdef *f) } static int +bp_longlong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + res = _PyLong_AsByteArray((PyLongObject *)v, + (unsigned char *)p, + 8, + 0, /* little_endian */ + 1 /* signed */); + Py_DECREF(v); + return res; +} + +static int +bp_ulonglong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + res = _PyLong_AsByteArray((PyLongObject *)v, + (unsigned char *)p, + 8, + 0, /* little_endian */ + 0 /* signed */); + Py_DECREF(v); + return res; +} + +static int bp_float(char *p, PyObject *v, const formatdef *f) { double x = PyFloat_AsDouble(v); @@ -904,11 +934,15 @@ static formatdef bigendian_table[] = { {'I', 4, 0, bu_uint, bp_uint}, {'l', 4, 0, bu_int, bp_int}, {'L', 4, 0, bu_uint, bp_uint}, + {'q', 8, 0, bu_longlong, bp_longlong}, + {'Q', 8, 0, bu_ulonglong, bp_ulonglong}, {'f', 4, 0, bu_float, bp_float}, {'d', 8, 0, bu_double, bp_double}, {0} }; +/* Little-endian routines. *****************************************************/ + static PyObject * lu_int(const char *p, const formatdef *f) { @@ -938,6 +972,24 @@ lu_uint(const char *p, const formatdef *f) } static PyObject * +lu_longlong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 1, /* little-endian */ + 1 /* signed */); +} + +static PyObject * +lu_ulonglong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 1, /* little-endian */ + 0 /* signed */); +} + +static PyObject * lu_float(const char *p, const formatdef *f) { return unpack_float(p+3, -1); @@ -980,6 +1032,34 @@ lp_uint(char *p, PyObject *v, const formatdef *f) } static int +lp_longlong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + res = _PyLong_AsByteArray((PyLongObject*)v, + (unsigned char *)p, + 8, + 1, /* little_endian */ + 1 /* signed */); + Py_DECREF(v); + return res; +} + +static int +lp_ulonglong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + res = _PyLong_AsByteArray((PyLongObject*)v, + (unsigned char *)p, + 8, + 1, /* little_endian */ + 0 /* signed */); + Py_DECREF(v); + return res; +} + +static int lp_float(char *p, PyObject *v, const formatdef *f) { double x = PyFloat_AsDouble(v); @@ -1016,6 +1096,8 @@ static formatdef lilendian_table[] = { {'I', 4, 0, lu_uint, lp_uint}, {'l', 4, 0, lu_int, lp_int}, {'L', 4, 0, lu_uint, lp_uint}, + {'q', 8, 0, lu_longlong, lp_longlong}, + {'Q', 8, 0, lu_ulonglong, lp_ulonglong}, {'f', 4, 0, lu_float, lp_float}, {'d', 8, 0, lu_double, lp_double}, {0} diff --git a/Objects/longobject.c b/Objects/longobject.c index 92f8b04..fac8bb6 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v, accumbits = 0; carry = do_twos_comp ? 1 : 0; for (i = 0; i < ndigits; ++i) { + unsigned int oldaccumbits = accumbits; twodigits thisdigit = v->ob_digit[i]; if (do_twos_comp) { thisdigit = (thisdigit ^ MASK) + carry; carry = thisdigit >> SHIFT; thisdigit &= MASK; } + if (i < ndigits - 1) + accumbits += SHIFT; + else { + /* The most-significant digit may be partly empty. */ + twodigits bitmask = 1 << (SHIFT - 1); + twodigits signbit = do_twos_comp << (SHIFT - 1); + unsigned int nsignbits = 0; + while ((thisdigit & bitmask) == signbit && bitmask) { + ++nsignbits; + bitmask >>= 1; + signbit >>= 1; + } + accumbits += SHIFT - nsignbits; + } /* Because we're going LSB to MSB, thisdigit is more significant than what's already in accum, so needs to be prepended to accum. */ - accum |= thisdigit << accumbits; - accumbits += SHIFT; + accum |= thisdigit << oldaccumbits; /* Store as many bytes as possible. */ - assert(accumbits >= 8); - do { + while (accumbits >= 8) { if (j >= n) goto Overflow; ++j; @@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v, p += pincr; accumbits -= 8; accum >>= 8; - } while (accumbits >= 8); + } } /* Store the straggler (if any). */ assert(accumbits < 8); assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */ - if (accum) { + if (accumbits > 0) { if (j >= n) goto Overflow; ++j; |