From 7a3bfc3a472dafc42d20845389eb79db8af0b046 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 12 Jun 2001 01:22:22 +0000 Subject: Added q/Q standard (x-platform 8-byte ints) mode in struct module. This completes the q/Q project. longobject.c _PyLong_AsByteArray: The original code had a gross bug: the most-significant Python digit doesn't necessarily have SHIFT significant bits, and you really need to count how many copies of the sign bit it has else spurious overflow errors result. test_struct.py: This now does exhaustive std q/Q testing at, and on both sides of, all relevant power-of-2 boundaries, both positive and negative. NEWS: Added brief dict news while I was at it. --- Doc/lib/libstruct.tex | 17 +++-- Lib/test/test_struct.py | 171 ++++++++++++++++++++++++++++++++++++++++-- Misc/NEWS | 9 ++- Modules/structmodule.c | 192 ++++++++++++++++++++++++++++++++++-------------- Objects/longobject.c | 25 +++++-- 5 files changed, 337 insertions(+), 77 deletions(-) diff --git a/Doc/lib/libstruct.tex b/Doc/lib/libstruct.tex index 9a1942d..f8056a2 100644 --- a/Doc/lib/libstruct.tex +++ b/Doc/lib/libstruct.tex @@ -72,7 +72,8 @@ Notes: \item[(1)] The \character{q} and \character{Q} conversion codes are available in native mode only if the platform C compiler supports C \ctype{long long}, - or, on Windows, \ctype{__int64}. + or, on Windows, \ctype{__int64}. They're always available in standard + modes. \end{description} @@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation is truncated. If the string is too short, padding is used to ensure that exactly enough bytes are used to satisfy the count. -For the \character{I} and \character{L} format characters, the return -value is a Python long integer. +For the \character{I}, \character{L}, \character{q} and \character{Q} +format characters, the return value is a Python long integer. 
For the \character{P} format character, the return value is a Python integer or long integer, depending on the size needed to hold a @@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's order. Standard size and alignment are as follows: no alignment is required -for any type (so you have to use pad bytes); \ctype{short} is 2 bytes; -\ctype{int} and \ctype{long} are 4 bytes. \ctype{float} and -\ctype{double} are 32-bit and 64-bit IEEE floating point numbers, -respectively. +for any type (so you have to use pad bytes); +\ctype{short} is 2 bytes; +\ctype{int} and \ctype{long} are 4 bytes; +\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes; +\ctype{float} and \ctype{double} are 32-bit and 64-bit +IEEE floating point numbers, respectively. Note the difference between \character{@} and \character{=}: both use native byte order, but the size and alignment of the latter is diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index c977913..e6c8bb2 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -12,6 +12,16 @@ def simple_err(func, *args): func.__name__, args) ## pdb.set_trace() +def any_err(func, *args): + try: + apply(func, args) + except (struct.error, OverflowError, TypeError): + pass + else: + raise TestFailed, "%s%s did not raise error" % ( + func.__name__, args) +## pdb.set_trace() + simple_err(struct.calcsize, 'Z') sz = struct.calcsize('i') @@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests: raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % ( `fmt`, `res`, `rev`, `arg`) -# Some q/Q sanity checks. +########################################################################### +# q/Q tests. has_native_qQ = 1 try: @@ -124,18 +135,22 @@ except struct.error: if verbose: print "Platform has native q/Q?", has_native_qQ and "Yes." or "No." 
-simple_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless +any_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless simple_err(struct.pack, "Q", "a") # ditto, but 'Q' +def string_reverse(s): + chars = list(s) + chars.reverse() + return "".join(chars) + def bigendian_to_native(value): if isbigendian: return value - chars = list(value) - chars.reverse() - return "".join(chars) + else: + return string_reverse(value) -if has_native_qQ: +def test_native_qQ(): bytes = struct.calcsize('q') # The expected values here are in big-endian format, primarily because # I'm on a little-endian machine and so this is the clearest way (for @@ -156,3 +171,147 @@ if has_native_qQ: verify(retrieved == input, "%r-unpack of %r gave %r, not %r" % (format, got, retrieved, input)) + +if has_native_qQ: + test_native_qQ() + +# Standard q/Q (8 bytes; should work on all platforms). + +MIN_Q, MAX_Q = 0, 2L**64 - 1 +MIN_q, MAX_q = -(2L**63), 2L**63 - 1 + +import binascii +def test_one_qQ(x, pack=struct.pack, + unpack=struct.unpack, + unhexlify=binascii.unhexlify): + if verbose: + print "trying std q/Q on", x, "==", hex(x) + + # Try 'q'. + if MIN_q <= x <= MAX_q: + # Try '>q'. + expected = long(x) + if x < 0: + expected += 1L << 64 + assert expected > 0 + expected = hex(expected)[2:-1] # chop "0x" and trailing 'L' + if len(expected) & 1: + expected = "0" + expected + expected = unhexlify(expected) + expected = "\x00" * (8 - len(expected)) + expected + + # >q pack work? + got = pack(">q", x) + verify(got == expected, + "'>q'-pack of %r gave %r, not %r" % + (x, got, expected)) + + # >q unpack work? + retrieved = unpack(">q", got)[0] + verify(x == retrieved, + "'>q'-unpack of %r gave %r, not %r" % + (got, retrieved, x)) + + # Adding any byte should cause a "too big" error. + any_err(unpack, ">q", '\x01' + got) + + # Try 'q', x) + any_err(pack, 'Q'. 
+ expected = long(x) + expected = hex(expected)[2:-1] # chop "0x" and trailing 'L' + if len(expected) & 1: + expected = "0" + expected + expected = unhexlify(expected) + expected = "\x00" * (8 - len(expected)) + expected + + # >Q pack work? + got = pack(">Q", x) + verify(got == expected, + "'>Q'-pack of %r gave %r, not %r" % + (x, got, expected)) + + # >Q unpack work? + retrieved = unpack(">Q", got)[0] + verify(x == retrieved, + "'>Q'-unpack of %r gave %r, not %r" % + (got, retrieved, x)) + + # Adding any byte should cause a "too big" error. + any_err(unpack, ">Q", '\x01' + got) + + # Try 'Q', x) + any_err(pack, 'ob_type->tp_as_number; + if (m != NULL && m->nb_long != NULL) { + v = m->nb_long(v); + if (v == NULL) + return NULL; + if (PyLong_Check(v)) + return v; + Py_DECREF(v); + } + PyErr_SetString(StructError, + "cannot convert argument to long"); + return NULL; +} + /* Helper routine to get a Python integer and raise the appropriate error if it isn't one */ @@ -123,33 +151,13 @@ static int get_longlong(PyObject *v, LONG_LONG *p) { LONG_LONG x; - int v_needs_decref = 0; - if (PyInt_Check(v)) { - x = (LONG_LONG)PyInt_AS_LONG(v); - *p = x; - return 0; - } - if (!PyLong_Check(v)) { - PyNumberMethods *m = v->ob_type->tp_as_number; - if (m != NULL && m->nb_long != NULL) { - v = m->nb_long(v); - if (v == NULL) - return -1; - v_needs_decref = 1; - } - if (!PyLong_Check(v)) { - PyErr_SetString(StructError, - "cannot convert argument to long"); - if (v_needs_decref) - Py_DECREF(v); - return -1; - } - } + v = get_pylong(v); + if (v == NULL) + return -1; assert(PyLong_Check(v)); x = PyLong_AsLongLong(v); - if (v_needs_decref) - Py_DECREF(v); + Py_DECREF(v); if (x == (LONG_LONG)-1 && PyErr_Occurred()) return -1; *p = x; @@ -162,39 +170,13 @@ static int get_ulonglong(PyObject *v, unsigned LONG_LONG *p) { unsigned LONG_LONG x; - int v_needs_decref = 0; - if (PyInt_Check(v)) { - long i = PyInt_AS_LONG(v); - if (i < 0) { - PyErr_SetString(StructError, "can't convert negative " - 
"int to unsigned"); - return -1; - } - x = (unsigned LONG_LONG)i; - *p = x; - return 0; - } - if (!PyLong_Check(v)) { - PyNumberMethods *m = v->ob_type->tp_as_number; - if (m != NULL && m->nb_long != NULL) { - v = m->nb_long(v); - if (v == NULL) - return -1; - v_needs_decref = 1; - } - if (!PyLong_Check(v)) { - PyErr_SetString(StructError, - "cannot convert argument to long"); - if (v_needs_decref) - Py_DECREF(v); - return -1; - } - } + v = get_pylong(v); + if (v == NULL) + return -1; assert(PyLong_Check(v)); x = PyLong_AsUnsignedLongLong(v); - if (v_needs_decref) - Py_DECREF(v); + Py_DECREF(v); if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred()) return -1; *p = x; @@ -500,7 +482,7 @@ typedef struct _formatdef { TYPE is one of char, byte, ubyte, etc. */ -/* Native mode routines. */ +/* Native mode routines. ****************************************************/ static PyObject * nu_char(const char *p, const formatdef *f) @@ -797,6 +779,8 @@ static formatdef native_table[] = { {0} }; +/* Big-endian routines. 
*****************************************************/ + static PyObject * bu_int(const char *p, const formatdef *f) { @@ -826,6 +810,24 @@ bu_uint(const char *p, const formatdef *f) } static PyObject * +bu_longlong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 0, /* little-endian */ + 1 /* signed */); +} + +static PyObject * +bu_ulonglong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 0, /* little-endian */ + 0 /* signed */); +} + +static PyObject * bu_float(const char *p, const formatdef *f) { return unpack_float(p, 1); @@ -868,6 +870,40 @@ bp_uint(char *p, PyObject *v, const formatdef *f) } static int +bp_longlong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + /* get_pylong() returns NULL on failure; bail out before using v. */ + if (v == NULL) + return -1; + res = _PyLong_AsByteArray((PyLongObject *)v, + (unsigned char *)p, + 8, + 0, /* little_endian */ + 1 /* signed */); + Py_DECREF(v); + return res; +} + +static int +bp_ulonglong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + /* get_pylong() returns NULL on failure; bail out before using v. */ + if (v == NULL) + return -1; + res = _PyLong_AsByteArray((PyLongObject *)v, + (unsigned char *)p, + 8, + 0, /* little_endian */ + 0 /* signed */); + Py_DECREF(v); + return res; +} + +static int bp_float(char *p, PyObject *v, const formatdef *f) { double x = PyFloat_AsDouble(v); @@ -904,11 +940,15 @@ static formatdef bigendian_table[] = { {'I', 4, 0, bu_uint, bp_uint}, {'l', 4, 0, bu_int, bp_int}, {'L', 4, 0, bu_uint, bp_uint}, + {'q', 8, 0, bu_longlong, bp_longlong}, + {'Q', 8, 0, bu_ulonglong, bp_ulonglong}, {'f', 4, 0, bu_float, bp_float}, {'d', 8, 0, bu_double, bp_double}, {0} }; +/* Little-endian routines. 
*****************************************************/ + static PyObject * lu_int(const char *p, const formatdef *f) { @@ -938,6 +978,24 @@ lu_uint(const char *p, const formatdef *f) } static PyObject * +lu_longlong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 1, /* little-endian */ + 1 /* signed */); +} + +static PyObject * +lu_ulonglong(const char *p, const formatdef *f) +{ + return _PyLong_FromByteArray((const unsigned char *)p, + 8, + 1, /* little-endian */ + 0 /* signed */); +} + +static PyObject * lu_float(const char *p, const formatdef *f) { return unpack_float(p+3, -1); @@ -980,6 +1038,40 @@ lp_uint(char *p, PyObject *v, const formatdef *f) } static int +lp_longlong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + /* get_pylong() returns NULL on failure; bail out before using v. */ + if (v == NULL) + return -1; + res = _PyLong_AsByteArray((PyLongObject*)v, + (unsigned char *)p, + 8, + 1, /* little_endian */ + 1 /* signed */); + Py_DECREF(v); + return res; +} + +static int +lp_ulonglong(char *p, PyObject *v, const formatdef *f) +{ + int res; + v = get_pylong(v); + /* get_pylong() returns NULL on failure; bail out before using v. */ + if (v == NULL) + return -1; + res = _PyLong_AsByteArray((PyLongObject*)v, + (unsigned char *)p, + 8, + 1, /* little_endian */ + 0 /* signed */); + Py_DECREF(v); + return res; +} + +static int lp_float(char *p, PyObject *v, const formatdef *f) { double x = PyFloat_AsDouble(v); @@ -1016,6 +1108,8 @@ static formatdef lilendian_table[] = { {'I', 4, 0, lu_uint, lp_uint}, {'l', 4, 0, lu_int, lp_int}, {'L', 4, 0, lu_uint, lp_uint}, + {'q', 8, 0, lu_longlong, lp_longlong}, + {'Q', 8, 0, lu_ulonglong, lp_ulonglong}, {'f', 4, 0, lu_float, lp_float}, {'d', 8, 0, lu_double, lp_double}, {0} diff --git a/Objects/longobject.c b/Objects/longobject.c index 92f8b04..fac8bb6 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v, accumbits = 0; carry = do_twos_comp ? 
1 : 0; for (i = 0; i < ndigits; ++i) { + unsigned int oldaccumbits = accumbits; twodigits thisdigit = v->ob_digit[i]; if (do_twos_comp) { thisdigit = (thisdigit ^ MASK) + carry; carry = thisdigit >> SHIFT; thisdigit &= MASK; } + if (i < ndigits - 1) + accumbits += SHIFT; + else { + /* The most-significant digit may be partly empty. */ + twodigits bitmask = 1 << (SHIFT - 1); + twodigits signbit = do_twos_comp << (SHIFT - 1); + unsigned int nsignbits = 0; + while ((thisdigit & bitmask) == signbit && bitmask) { + ++nsignbits; + bitmask >>= 1; + signbit >>= 1; + } + accumbits += SHIFT - nsignbits; + } /* Because we're going LSB to MSB, thisdigit is more significant than what's already in accum, so needs to be prepended to accum. */ - accum |= thisdigit << accumbits; - accumbits += SHIFT; + accum |= thisdigit << oldaccumbits; /* Store as many bytes as possible. */ - assert(accumbits >= 8); - do { + while (accumbits >= 8) { if (j >= n) goto Overflow; ++j; @@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v, p += pincr; accumbits -= 8; accum >>= 8; - } while (accumbits >= 8); + } } /* Store the straggler (if any). */ assert(accumbits < 8); assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */ - if (accum) { + if (accumbits > 0) { if (j >= n) goto Overflow; ++j; -- cgit v0.12