From 7b9542a3f747787aedf056ddaef99e4469777aa9 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Sun, 10 Jun 2001 23:40:19 +0000 Subject: Initial support for 'q' and 'Q' struct format codes: for now, only in native mode, and only when config #defines HAVE_LONG_LONG. Standard mode will eventually treat them as 8-byte ints across all platforms, but that likely requires a new set of routines in longobject.c first (while sizeof(long) >= 4 is guaranteed by C, there's nothing in C we can rely on x-platform to hold 8 bytes of int, so we'll have to roll our own; I'm thinking of a simple pair of conversion functions, Python long to/from sized vector of unsigned bytes; that may be useful for GMP conversions too; std q/Q would call them with size fixed at 8). test_struct.py: In addition to adding some native-mode 'q' and 'Q' tests, got rid of unused code, and repaired a non-portable assumption about native sizeof(short) (it isn't 2 on some Cray boxes). libstruct.tex: In addition to adding a bit of 'q'/'Q' docs (more needed later), removed an erroneous footnote about 'I' behavior. 
--- Doc/lib/libstruct.tex | 11 +-- Lib/test/test_struct.py | 57 ++++++++++++--- Misc/NEWS | 8 +++ Modules/structmodule.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 247 insertions(+), 16 deletions(-) diff --git a/Doc/lib/libstruct.tex b/Doc/lib/libstruct.tex index 637d3e6..9a1942d 100644 --- a/Doc/lib/libstruct.tex +++ b/Doc/lib/libstruct.tex @@ -53,9 +53,11 @@ C and Python values should be obvious given their types: \lineiv{h}{\ctype{short}}{integer}{} \lineiv{H}{\ctype{unsigned short}}{integer}{} \lineiv{i}{\ctype{int}}{integer}{} - \lineiv{I}{\ctype{unsigned int}}{long}{(1)} + \lineiv{I}{\ctype{unsigned int}}{long}{} \lineiv{l}{\ctype{long}}{integer}{} \lineiv{L}{\ctype{unsigned long}}{long}{} + \lineiv{q}{\ctype{long long}}{long}{(1)} + \lineiv{Q}{\ctype{unsigned long long}}{long}{(1)} \lineiv{f}{\ctype{float}}{float}{} \lineiv{d}{\ctype{double}}{float}{} \lineiv{s}{\ctype{char[]}}{string}{} @@ -68,10 +70,9 @@ Notes: \begin{description} \item[(1)] - The \character{I} conversion code will convert to a Python long if - the C \ctype{int} is the same size as a C \ctype{long}, which is - typical on most modern systems. If a C \ctype{int} is smaller than - a C \ctype{long}, an Python integer will be created instead. + The \character{q} and \character{Q} conversion codes are available in + native mode only if the platform C compiler supports C \ctype{long long}, + or, on Windows, \ctype{__int64}. 
\end{description} diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index c48d92d..ebd6e45 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -1,4 +1,4 @@ -from test_support import TestFailed, verbose +from test_support import TestFailed, verbose, verify import struct ## import pdb @@ -12,7 +12,7 @@ def simple_err(func, *args): func.__name__, args) ## pdb.set_trace() -simple_err(struct.calcsize, 'Q') +simple_err(struct.calcsize, 'Z') sz = struct.calcsize('i') if sz * 3 != struct.calcsize('iii'): @@ -93,14 +93,7 @@ tests = [ '\000\000\000\000\000\000\000\300', 0), ] -def badpack(fmt, arg, got, exp): - return - -def badunpack(fmt, arg, got, exp): - return "unpack(%s, %s) -> (%s,) # expected (%s,)" % ( - `fmt`, `arg`, `got`, `exp`) - -isbigendian = struct.pack('=h', 1) == '\0\1' +isbigendian = struct.pack('=i', 1)[0] == chr(0) for fmt, arg, big, lil, asy in tests: if verbose: @@ -119,3 +112,47 @@ for fmt, arg, big, lil, asy in tests: if rev != arg and not asy: raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % ( `fmt`, `res`, `rev`, `arg`) + +# Some q/Q sanity checks. + +has_native_qQ = 1 +try: + struct.pack("q", 5) +except struct.error: + has_native_qQ = 0 + +if verbose: + print "Platform has native q/Q?", has_native_qQ and "Yes." or "No." + +simple_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless +simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless +simple_err(struct.pack, "Q", "a") # ditto, but 'Q' + +def force_bigendian(value): + if isbigendian: + return value + chars = list(value) + chars.reverse() + return "".join(chars) + +if has_native_qQ: + bytes = struct.calcsize('q') + # The expected values here are in big-endian format, primarily because + # I'm on a little-endian machine and so this is the clearest way (for + # me) to force the code to get exercised. 
+ for format, input, expected in ( + ('q', -1, '\xff' * bytes), + ('q', 0, '\x00' * bytes), + ('Q', 0, '\x00' * bytes), + ('q', 1L, '\x00' * (bytes-1) + '\x01'), + ('Q', (1L << (8*bytes))-1, '\xff' * bytes), + ('q', (1L << (8*bytes-1))-1, '\x7f' + '\xff' * (bytes - 1))): + got = struct.pack(format, input) + bigexpected = force_bigendian(expected) + verify(got == bigexpected, + "%r-pack of %r gave %r, not %r" % + (format, input, got, bigexpected)) + retrieved = struct.unpack(format, got)[0] + verify(retrieved == input, + "%r-unpack of %r gave %r, not %r" % + (format, got, retrieved, input)) diff --git a/Misc/NEWS b/Misc/NEWS index 62df987..18a87e9 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -140,6 +140,14 @@ Library - pprint functions now much faster for large containers (tuple, list, dict). +- New 'q' and 'Q' format codes in the struct module, corresponding to C + types "long long" and "unsigned long long" (on Windows, __int64). In + native mode, these can be used only when the platform C compiler supports + these types (when HAVE_LONG_LONG is #define'd by the Python config + process), and then they inherit the sizes and alignments of the C types. + XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and + XXX TODO are 8-byte integral types. + Tests - New test_mutants.py runs dict comparisons where the key and value diff --git a/Modules/structmodule.c b/Modules/structmodule.c index 9d1c436..a168b9f 100644 --- a/Modules/structmodule.c +++ b/Modules/structmodule.c @@ -22,9 +22,11 @@ these can be preceded by a decimal repeat count:\n\ h:short; H:unsigned short; i:int; I:unsigned int;\n\ l:long; L:unsigned long; f:float; d:double.\n\ Special cases (preceding decimal count indicates length):\n\ - s:string (array of char); p: pascal string (w. 
count byte).\n\ + s:string (array of char); p: pascal string (with count byte).\n\ Special case (only available in native format):\n\ P:an integer type that is wide enough to hold a pointer.\n\ +Special case (not in native mode unless 'long long' in platform C):\n\ + q:long long; Q:unsigned long long\n\ Whitespace between formats is ignored.\n\ \n\ The variable struct.error is an exception raised on errors."; @@ -65,6 +67,18 @@ typedef struct { char c; void *x; } s_void_p; #define DOUBLE_ALIGN (sizeof(s_double) - sizeof(double)) #define VOID_P_ALIGN (sizeof(s_void_p) - sizeof(void *)) +/* We can't support q and Q in native mode unless the compiler does; + in std mode, they're 8 bytes on all platforms. */ +#ifdef HAVE_LONG_LONG +typedef struct { char c; LONG_LONG x; } s_long_long; +#define LONG_LONG_ALIGN (sizeof(s_long_long) - sizeof(LONG_LONG)) + +#else +static char qQ_error_msg[] = +"q and Q unavailable in native mode on this platform; use a standard mode.\0"; + +#endif + #define STRINGIFY(x) #x #ifdef __powerc @@ -106,6 +120,93 @@ get_ulong(PyObject *v, unsigned long *p) } } +#ifdef HAVE_LONG_LONG + +/* Same, but handling native long long. */ + +static int +get_longlong(PyObject *v, LONG_LONG *p) +{ + LONG_LONG x; + int v_needs_decref = 0; + + if (PyInt_Check(v)) { + x = (LONG_LONG)PyInt_AS_LONG(v); + *p = x; + return 0; + } + if (!PyLong_Check(v)) { + PyNumberMethods *m = v->ob_type->tp_as_number; + if (m != NULL && m->nb_long != NULL) { + v = m->nb_long(v); + if (v == NULL) + return -1; + v_needs_decref = 1; + } + if (!PyLong_Check(v)) { + PyErr_SetString(StructError, + "cannot convert argument to long"); + if (v_needs_decref) + Py_DECREF(v); + return -1; + } + } + assert(PyLong_Check(v)); + x = PyLong_AsLongLong(v); + if (v_needs_decref) + Py_DECREF(v); + if (x == (LONG_LONG)-1 && PyErr_Occurred()) + return -1; + *p = x; + return 0; +} + +/* Same, but handling native unsigned long long. 
*/ + +static int +get_ulonglong(PyObject *v, unsigned LONG_LONG *p) +{ + unsigned LONG_LONG x; + int v_needs_decref = 0; + + if (PyInt_Check(v)) { + long i = PyInt_AS_LONG(v); + if (i < 0) { + PyErr_SetString(StructError, "can't convert negative " + "int to unsigned"); + return -1; + } + x = (unsigned LONG_LONG)i; + *p = x; + return 0; + } + if (!PyLong_Check(v)) { + PyNumberMethods *m = v->ob_type->tp_as_number; + if (m != NULL && m->nb_long != NULL) { + v = m->nb_long(v); + if (v == NULL) + return -1; + v_needs_decref = 1; + } + if (!PyLong_Check(v)) { + PyErr_SetString(StructError, + "cannot convert argument to long"); + if (v_needs_decref) + Py_DECREF(v); + return -1; + } + } + assert(PyLong_Check(v)); + x = PyLong_AsUnsignedLongLong(v); + if (v_needs_decref) + Py_DECREF(v); + if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred()) + return -1; + *p = x; + return 0; +} + +#endif /* Floating point helpers */ @@ -395,6 +496,17 @@ typedef struct _formatdef { const struct _formatdef *); } formatdef; +/* A large number of small routines follow, with names of the form + + [bln][up]_TYPE + + [bln] distinguishes among big-endian, little-endian and native. + [pu] distinguishes between pack (to struct) and unpack (from struct). + TYPE is one of char, byte, ubyte, etc. +*/ + +/* Native mode routines. */ + static PyObject * nu_char(const char *p, const formatdef *f) { @@ -450,6 +562,34 @@ nu_ulong(const char *p, const formatdef *f) return PyLong_FromUnsignedLong(*(unsigned long *)p); } +/* Native mode doesn't support q or Q unless the platform C supports + long long (or, on Windows, __int64). 
*/ + +#ifdef HAVE_LONG_LONG + +static PyObject * +nu_longlong(const char *p, const formatdef *f) +{ + return PyLong_FromLongLong(*(LONG_LONG *)p); +} + +static PyObject * +nu_ulonglong(const char *p, const formatdef *f) +{ + return PyLong_FromUnsignedLongLong(*(unsigned LONG_LONG *)p); +} + +#else + +static PyObject * +nu_qQerror(const char *p, const formatdef *f) +{ + PyErr_SetString(StructError, qQ_error_msg); + return NULL; +} + +#endif + static PyObject * nu_float(const char *p, const formatdef *f) { @@ -585,6 +725,39 @@ np_ulong(char *p, PyObject *v, const formatdef *f) return 0; } +#ifdef HAVE_LONG_LONG + +static int +np_longlong(char *p, PyObject *v, const formatdef *f) +{ + LONG_LONG x; + if (get_longlong(v, &x) < 0) + return -1; + * (LONG_LONG *)p = x; + return 0; +} + +static int +np_ulonglong(char *p, PyObject *v, const formatdef *f) +{ + unsigned LONG_LONG x; + if (get_ulonglong(v, &x) < 0) + return -1; + * (unsigned LONG_LONG *)p = x; + return 0; +} + +#else + +static int +np_qQerror(char *p, PyObject *v, const formatdef *f) +{ + PyErr_SetString(StructError, qQ_error_msg); + return -1; +} + +#endif + static int np_float(char *p, PyObject *v, const formatdef *f) { @@ -642,6 +815,18 @@ static formatdef native_table[] = { {'f', sizeof(float), FLOAT_ALIGN, nu_float, np_float}, {'d', sizeof(double), DOUBLE_ALIGN, nu_double, np_double}, {'P', sizeof(void *), VOID_P_ALIGN, nu_void_p, np_void_p}, +#ifdef HAVE_LONG_LONG + {'q', sizeof(LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong}, + {'Q', sizeof(LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong}, +#else + /* n[pu]_qQerror just raise errors, but give them "the expected" size + and alignment anyway so that calcsize returns something reasonable, + and so unpack code that works on a 'long long' platform ends up in + the error routine instead of with a mysterious "unpack str size + does not match format" msg when run on a non-'long long' box. 
*/ + {'q', 8, 8, nu_qQerror, np_qQerror}, + {'Q', 8, 8, nu_qQerror, np_qQerror}, +#endif {0} }; -- cgit v0.12