From 45d9c91d4b0d61faaa85ea834e59c86815c00709 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 6 Oct 2011 15:27:40 +0200 Subject: Issue #3163: The struct module gets new format characters 'n' and 'N' supporting C integer types `ssize_t` and `size_t`, respectively. --- Doc/library/struct.rst | 21 +++++++++--- Lib/test/test_struct.py | 66 ++++++++++++++++++++++-------------- Misc/NEWS | 3 ++ Modules/_struct.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 150 insertions(+), 30 deletions(-) diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst index 12820e0..994506c 100644 --- a/Doc/library/struct.rst +++ b/Doc/library/struct.rst @@ -187,17 +187,24 @@ platform-dependent. | ``Q`` | :c:type:`unsigned long | integer | 8 | \(2), \(3) | | | long` | | | | +--------+--------------------------+--------------------+----------------+------------+ -| ``f`` | :c:type:`float` | float | 4 | \(4) | +| ``n`` | :c:type:`ssize_t` | integer | | \(4) | +--------+--------------------------+--------------------+----------------+------------+ -| ``d`` | :c:type:`double` | float | 8 | \(4) | +| ``N`` | :c:type:`size_t` | integer | | \(4) | ++--------+--------------------------+--------------------+----------------+------------+ +| ``f`` | :c:type:`float` | float | 4 | \(5) | ++--------+--------------------------+--------------------+----------------+------------+ +| ``d`` | :c:type:`double` | float | 8 | \(5) | +--------+--------------------------+--------------------+----------------+------------+ | ``s`` | :c:type:`char[]` | bytes | | | +--------+--------------------------+--------------------+----------------+------------+ | ``p`` | :c:type:`char[]` | bytes | | | +--------+--------------------------+--------------------+----------------+------------+ -| ``P`` | :c:type:`void \*` | integer | | \(5) | +| ``P`` | :c:type:`void \*` | integer | | \(6) | +--------+--------------------------+--------------------+----------------+------------+ +.. versionchanged:: 3.3 + Added support for the ``'n'`` and ``'N'`` formats. + Notes: (1) @@ -219,11 +226,17 @@ Notes: Use of the :meth:`__index__` method for non-integers is new in 3.2. (4) + The ``'n'`` and ``'N'`` conversion codes are only available for the native + size (selected as the default or with the ``'@'`` byte order character). + For the standard size, you can use whichever of the other integer formats + fits your application. + +(5) For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format, regardless of the floating-point format used by the platform. -(5) +(6) The ``'P'`` format character is only available for the native byte ordering (selected as the default or with the ``'@'`` byte order character). The byte order character ``'='`` chooses to use little- or big-endian ordering based diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 2ccaad2..dc75858 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -8,9 +8,19 @@ from test.support import run_unittest ISBIGENDIAN = sys.byteorder == "big" IS32BIT = sys.maxsize == 0x7fffffff -integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q' +integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'n', 'N' byteorders = '', '@', '=', '<', '>', '!' +def iter_integer_formats(byteorders=byteorders): + for code in integer_codes: + for byteorder in byteorders: + if (byteorder in ('', '@') and code in ('q', 'Q') and + not HAVE_LONG_LONG): + continue + if (byteorder not in ('', '@') and code in ('n', 'N')): + continue + yield code, byteorder + # Native 'q' packing isn't available on systems that don't have the C # long long type. try: @@ -141,14 +151,13 @@ class StructTest(unittest.TestCase): } # standard integer sizes - for code in integer_codes: - for byteorder in '=', '<', '>', '!': - format = byteorder+code - size = struct.calcsize(format) - self.assertEqual(size, expected_size[code]) + for code, byteorder in iter_integer_formats(('=', '<', '>', '!')): + format = byteorder+code + size = struct.calcsize(format) + self.assertEqual(size, expected_size[code]) # native integer sizes - native_pairs = 'bB', 'hH', 'iI', 'lL' + native_pairs = 'bB', 'hH', 'iI', 'lL', 'nN' if HAVE_LONG_LONG: native_pairs += 'qQ', for format_pair in native_pairs: @@ -166,9 +175,11 @@ class StructTest(unittest.TestCase): if HAVE_LONG_LONG: self.assertLessEqual(8, struct.calcsize('q')) self.assertLessEqual(struct.calcsize('l'), struct.calcsize('q')) + self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('i')) + self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('P')) def test_integers(self): - # Integer tests (bBhHiIlLqQ). + # Integer tests (bBhHiIlLqQnN). import binascii class IntTester(unittest.TestCase): @@ -182,11 +193,11 @@ class StructTest(unittest.TestCase): self.byteorder) self.bytesize = struct.calcsize(format) self.bitsize = self.bytesize * 8 - if self.code in tuple('bhilq'): + if self.code in tuple('bhilqn'): self.signed = True self.min_value = -(2**(self.bitsize-1)) self.max_value = 2**(self.bitsize-1) - 1 - elif self.code in tuple('BHILQ'): + elif self.code in tuple('BHILQN'): self.signed = False self.min_value = 0 self.max_value = 2**self.bitsize - 1 @@ -316,14 +327,23 @@ class StructTest(unittest.TestCase): struct.pack, self.format, obj) - for code in integer_codes: - for byteorder in byteorders: - if (byteorder in ('', '@') and code in ('q', 'Q') and - not HAVE_LONG_LONG): - continue + for code, byteorder in iter_integer_formats(): + format = byteorder+code + t = IntTester(format) + t.run() + + def test_nN_code(self): + # n and N don't exist in standard sizes + def assertStructError(func, *args, **kwargs): + with self.assertRaises(struct.error) as cm: + func(*args, **kwargs) + self.assertIn("bad char in struct format", str(cm.exception)) + for code in 'nN': + for byteorder in ('=', '<', '>', '!'): format = byteorder+code - t = IntTester(format) - t.run() + assertStructError(struct.calcsize, format) + assertStructError(struct.pack, format, 0) + assertStructError(struct.unpack, format, b"") def test_p_code(self): # Test p ("Pascal string") code. @@ -377,14 +397,10 @@ class StructTest(unittest.TestCase): self.assertRaises(OverflowError, struct.pack, ">f", big) def test_1530559(self): - for byteorder in '', '@', '=', '<', '>', '!': - for code in integer_codes: - if (byteorder in ('', '@') and code in ('q', 'Q') and - not HAVE_LONG_LONG): - continue - format = byteorder + code - self.assertRaises(struct.error, struct.pack, format, 1.0) - self.assertRaises(struct.error, struct.pack, format, 1.5) + for code, byteorder in iter_integer_formats(): + format = byteorder + code + self.assertRaises(struct.error, struct.pack, format, 1.0) + self.assertRaises(struct.error, struct.pack, format, 1.5) self.assertRaises(struct.error, struct.pack, 'P', 1.0) self.assertRaises(struct.error, struct.pack, 'P', 1.5) diff --git a/Misc/NEWS b/Misc/NEWS index ef0edc3..9564891 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -294,6 +294,9 @@ Core and Builtins Library ------- +- Issue #3163: The struct module gets new format characters 'n' and 'N' + supporting C integer types ``ssize_t`` and ``size_t``, respectively. + - Issue #13099: Fix sqlite3.Cursor.lastrowid under a Turkish locale. Reported and diagnosed by Thomas Kluyver. diff --git a/Modules/_struct.c b/Modules/_struct.c index 1604b90..dcdc83e 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -58,6 +58,7 @@ typedef struct { char c; long x; } st_long; typedef struct { char c; float x; } st_float; typedef struct { char c; double x; } st_double; typedef struct { char c; void *x; } st_void_p; +typedef struct { char c; size_t x; } st_size_t; #define SHORT_ALIGN (sizeof(st_short) - sizeof(short)) #define INT_ALIGN (sizeof(st_int) - sizeof(int)) @@ -65,6 +66,7 @@ typedef struct { char c; void *x; } st_void_p; #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float)) #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double)) #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *)) +#define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t)) /* We can't support q and Q in native mode unless the compiler does; in std mode, they're 8 bytes on all platforms. */ @@ -213,6 +215,52 @@ get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p) #endif +/* Same, but handling Py_ssize_t */ + +static int +get_ssize_t(PyObject *v, Py_ssize_t *p) +{ + Py_ssize_t x; + + v = get_pylong(v); + if (v == NULL) + return -1; + assert(PyLong_Check(v)); + x = PyLong_AsSsize_t(v); + Py_DECREF(v); + if (x == (Py_ssize_t)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) + PyErr_SetString(StructError, + "argument out of range"); + return -1; + } + *p = x; + return 0; +} + +/* Same, but handling size_t */ + +static int +get_size_t(PyObject *v, size_t *p) +{ + size_t x; + + v = get_pylong(v); + if (v == NULL) + return -1; + assert(PyLong_Check(v)); + x = PyLong_AsSize_t(v); + Py_DECREF(v); + if (x == (size_t)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) + PyErr_SetString(StructError, + "argument out of range"); + return -1; + } + *p = x; + return 0; +} + #define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag) @@ -369,6 +417,23 @@ nu_ulong(const char *p, const formatdef *f) return PyLong_FromUnsignedLong(x); } +static PyObject * +nu_ssize_t(const char *p, const formatdef *f) +{ + Py_ssize_t x; + memcpy((char *)&x, p, sizeof x); + return PyLong_FromSsize_t(x); +} + +static PyObject * +nu_size_t(const char *p, const formatdef *f) +{ + size_t x; + memcpy((char *)&x, p, sizeof x); + return PyLong_FromSize_t(x); +} + + /* Native mode doesn't support q or Q unless the platform C supports long long (or, on Windows, __int64). */ @@ -558,6 +623,26 @@ np_ulong(char *p, PyObject *v, const formatdef *f) return 0; } +static int +np_ssize_t(char *p, PyObject *v, const formatdef *f) +{ + Py_ssize_t x; + if (get_ssize_t(v, &x) < 0) + return -1; + memcpy(p, (char *)&x, sizeof x); + return 0; +} + +static int +np_size_t(char *p, PyObject *v, const formatdef *f) +{ + size_t x; + if (get_size_t(v, &x) < 0) + return -1; + memcpy(p, (char *)&x, sizeof x); + return 0; +} + #ifdef HAVE_LONG_LONG static int @@ -651,6 +736,8 @@ static formatdef native_table[] = { {'I', sizeof(int), INT_ALIGN, nu_uint, np_uint}, {'l', sizeof(long), LONG_ALIGN, nu_long, np_long}, {'L', sizeof(long), LONG_ALIGN, nu_ulong, np_ulong}, + {'n', sizeof(size_t), SIZE_T_ALIGN, nu_ssize_t, np_ssize_t}, + {'N', sizeof(size_t), SIZE_T_ALIGN, nu_size_t, np_size_t}, #ifdef HAVE_LONG_LONG {'q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong}, {'Q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong}, @@ -1951,7 +2038,8 @@ these can be preceded by a decimal repeat count:\n\ l:long; L:unsigned long; f:float; d:double.\n\ Special cases (preceding decimal count indicates length):\n\ s:string (array of char); p: pascal string (with count byte).\n\ -Special case (only available in native format):\n\ +Special cases (only available in native format):\n\ + n:ssize_t; N:size_t;\n\ P:an integer type that is wide enough to hold a pointer.\n\ Special case (not in native mode unless 'long long' in platform C):\n\ q:long long; Q:unsigned long long\n\ -- cgit v0.12