diff options
-rw-r--r-- | Doc/ACKS.txt | 1 |
-rw-r--r-- | Doc/library/struct.rst | 69 |
-rw-r--r-- | Modules/_struct.c | 20 |
3 files changed, 48 insertions, 42 deletions
diff --git a/Doc/ACKS.txt b/Doc/ACKS.txt index 1130762..1bec09e 100644 --- a/Doc/ACKS.txt +++ b/Doc/ACKS.txt @@ -62,6 +62,7 @@ docs@python.org), and we'll be glad to correct the problem. * Ben Gertzfield * Nadim Ghaznavi * Jonathan Giddy + * Matt Giuca * Shelley Gooch * Nathaniel Gray * Grant Griffin diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst index 282483d..7802fdb 100644 --- a/Doc/library/struct.rst +++ b/Doc/library/struct.rst @@ -1,19 +1,19 @@ -:mod:`struct` --- Interpret strings as packed binary data +:mod:`struct` --- Interpret bytes as packed binary data ========================================================= .. module:: struct - :synopsis: Interpret strings as packed binary data. + :synopsis: Interpret bytes as packed binary data. .. index:: pair: C; structures triple: packing; binary; data This module performs conversions between Python values and C structs represented -as Python strings. It uses :dfn:`format strings` (explained below) as compact -descriptions of the lay-out of the C structs and the intended conversion to/from -Python values. This can be used in handling binary data stored in files or from -network connections, among other sources. +as Python :class:`bytes` objects. It uses :dfn:`format strings` (explained +below) as compact descriptions of the lay-out of the C structs and the +intended conversion to/from Python values. This can be used in handling +binary data stored in files or from network connections, among other sources. The module defines the following exception and functions: @@ -26,7 +26,7 @@ The module defines the following exception and functions: .. function:: pack(fmt, v1, v2, ...) - Return a string containing the values ``v1, v2, ...`` packed according to the + Return a bytes containing the values ``v1, v2, ...`` packed according to the given format. The arguments must match the values required by the format exactly. 
@@ -38,12 +38,12 @@ The module defines the following exception and functions: a required argument. -.. function:: unpack(fmt, string) +.. function:: unpack(fmt, bytes) - Unpack the string (presumably packed by ``pack(fmt, ...)``) according to the + Unpack the bytes (presumably packed by ``pack(fmt, ...)``) according to the given format. The result is a tuple even if it contains exactly one item. The - string must contain exactly the amount of data required by the format - (``len(string)`` must equal ``calcsize(fmt)``). + bytes must contain exactly the amount of data required by the format + (``len(bytes)`` must equal ``calcsize(fmt)``). .. function:: unpack_from(fmt, buffer[,offset=0]) @@ -56,7 +56,7 @@ The module defines the following exception and functions: .. function:: calcsize(fmt) - Return the size of the struct (and hence of the string) corresponding to the + Return the size of the struct (and hence of the bytes) corresponding to the given format. Format characters have the following meaning; the conversion between C and @@ -67,13 +67,13 @@ Python values should be obvious given their types: +========+=========================+====================+=======+ | ``x`` | pad byte | no value | | +--------+-------------------------+--------------------+-------+ -| ``c`` | :ctype:`char` | string of length 1 | | +| ``c`` | :ctype:`char` | bytes of length 1 | | +--------+-------------------------+--------------------+-------+ -| ``b`` | :ctype:`signed char` | integer | | +| ``b`` | :ctype:`signed char` | integer | \(1) | +--------+-------------------------+--------------------+-------+ | ``B`` | :ctype:`unsigned char` | integer | | +--------+-------------------------+--------------------+-------+ -| ``?`` | :ctype:`_Bool` | bool | \(1) | +| ``?`` | :ctype:`_Bool` | bool | \(2) | +--------+-------------------------+--------------------+-------+ | ``h`` | :ctype:`short` | integer | | +--------+-------------------------+--------------------+-------+ @@ -87,18 +87,18 @@ 
Python values should be obvious given their types: +--------+-------------------------+--------------------+-------+ | ``L`` | :ctype:`unsigned long` | integer | | +--------+-------------------------+--------------------+-------+ -| ``q`` | :ctype:`long long` | integer | \(2) | +| ``q`` | :ctype:`long long` | integer | \(3) | +--------+-------------------------+--------------------+-------+ -| ``Q`` | :ctype:`unsigned long | integer | \(2) | +| ``Q`` | :ctype:`unsigned long | integer | \(3) | | | long` | | | +--------+-------------------------+--------------------+-------+ | ``f`` | :ctype:`float` | float | | +--------+-------------------------+--------------------+-------+ | ``d`` | :ctype:`double` | float | | +--------+-------------------------+--------------------+-------+ -| ``s`` | :ctype:`char[]` | string | | +| ``s`` | :ctype:`char[]` | bytes | \(1) | +--------+-------------------------+--------------------+-------+ -| ``p`` | :ctype:`char[]` | string | | +| ``p`` | :ctype:`char[]` | bytes | \(1) | +--------+-------------------------+--------------------+-------+ | ``P`` | :ctype:`void \*` | integer | | +--------+-------------------------+--------------------+-------+ @@ -106,11 +106,16 @@ Python values should be obvious given their types: Notes: (1) + The ``c``, ``s`` and ``p`` conversion codes operate on :class:`bytes` + objects, but packing with such codes also supports :class:`str` objects, + which are encoded using UTF-8. + +(2) The ``'?'`` conversion code corresponds to the :ctype:`_Bool` type defined by C99. If this type is not available, it is simulated using a :ctype:`char`. In standard mode, it is always represented by one byte. -(2) +(3) The ``'q'`` and ``'Q'`` conversion codes are available in native mode only if the platform C compiler supports C :ctype:`long long`, or, on Windows, :ctype:`__int64`. They are always available in standard modes. @@ -121,11 +126,11 @@ the format string ``'4h'`` means exactly the same as ``'hhhh'``. 
Whitespace characters between formats are ignored; a count and its format must not contain whitespace though. -For the ``'s'`` format character, the count is interpreted as the size of the -string, not a repeat count like for the other format characters; for example, +For the ``'s'`` format character, the count is interpreted as the length of the +bytes, not a repeat count like for the other format characters; for example, ``'10s'`` means a single 10-byte string, while ``'10c'`` means 10 characters. For packing, the string is truncated or padded with null bytes as appropriate to -make it fit. For unpacking, the resulting string always has exactly the +make it fit. For unpacking, the resulting bytes object always has exactly the specified number of bytes. As a special case, ``'0s'`` means a single, empty string (while ``'0c'`` means 0 characters). @@ -137,7 +142,7 @@ passed in to :func:`pack` is too long (longer than the count minus 1), only the leading count-1 bytes of the string are stored. If the string is shorter than count-1, it is padded with null bytes so that exactly count bytes in all are used. Note that for :func:`unpack`, the ``'p'`` format character consumes count -bytes, but that the string returned can never contain more than 255 characters. +bytes, but that the string returned can never contain more than 255 bytes. @@ -203,8 +208,8 @@ machine):: >>> from struct import * >>> pack('hhl', 1, 2, 3) - '\x00\x01\x00\x02\x00\x00\x00\x03' - >>> unpack('hhl', '\x00\x01\x00\x02\x00\x00\x00\x03') + b'\x00\x01\x00\x02\x00\x00\x00\x03' + >>> unpack('hhl', b'\x00\x01\x00\x02\x00\x00\x00\x03') (1, 2, 3) >>> calcsize('hhl') 8 @@ -219,13 +224,13 @@ enforce any alignment. 
Unpacked fields can be named by assigning them to variables or by wrapping the result in a named tuple:: - >>> record = 'raymond \x32\x12\x08\x01\x08' + >>> record = b'raymond \x32\x12\x08\x01\x08' >>> name, serialnum, school, gradelevel = unpack('<10sHHb', record) >>> from collections import namedtuple >>> Student = namedtuple('Student', 'name serialnum school gradelevel') - >>> Student._make(unpack('<10sHHb', s)) - Student(name='raymond ', serialnum=4658, school=264, gradelevel=8) + >>> Student._make(unpack('<10sHHb', record)) + Student(name=b'raymond ', serialnum=4658, school=264, gradelevel=8) .. seealso:: @@ -265,10 +270,10 @@ The :mod:`struct` module also defines the following type: Identical to the :func:`pack_into` function, using the compiled format. - .. method:: unpack(string) + .. method:: unpack(bytes) Identical to the :func:`unpack` function, using the compiled format. - (``len(string)`` must equal :attr:`self.size`). + (``len(bytes)`` must equal :attr:`self.size`). .. method:: unpack_from(buffer[, offset=0]) @@ -283,6 +288,6 @@ The :mod:`struct` module also defines the following type: .. attribute:: size - The calculated size of the struct (and hence of the string) corresponding + The calculated size of the struct (and hence of the bytes) corresponding to :attr:`format`. 
diff --git a/Modules/_struct.c b/Modules/_struct.c
index 6be4557..a1e56be 100644
--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@@ -1,4 +1,4 @@
-/* struct module -- pack values into and (out of) strings */
+/* struct module -- pack values into and (out of) bytes objects */
 
 /* New version supporting byte order, alignment and size options,
    character strings, and unsigned numbers */
@@ -610,7 +610,7 @@ np_char(char *p, PyObject *v, const formatdef *f)
 	}
 	if (!PyBytes_Check(v) || PyBytes_Size(v) != 1) {
 		PyErr_SetString(StructError,
-				"char format requires string of length 1");
+				"char format requires bytes or string of length 1");
 		return -1;
 	}
 	*p = *PyBytes_AsString(v);
@@ -1654,7 +1654,7 @@ s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char* buf)
 			isstring = PyBytes_Check(v);
 			if (!isstring && !PyByteArray_Check(v)) {
 				PyErr_SetString(StructError,
-						"argument for 's' must be a string");
+						"argument for 's' must be a bytes or string");
 				return -1;
 			}
 			if (isstring) {
@@ -1680,7 +1680,7 @@ s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char* buf)
 			isstring = PyBytes_Check(v);
 			if (!isstring && !PyByteArray_Check(v)) {
 				PyErr_SetString(StructError,
-						"argument for 'p' must be a string");
+						"argument for 'p' must be a bytes or string");
 				return -1;
 			}
 			if (isstring) {
@@ -1714,9 +1714,9 @@ s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char* buf)
 
 
 PyDoc_STRVAR(s_pack__doc__,
-"S.pack(v1, v2, ...) -> string\n\
+"S.pack(v1, v2, ...) -> bytes\n\
 \n\
-Return a string containing values v1, v2, ... packed according to this\n\
+Return a bytes containing values v1, v2, ... packed according to this\n\
 Struct's format. See struct.__doc__ for more on format strings.");
 
 static PyObject *
@@ -1944,7 +1944,7 @@ calcsize(PyObject *self, PyObject *fmt)
 }
 
 PyDoc_STRVAR(pack_doc,
-"Return string containing values v1, v2, ... packed according to fmt.");
+"Return bytes containing values v1, v2, ... packed according to fmt.");
 
 static PyObject *
 pack(PyObject *self, PyObject *args)
@@ -2003,8 +2003,8 @@ pack_into(PyObject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(unpack_doc,
-"Unpack the string containing packed C structure data, according to fmt.\n\
-Requires len(string) == calcsize(fmt).");
+"Unpack the bytes containing packed C structure data, according to fmt.\n\
+Requires len(bytes) == calcsize(fmt).");
 
 static PyObject *
 unpack(PyObject *self, PyObject *args)
@@ -2068,7 +2068,7 @@ static struct PyMethodDef module_functions[] = {
 
 PyDoc_STRVAR(module_doc,
 "Functions to convert between Python values and C structs.\n\
-Python strings are used to hold the data representing the C struct\n\
+Python bytes objects are used to hold the data representing the C struct\n\
 and also as format strings to describe the layout of data in the C struct.\n\
 \n\
 The optional first format char indicates byte order, size and alignment:\n\