summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/struct.rst20
-rw-r--r--Lib/struct.py1
-rw-r--r--Lib/test/test_struct.py74
-rw-r--r--Misc/NEWS3
-rw-r--r--Modules/_struct.c165
5 files changed, 262 insertions, 1 deletions
diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst
index 994506c..f2ea361 100644
--- a/Doc/library/struct.rst
+++ b/Doc/library/struct.rst
@@ -66,6 +66,19 @@ The module defines the following exception and functions:
format (``len(buffer[offset:])`` must be at least ``calcsize(fmt)``).
+.. function:: iter_unpack(fmt, buffer)
+
+ Iteratively unpack from the buffer *buffer* according to the format
+ string *fmt*. This function returns an iterator which will read
+ equally-sized chunks from the buffer until all its contents have been
+ consumed. The buffer's size in bytes must be a multiple of the amount
+ of data required by the format, as reflected by :func:`calcsize`.
+
+ Each iteration yields a tuple as specified by the format string.
+
+ .. versionadded:: 3.4
+
+
.. function:: calcsize(fmt)
Return the size of the struct (and hence of the bytes object produced by
@@ -388,6 +401,13 @@ The :mod:`struct` module also defines the following type:
(``len(buffer[offset:])`` must be at least :attr:`self.size`).
+ .. method:: iter_unpack(buffer)
+
+ Identical to the :func:`iter_unpack` function, using the compiled format.
+ ``len(buffer)`` must be a multiple of :attr:`self.size`.
+
+ .. versionadded:: 3.4
+
.. attribute:: format
The format string used to construct this Struct object.
diff --git a/Lib/struct.py b/Lib/struct.py
index 9bfc23f..d6bba58 100644
--- a/Lib/struct.py
+++ b/Lib/struct.py
@@ -1,6 +1,7 @@
__all__ = [
# Functions
'calcsize', 'pack', 'pack_into', 'unpack', 'unpack_from',
+ 'iter_unpack',
# Classes
'Struct',
diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
index eb97a2c..8ffa7e6 100644
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -1,4 +1,6 @@
+from collections import abc
import array
+import operator
import unittest
import struct
import sys
@@ -593,8 +595,78 @@ class StructTest(unittest.TestCase):
self.check_sizeof('0s', 1)
self.check_sizeof('0c', 0)
+
+class UnpackIteratorTest(unittest.TestCase):
+ """
+ Tests for iterative unpacking (struct.Struct.iter_unpack).
+ """
+
+ def test_construct(self):
+ def _check_iterator(it):
+ self.assertIsInstance(it, abc.Iterator)
+ self.assertIsInstance(it, abc.Iterable)
+ s = struct.Struct('>ibcp')
+ it = s.iter_unpack(b"")
+ _check_iterator(it)
+ it = s.iter_unpack(b"1234567")
+ _check_iterator(it)
+ # Wrong bytes length
+ with self.assertRaises(struct.error):
+ s.iter_unpack(b"123456")
+ with self.assertRaises(struct.error):
+ s.iter_unpack(b"12345678")
+ # Zero-length struct
+ s = struct.Struct('>')
+ with self.assertRaises(struct.error):
+ s.iter_unpack(b"")
+ with self.assertRaises(struct.error):
+ s.iter_unpack(b"12")
+
+ def test_iterate(self):
+ s = struct.Struct('>IB')
+ b = bytes(range(1, 16))
+ it = s.iter_unpack(b)
+ self.assertEqual(next(it), (0x01020304, 5))
+ self.assertEqual(next(it), (0x06070809, 10))
+ self.assertEqual(next(it), (0x0b0c0d0e, 15))
+ self.assertRaises(StopIteration, next, it)
+ self.assertRaises(StopIteration, next, it)
+
+ def test_arbitrary_buffer(self):
+ s = struct.Struct('>IB')
+ b = bytes(range(1, 11))
+ it = s.iter_unpack(memoryview(b))
+ self.assertEqual(next(it), (0x01020304, 5))
+ self.assertEqual(next(it), (0x06070809, 10))
+ self.assertRaises(StopIteration, next, it)
+ self.assertRaises(StopIteration, next, it)
+
+ def test_length_hint(self):
+ lh = operator.length_hint
+ s = struct.Struct('>IB')
+ b = bytes(range(1, 16))
+ it = s.iter_unpack(b)
+ self.assertEqual(lh(it), 3)
+ next(it)
+ self.assertEqual(lh(it), 2)
+ next(it)
+ self.assertEqual(lh(it), 1)
+ next(it)
+ self.assertEqual(lh(it), 0)
+ self.assertRaises(StopIteration, next, it)
+ self.assertEqual(lh(it), 0)
+
+ def test_module_func(self):
+ # Sanity check for the global struct.iter_unpack()
+ it = struct.iter_unpack('>IB', bytes(range(1, 11)))
+ self.assertEqual(next(it), (0x01020304, 5))
+ self.assertEqual(next(it), (0x06070809, 10))
+ self.assertRaises(StopIteration, next, it)
+ self.assertRaises(StopIteration, next, it)
+
+
def test_main():
- support.run_unittest(StructTest)
+ support.run_unittest(__name__)
if __name__ == '__main__':
test_main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 273f11c..5c1016d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,9 @@ Core and Builtins
Library
-------
+- Issue #17804: New function ``struct.iter_unpack`` allows for streaming
+ struct unpacking.
+
- Issue #17830: When keyword.py is used to update a keyword file, it now
preserves the line endings of the original file.
diff --git a/Modules/_struct.c b/Modules/_struct.c
index 208559c..2dec4ed 100644
--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@@ -1247,6 +1247,9 @@ align(Py_ssize_t size, char c, const formatdef *e)
return size;
}
+/*
+ * Struct object implementation.
+ */
/* calculate the size of a format string */
@@ -1556,6 +1559,142 @@ s_unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
}
+/* Unpack iterator type */
+
+/* Instance layout of the iterator returned by Struct.iter_unpack(). */
+typedef struct {
+ PyObject_HEAD
+ /* Struct used to decode each chunk; the iternext function clears it
+    to NULL once the buffer has been fully consumed. */
+ PyStructObject *so;
+ /* View on the source buffer, filled in by PyObject_GetBuffer(). */
+ Py_buffer buf;
+ /* Byte offset inside buf of the next chunk to unpack. */
+ Py_ssize_t index;
+} unpackiterobject;
+
+/* tp_dealloc slot: drop the struct reference, release the buffer view
+   and free the iterator itself. */
+static void
+unpackiter_dealloc(unpackiterobject *self)
+{
+    /* The type is declared with Py_TPFLAGS_HAVE_GC, so the object must be
+       removed from the collector's tracking list before any of its fields
+       are invalidated; otherwise a collection triggered during teardown
+       could traverse a half-destroyed object. */
+    PyObject_GC_UnTrack(self);
+    Py_XDECREF(self->so);
+    PyBuffer_Release(&self->buf);
+    PyObject_GC_Del(self);
+}
+
+/* tp_traverse slot: report the objects this iterator keeps alive (the
+   Struct and the object exporting the buffer) to the garbage collector. */
+static int
+unpackiter_traverse(unpackiterobject *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->so);
+ Py_VISIT(self->buf.obj);
+ return 0;
+}
+
+/* __length_hint__() implementation: return the number of chunks that
+   remain to be yielded, as a Python int.
+
+   The function is registered with METH_NOARGS, and such functions are
+   invoked with two arguments (self and an unused NULL), so the second
+   parameter is declared explicitly instead of relying on the
+   PyCFunction cast to hide a mismatched signature. */
+static PyObject *
+unpackiter_len(unpackiterobject *self, PyObject *unused)
+{
+    Py_ssize_t remaining = 0;
+
+    /* so is cleared once the iterator is exhausted; nothing is left then. */
+    if (self->so != NULL)
+        remaining = (self->buf.len - self->index) / self->so->s_size;
+    return PyLong_FromSsize_t(remaining);
+}
+
+/* Method table of the unpack iterator type; it only exposes
+   __length_hint__, implemented by unpackiter_len(). */
+static PyMethodDef unpackiter_methods[] = {
+ {"__length_hint__", (PyCFunction) unpackiter_len, METH_NOARGS, NULL},
+ {NULL, NULL} /* sentinel */
+};
+
+/* tp_iternext slot: unpack and return the next chunk of the buffer, or
+   return NULL without setting an exception once everything has been
+   consumed. */
+static PyObject *
+unpackiter_iternext(unpackiterobject *self)
+{
+    char *chunk;
+    PyObject *values;
+
+    /* A NULL struct marks an iterator that was already exhausted. */
+    if (self->so == NULL)
+        return NULL;
+    if (self->buf.len <= self->index) {
+        /* Nothing left: let go of the struct and the buffer right away. */
+        Py_CLEAR(self->so);
+        PyBuffer_Release(&self->buf);
+        return NULL;
+    }
+    assert(self->index + self->so->s_size <= self->buf.len);
+    chunk = (char*) self->buf.buf + self->index;
+    values = s_unpack_internal(self->so, chunk);
+    /* Advance by one struct size whether or not unpacking succeeded. */
+    self->index += self->so->s_size;
+    return values;
+}
+
+/* Type object of the iterator returned by Struct.iter_unpack().  It is
+   only referenced from within this file, so it gets internal linkage
+   rather than being exported as a global symbol of the module. */
+static PyTypeObject unpackiter_type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "unpack_iterator",                          /* tp_name */
+    sizeof(unpackiterobject),                   /* tp_basicsize */
+    0,                                          /* tp_itemsize */
+    (destructor)unpackiter_dealloc,             /* tp_dealloc */
+    0,                                          /* tp_print */
+    0,                                          /* tp_getattr */
+    0,                                          /* tp_setattr */
+    0,                                          /* tp_reserved */
+    0,                                          /* tp_repr */
+    0,                                          /* tp_as_number */
+    0,                                          /* tp_as_sequence */
+    0,                                          /* tp_as_mapping */
+    0,                                          /* tp_hash */
+    0,                                          /* tp_call */
+    0,                                          /* tp_str */
+    PyObject_GenericGetAttr,                    /* tp_getattro */
+    0,                                          /* tp_setattro */
+    0,                                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
+    0,                                          /* tp_doc */
+    (traverseproc)unpackiter_traverse,          /* tp_traverse */
+    0,                                          /* tp_clear */
+    0,                                          /* tp_richcompare */
+    0,                                          /* tp_weaklistoffset */
+    PyObject_SelfIter,                          /* tp_iter */
+    (iternextfunc)unpackiter_iternext,          /* tp_iternext */
+    unpackiter_methods                          /* tp_methods */
+};
+
+PyDoc_STRVAR(s_iter_unpack__doc__,
+"S.iter_unpack(buffer) -> iterator(v1, v2, ...)\n\
+\n\
+Return an iterator yielding tuples unpacked from the given bytes\n\
+source, like a repeated invocation of unpack_from(). Requires\n\
+that the bytes length be a multiple of the struct size.");
+
+/* Implementation of Struct.iter_unpack(buffer).
+
+   _so is the Struct instance and input the object to read from.  Returns
+   a new unpack iterator, or NULL with an exception set.  StructError is
+   raised when the struct has size 0 or when the buffer length is not a
+   multiple of the struct size; errors from PyObject_GetBuffer() are
+   propagated unchanged. */
+static PyObject *
+s_iter_unpack(PyObject *_so, PyObject *input)
+{
+ PyStructObject *so = (PyStructObject *) _so;
+ unpackiterobject *self;
+
+ assert(PyStruct_Check(_so));
+ assert(so->s_codes != NULL);
+
+ /* Reject empty structs up front: s_size is used as a divisor below and
+    as the per-step advance of the iterator. */
+ if (so->s_size == 0) {
+ PyErr_Format(StructError,
+ "cannot iteratively unpack with a struct of length 0");
+ return NULL;
+ }
+
+ self = (unpackiterobject *) PyType_GenericAlloc(&unpackiter_type, 0);
+ if (self == NULL)
+ return NULL;
+
+ /* On the error paths below, dropping the only reference disposes of
+    the partially initialised iterator through unpackiter_dealloc(). */
+ if (PyObject_GetBuffer(input, &self->buf, PyBUF_SIMPLE) < 0) {
+ Py_DECREF(self);
+ return NULL;
+ }
+ if (self->buf.len % so->s_size != 0) {
+ PyErr_Format(StructError,
+ "iterative unpacking requires a bytes length "
+ "multiple of %zd",
+ so->s_size);
+ Py_DECREF(self);
+ return NULL;
+ }
+ /* The iterator holds its own reference to the struct. */
+ Py_INCREF(so);
+ self->so = so;
+ self->index = 0;
+ return (PyObject *) self;
+}
+
+
/*
* Guts of the pack function.
*
@@ -1776,6 +1915,7 @@ s_sizeof(PyStructObject *self, void *unused)
/* List of functions */
static struct PyMethodDef s_methods[] = {
+ {"iter_unpack", s_iter_unpack, METH_O, s_iter_unpack__doc__},
{"pack", s_pack, METH_VARARGS, s_pack__doc__},
{"pack_into", s_pack_into, METH_VARARGS, s_pack_into__doc__},
{"unpack", s_unpack, METH_O, s_unpack__doc__},
@@ -2025,9 +2165,34 @@ unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
return result;
}
+PyDoc_STRVAR(iter_unpack_doc,
+"iter_unpack(fmt, buffer) -> iterator(v1, v2, ...)\n\
+\n\
+Return an iterator yielding tuples unpacked from the given bytes\n\
+source according to the format string, like a repeated invocation of\n\
+unpack_from(). Requires that the bytes length be a multiple of the\n\
+format struct size.");
+
+/* Module-level iter_unpack(fmt, buffer): obtain the Struct for fmt from
+   cache_struct() and delegate to its iter_unpack() implementation.
+   Returns NULL with an exception set on failure. */
+static PyObject *
+iter_unpack(PyObject *self, PyObject *args)
+{
+    PyObject *format, *buffer;
+    PyObject *compiled;
+    PyObject *iterator;
+
+    if (!PyArg_ParseTuple(args, "OO:iter_unpack", &format, &buffer))
+        return NULL;
+
+    compiled = cache_struct(format);
+    if (compiled == NULL)
+        return NULL;
+
+    iterator = s_iter_unpack(compiled, buffer);
+    /* Our reference to the Struct is no longer needed either way. */
+    Py_DECREF(compiled);
+    return iterator;
+}
+
static struct PyMethodDef module_functions[] = {
{"_clearcache", (PyCFunction)clearcache, METH_NOARGS, clearcache_doc},
{"calcsize", calcsize, METH_O, calcsize_doc},
+ {"iter_unpack", iter_unpack, METH_VARARGS, iter_unpack_doc},
{"pack", pack, METH_VARARGS, pack_doc},
{"pack_into", pack_into, METH_VARARGS, pack_into_doc},
{"unpack", unpack, METH_VARARGS, unpack_doc},