summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2018-01-11 11:03:20 (GMT)
committerGitHub <noreply@github.com>2018-01-11 11:03:20 (GMT)
commit0a2da50e1867831251fad75377d0f10713eb6301 (patch)
tree4a4f07b2488a4eed222060dc45a05430340a0ad3
parentcb3ae5588bd7733e76dc09277bb7626652d9bb64 (diff)
downloadcpython-0a2da50e1867831251fad75377d0f10713eb6301.zip
cpython-0a2da50e1867831251fad75377d0f10713eb6301.tar.gz
cpython-0a2da50e1867831251fad75377d0f10713eb6301.tar.bz2
bpo-31993: Do not create frames for large bytes and str objects (#5114)
when serialize into memory buffer with C pickle implementations. This optimization already is performed when serialize into memory with Python pickle implementations or into a file with both implementations.
-rw-r--r--Lib/test/pickletester.py17
-rw-r--r--Lib/test/test_pickle.py4
-rw-r--r--Modules/_pickle.c180
3 files changed, 97 insertions, 104 deletions
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 5d983eb..f4e3f81 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2097,20 +2097,21 @@ class AbstractPickleTests(unittest.TestCase):
N = 1024 * 1024
obj = [b'x' * N, b'y' * N, 'z' * N]
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
- for fast in [True, False]:
+ for fast in [False, True]:
with self.subTest(proto=proto, fast=fast):
- if hasattr(self, 'pickler'):
+ if not fast:
+ # fast=False by default.
+ # This covers in-memory pickling with pickle.dumps().
+ pickled = self.dumps(obj, proto)
+ else:
+ # Pickler is required when fast=True.
+ if not hasattr(self, 'pickler'):
+ continue
buf = io.BytesIO()
pickler = self.pickler(buf, protocol=proto)
pickler.fast = fast
pickler.dump(obj)
pickled = buf.getvalue()
- elif fast:
- continue
- else:
- # Fallback to self.dumps when fast=False and
- # self.pickler is not available.
- pickled = self.dumps(obj, proto)
unpickled = self.loads(pickled)
# More informative error message in case of failure.
self.assertEqual([len(x) for x in obj],
diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
index 895ed48..0051a01 100644
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -71,8 +71,6 @@ class PyPicklerTests(AbstractPickleTests):
class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
BigmemPickleTests):
- pickler = pickle._Pickler
- unpickler = pickle._Unpickler
bad_stack_errors = (pickle.UnpicklingError, IndexError)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
@@ -84,6 +82,8 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
def loads(self, buf, **kwds):
return pickle.loads(buf, **kwds)
+ test_framed_write_sizes_with_delayed_writer = None
+
class PersistentPicklerUnpicklerMixin(object):
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 5cb1fba..5b3de4d 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2142,47 +2142,74 @@ done:
return 0;
}
-/* No-copy code-path to write large contiguous data directly into the
- underlying file object, bypassing the output_buffer of the Pickler. */
-static int
-_Pickler_write_large_bytes(
- PicklerObject *self, const char *header, Py_ssize_t header_size,
- PyObject *payload)
-{
- assert(self->output_buffer != NULL);
- assert(self->write != NULL);
- PyObject *result;
+/* Perform direct write of the header and payload of the binary object.
- /* Commit the previous frame. */
- if (_Pickler_CommitFrame(self)) {
- return -1;
+ The large contiguous data is written directly into the underlying file
+ object, bypassing the output_buffer of the Pickler. We intentionally
+ do not insert a protocol 4 frame opcode to make it possible to optimize
+ file.read calls in the loader.
+ */
+static int
+_Pickler_write_bytes(PicklerObject *self,
+ const char *header, Py_ssize_t header_size,
+ const char *data, Py_ssize_t data_size,
+ PyObject *payload)
+{
+ int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
+ int framing = self->framing;
+
+ if (bypass_buffer) {
+ assert(self->output_buffer != NULL);
+ /* Commit the previous frame. */
+ if (_Pickler_CommitFrame(self)) {
+ return -1;
+ }
+ /* Disable framing temporarily */
+ self->framing = 0;
}
- /* Disable frameing temporarily */
- self->framing = 0;
if (_Pickler_Write(self, header, header_size) < 0) {
return -1;
}
- /* Dump the output buffer to the file. */
- if (_Pickler_FlushToFile(self) < 0) {
- return -1;
- }
- /* Stream write the payload into the file without going through the
- output buffer. */
- result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
- if (result == NULL) {
- return -1;
- }
- Py_DECREF(result);
+ if (bypass_buffer && self->write != NULL) {
+ /* Bypass the in-memory buffer to directly stream large data
+ into the underlying file object. */
+ PyObject *result, *mem = NULL;
+ /* Dump the output buffer to the file. */
+ if (_Pickler_FlushToFile(self) < 0) {
+ return -1;
+ }
- /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
- if (_Pickler_ClearBuffer(self) < 0) {
- return -1;
+ /* Stream write the payload into the file without going through the
+ output buffer. */
+ if (payload == NULL) {
+ payload = mem = PyMemoryView_FromMemory((char *) data, data_size,
+ PyBUF_READ);
+ if (payload == NULL) {
+ return -1;
+ }
+ }
+ result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
+ Py_XDECREF(mem);
+ if (result == NULL) {
+ return -1;
+ }
+ Py_DECREF(result);
+
+ /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
+ if (_Pickler_ClearBuffer(self) < 0) {
+ return -1;
+ }
+ }
+ else {
+ if (_Pickler_Write(self, data, data_size) < 0) {
+ return -1;
+ }
}
/* Re-enable framing for subsequent calls to _Pickler_Write. */
- self->framing = 1;
+ self->framing = framing;
return 0;
}
@@ -2265,20 +2292,10 @@ save_bytes(PicklerObject *self, PyObject *obj)
return -1; /* string too large */
}
- if (size < FRAME_SIZE_TARGET || self->write == NULL) {
- if (_Pickler_Write(self, header, len) < 0) {
- return -1;
- }
- if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0) {
- return -1;
- }
- }
- else {
- /* Bypass the in-memory buffer to directly stream large data
- into the underlying file object. */
- if (_Pickler_write_large_bytes(self, header, len, obj) < 0) {
- return -1;
- }
+ if (_Pickler_write_bytes(self, header, len,
+ PyBytes_AS_STRING(obj), size, obj) < 0)
+ {
+ return -1;
}
if (memo_put(self, obj) < 0)
@@ -2360,11 +2377,29 @@ error:
}
static int
-write_utf8(PicklerObject *self, const char *data, Py_ssize_t size)
+write_unicode_binary(PicklerObject *self, PyObject *obj)
{
char header[9];
Py_ssize_t len;
- PyObject *mem;
+ PyObject *encoded = NULL;
+ Py_ssize_t size;
+ const char *data;
+
+ if (PyUnicode_READY(obj))
+ return -1;
+
+ data = PyUnicode_AsUTF8AndSize(obj, &size);
+ if (data == NULL) {
+ /* Issue #8383: for strings with lone surrogates, fallback on the
+ "surrogatepass" error handler. */
+ PyErr_Clear();
+ encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
+ if (encoded == NULL)
+ return -1;
+
+ data = PyBytes_AS_STRING(encoded);
+ size = PyBytes_GET_SIZE(encoded);
+ }
assert(size >= 0);
if (size <= 0xff && self->proto >= 4) {
@@ -2388,62 +2423,19 @@ write_utf8(PicklerObject *self, const char *data, Py_ssize_t size)
else {
PyErr_SetString(PyExc_OverflowError,
"cannot serialize a string larger than 4GiB");
+ Py_XDECREF(encoded);
return -1;
}
- if (size < FRAME_SIZE_TARGET || self->write == NULL) {
- if (_Pickler_Write(self, header, len) < 0) {
- return -1;
- }
- if (_Pickler_Write(self, data, size) < 0) {
- return -1;
- }
- }
- else {
- /* Bypass the in-memory buffer to directly stream large data
- into the underlying file object. */
- mem = PyMemoryView_FromMemory((char *) data, size, PyBUF_READ);
- if (mem == NULL) {
- return -1;
- }
- if (_Pickler_write_large_bytes(self, header, len, mem) < 0) {
- Py_DECREF(mem);
- return -1;
- }
- Py_DECREF(mem);
+ if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
+ Py_XDECREF(encoded);
+ return -1;
}
+ Py_XDECREF(encoded);
return 0;
}
static int
-write_unicode_binary(PicklerObject *self, PyObject *obj)
-{
- PyObject *encoded = NULL;
- Py_ssize_t size;
- const char *data;
- int r;
-
- if (PyUnicode_READY(obj))
- return -1;
-
- data = PyUnicode_AsUTF8AndSize(obj, &size);
- if (data != NULL)
- return write_utf8(self, data, size);
-
- /* Issue #8383: for strings with lone surrogates, fallback on the
- "surrogatepass" error handler. */
- PyErr_Clear();
- encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
- if (encoded == NULL)
- return -1;
-
- r = write_utf8(self, PyBytes_AS_STRING(encoded),
- PyBytes_GET_SIZE(encoded));
- Py_DECREF(encoded);
- return r;
-}
-
-static int
save_unicode(PicklerObject *self, PyObject *obj)
{
if (self->bin) {