summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2016-05-20 09:42:37 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2016-05-20 09:42:37 (GMT)
commit: 19ed27ec2b6d8871249e0dd8f56d40a0a78094f8 (patch)
tree: fd8d3c94bdff9438b9b330da6f7ea4b63ebe2378
parent: 744c34e2ea91ba8f9e945bbeba121c7e95063056 (diff)
download: cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.zip
cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.tar.gz
cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.tar.bz2
Optimize pickle.load() and pickle.loads()
Issue #27056: Optimize pickle.load() and pickle.loads(), up to 10% faster to deserialize a lot of small objects.
-rw-r--r-- Doc/whatsnew/3.6.rst | 3
-rw-r--r-- Misc/NEWS | 5
-rw-r--r-- Modules/_pickle.c | 45
3 files changed, 33 insertions, 20 deletions
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
index 52be134..67fd50f 100644
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -467,6 +467,9 @@ Optimizations
with a short lifetime, and use :c:func:`malloc` for larger memory blocks.
(Contributed by Victor Stinner in :issue:`26249`).
+* :func:`pickle.load` and :func:`pickle.loads` are now up to 10% faster when
+ deserializing many small objects (Contributed by Victor Stinner in
+ :issue:`27056`).
Build and C API Changes
=======================
diff --git a/Misc/NEWS b/Misc/NEWS
index 94e508f..ba66c4e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -16,6 +16,9 @@ Core and Builtins
Library
-------
+- Issue #27056: Optimize pickle.load() and pickle.loads(), up to 10% faster
+ to deserialize a lot of small objects.
+
What's New in Python 3.6.0 alpha 1?
===================================
@@ -341,7 +344,7 @@ Library
- Issue #26977: Removed unnecessary, and ignored, call to sum of squares helper
in statistics.pvariance.
-- Issue #26002: Use bisect in statistics.median instead of a linear search.
+- Issue #26002: Use bisect in statistics.median instead of a linear search.
Patch by Upendra Kuma.
- Issue #25974: Make use of new Decimal.as_integer_ratio() method in statistics
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index fdd60e0..e3aa7c5 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1197,21 +1197,9 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
return read_size;
}
-/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
-
- This should be used for all data reads, rather than accessing the unpickler's
- input buffer directly. This method deals correctly with reading from input
- streams, which the input buffer doesn't deal with.
-
- Note that when reading from a file-like object, self->next_read_idx won't
- be updated (it should remain at 0 for the entire unpickling process). You
- should use this function's return value to know how many bytes you can
- consume.
-
- Returns -1 (with an exception set) on failure. On success, return the
- number of chars read. */
+/* Don't call it directly: use _Unpickler_Read() */
static Py_ssize_t
-_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
+_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
{
Py_ssize_t num_read;
@@ -1222,11 +1210,10 @@ _Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
"read would overflow (invalid bytecode)");
return -1;
}
- if (self->next_read_idx + n <= self->input_len) {
- *s = self->input_buffer + self->next_read_idx;
- self->next_read_idx += n;
- return n;
- }
+
+ /* This case is handled by the _Unpickler_Read() macro for efficiency */
+ assert(self->next_read_idx + n > self->input_len);
+
if (!self->read) {
PyErr_Format(PyExc_EOFError, "Ran out of input");
return -1;
@@ -1243,6 +1230,26 @@ _Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
return n;
}
+/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
+
+ This should be used for all data reads, rather than accessing the unpickler's
+ input buffer directly. This method deals correctly with reading from input
+ streams, which the input buffer doesn't deal with.
+
+ Note that when reading from a file-like object, self->next_read_idx won't
+ be updated (it should remain at 0 for the entire unpickling process). You
+ should use this function's return value to know how many bytes you can
+ consume.
+
+ Returns -1 (with an exception set) on failure. On success, return the
+ number of chars read. */
+#define _Unpickler_Read(self, s, n) \
+ (((self)->next_read_idx + (n) <= (self)->input_len) \
+ ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
+ (self)->next_read_idx += (n), \
+ (n)) \
+ : _Unpickler_ReadImpl(self, (s), (n)))
+
static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
char **result)