Optimize pickle.load() and pickle.loads()

Issue #27056: Optimize pickle.load() and pickle.loads(), making them up to 10% faster when deserializing many small objects.
author: Victor Stinner <victor.stinner@gmail.com> 2016-05-20 09:42:37 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2016-05-20 09:42:37 (GMT)
commit: 19ed27ec2b6d8871249e0dd8f56d40a0a78094f8 (patch)
tree: fd8d3c94bdff9438b9b330da6f7ea4b63ebe2378 /Modules/_pickle.c
parent: 744c34e2ea91ba8f9e945bbeba121c7e95063056 (diff)
download: cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.zip
cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.tar.gz
cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.tar.bz2
1 file changed, 26 insertions, 19 deletions
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index fdd60e0..e3aa7c5 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1197,21 +1197,9 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
     return read_size;
 }
 
-/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
-
-   This should be used for all data reads, rather than accessing the unpickler's
-   input buffer directly. This method deals correctly with reading from input
-   streams, which the input buffer doesn't deal with.
-
-   Note that when reading from a file-like object, self->next_read_idx won't
-   be updated (it should remain at 0 for the entire unpickling process). You
-   should use this function's return value to know how many bytes you can
-   consume.
-
-   Returns -1 (with an exception set) on failure. On success, return the
-   number of chars read. */
+/* Don't call it directly: use _Unpickler_Read() */
 static Py_ssize_t
-_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
+_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
 {
     Py_ssize_t num_read;
 
@@ -1222,11 +1210,10 @@ _Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
                         "read would overflow (invalid bytecode)");
         return -1;
     }
-    if (self->next_read_idx + n <= self->input_len) {
-        *s = self->input_buffer + self->next_read_idx;
-        self->next_read_idx += n;
-        return n;
-    }
+
+    /* This case is handled by the _Unpickler_Read() macro for efficiency */
+    assert(self->next_read_idx + n > self->input_len);
+
     if (!self->read) {
         PyErr_Format(PyExc_EOFError, "Ran out of input");
         return -1;
@@ -1243,6 +1230,26 @@ _Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
     return n;
 }
 
+/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
+
+   This should be used for all data reads, rather than accessing the unpickler's
+   input buffer directly. This method deals correctly with reading from input
+   streams, which the input buffer doesn't deal with.
+
+   Note that when reading from a file-like object, self->next_read_idx won't
+   be updated (it should remain at 0 for the entire unpickling process). You
+   should use this function's return value to know how many bytes you can
+   consume.
+
+   Returns -1 (with an exception set) on failure. On success, return the
+   number of chars read. */
+#define _Unpickler_Read(self, s, n) \
+    (((self)->next_read_idx + (n) <= (self)->input_len)      \
+     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
+        (self)->next_read_idx += (n),                        \
+        (n))                                                 \
+     : _Unpickler_ReadImpl(self, (s), (n)))
+
 static Py_ssize_t
 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
                     char **result)
author	Victor Stinner <victor.stinner@gmail.com>	2016-05-20 09:42:37 (GMT)
committer	Victor Stinner <victor.stinner@gmail.com>	2016-05-20 09:42:37 (GMT)
commit	19ed27ec2b6d8871249e0dd8f56d40a0a78094f8 (patch)
tree	fd8d3c94bdff9438b9b330da6f7ea4b63ebe2378 /Modules/_pickle.c
parent	744c34e2ea91ba8f9e945bbeba121c7e95063056 (diff)
download	cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.zip cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.tar.gz cpython-19ed27ec2b6d8871249e0dd8f56d40a0a78094f8.tar.bz2