summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2010-09-09 18:33:21 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2010-09-09 18:33:21 (GMT)
commitea99c5c94985c21d8a64c9a3d753bde7f801c14a (patch)
tree193ab657d8c66abf7b3147b17b5d1fe7c79f3c87
parent350c7229be7b288e2f4c93d0ae2e111cb15f640e (diff)
downloadcpython-ea99c5c94985c21d8a64c9a3d753bde7f801c14a.zip
cpython-ea99c5c94985c21d8a64c9a3d753bde7f801c14a.tar.gz
cpython-ea99c5c94985c21d8a64c9a3d753bde7f801c14a.tar.bz2
Issue #9410: Various optimizations to the pickle module, leading to
speedups up to 4x (depending on the benchmark). Mostly ported from Unladen Swallow; initial patch by Alexandre Vassalotti.
-rw-r--r--Lib/pickle.py12
-rw-r--r--Lib/test/pickletester.py9
-rw-r--r--Lib/test/test_pickle.py15
-rw-r--r--Misc/NEWS4
-rw-r--r--Modules/_pickle.c2349
5 files changed, 1863 insertions, 526 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 8732508..aca8fd1 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -1287,12 +1287,6 @@ def decode_long(data):
"""
return int.from_bytes(data, byteorder='little', signed=True)
-# Use the faster _pickle if possible
-try:
- from _pickle import *
-except ImportError:
- Pickler, Unpickler = _Pickler, _Unpickler
-
# Shorthands
def dump(obj, file, protocol=None, *, fix_imports=True):
@@ -1316,6 +1310,12 @@ def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
return Unpickler(file, fix_imports=fix_imports,
encoding=encoding, errors=errors).load()
+# Use the faster _pickle if possible
+try:
+ from _pickle import *
+except ImportError:
+ Pickler, Unpickler = _Pickler, _Unpickler
+
# Doctest
def _test():
import doctest
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 70e5ad0..33c85dc 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1068,6 +1068,15 @@ class AbstractPickleTests(unittest.TestCase):
dumped = self.dumps(set([3]), 2)
self.assertEqual(dumped, DATA6)
+ def test_large_pickles(self):
+ # Test the correctness of internal buffering routines when handling
+ # large data.
+ for proto in protocols:
+ data = (1, b'x' * (256 * 1024))
+ dumped = self.dumps(data, proto)
+ loaded = self.loads(dumped)
+ self.assertEqual(loaded, data)
+
# Test classes for reduce_ex
diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
index bb681bf..7f642c8 100644
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -37,6 +37,18 @@ class PyPicklerTests(AbstractPickleTests):
return u.load()
+class InMemoryPickleTests(AbstractPickleTests):
+
+ pickler = pickle._Pickler
+ unpickler = pickle._Unpickler
+
+ def dumps(self, arg, proto=None):
+ return pickle.dumps(arg, proto)
+
+ def loads(self, buf):
+ return pickle.loads(buf)
+
+
class PyPersPicklerTests(AbstractPersistentPicklerTests):
pickler = pickle._Pickler
@@ -95,7 +107,8 @@ def test_main():
tests.extend([CPicklerTests, CPersPicklerTests,
CDumpPickle_LoadPickle, DumpPickle_CLoadPickle,
PyPicklerUnpicklerObjectTests,
- CPicklerUnpicklerObjectTests])
+ CPicklerUnpicklerObjectTests,
+ InMemoryPickleTests])
support.run_unittest(*tests)
support.run_doctest(pickle)
diff --git a/Misc/NEWS b/Misc/NEWS
index 4966aa9..eedea78 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -20,6 +20,10 @@ Core and Builtins
Library
-------
+- Issue #9410: Various optimizations to the pickle module, leading to
+ speedups up to 4x (depending on the benchmark). Mostly ported from
+ Unladen Swallow; initial patch by Alexandre Vassalotti.
+
- The pprint module now supports printing OrderedDicts in their given
order (formerly, it would sort the keys).
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index c47a844..f9dc045 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -10,7 +10,6 @@ enum {
DEFAULT_PROTOCOL = 3
};
-
/* Pickle opcodes. These must be kept updated with pickle.py.
Extensive docs are in pickletools.py. */
enum opcode {
@@ -96,9 +95,12 @@ enum {
checking for self-referential data-structures. */
FAST_NESTING_LIMIT = 50,
- /* Size of the write buffer of Pickler. Higher values will reduce the
- number of calls to the write() method of the output stream. */
- WRITE_BUF_SIZE = 256,
+ /* Initial size of the write buffer of Pickler. */
+ WRITE_BUF_SIZE = 4096,
+
+ /* Maximum size of the write buffer of Pickler when pickling to a
+ stream. This is ignored for in-memory pickling. */
+ MAX_WRITE_BUF_SIZE = 64 * 1024,
};
/* Exception classes for pickle. These should override the ones defined in
@@ -140,23 +142,19 @@ stack_underflow(void)
/* Internal data type used as the unpickling stack. */
typedef struct {
- PyObject_HEAD
- int length; /* number of initial slots in data currently used */
- int size; /* number of slots in data allocated */
+ PyObject_VAR_HEAD
PyObject **data;
+ Py_ssize_t allocated; /* number of slots in data allocated */
} Pdata;
static void
Pdata_dealloc(Pdata *self)
{
- int i;
- PyObject **p;
-
- for (i = self->length, p = self->data; --i >= 0; p++) {
- Py_DECREF(*p);
+ int i = Py_SIZE(self);
+ while (--i >= 0) {
+ Py_DECREF(self->data[i]);
}
- if (self->data)
- PyMem_Free(self->data);
+ PyMem_FREE(self->data);
PyObject_Del(self);
}
@@ -175,9 +173,9 @@ Pdata_New(void)
if (!(self = PyObject_New(Pdata, &Pdata_Type)))
return NULL;
- self->size = 8;
- self->length = 0;
- self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
+ Py_SIZE(self) = 0;
+ self->allocated = 8;
+ self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
if (self->data)
return (PyObject *)self;
Py_DECREF(self);
@@ -191,42 +189,40 @@ Pdata_New(void)
static int
Pdata_clear(Pdata *self, int clearto)
{
- int i;
- PyObject **p;
+ int i = Py_SIZE(self);
if (clearto < 0)
return stack_underflow();
- if (clearto >= self->length)
+ if (clearto >= i)
return 0;
- for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
- Py_CLEAR(*p);
+ while (--i >= clearto) {
+ Py_CLEAR(self->data[i]);
}
- self->length = clearto;
-
+ Py_SIZE(self) = clearto;
return 0;
}
static int
Pdata_grow(Pdata *self)
{
- int bigger;
- size_t nbytes;
- PyObject **tmp;
+ PyObject **data = self->data;
+ Py_ssize_t allocated = self->allocated;
+ Py_ssize_t new_allocated;
- bigger = (self->size << 1) + 1;
- if (bigger <= 0) /* was 0, or new value overflows */
- goto nomemory;
- if ((int)(size_t)bigger != bigger)
+ new_allocated = (allocated >> 3) + 6;
+ /* check for integer overflow */
+ if (new_allocated > PY_SSIZE_T_MAX - allocated)
goto nomemory;
- nbytes = (size_t)bigger * sizeof(PyObject *);
- if (nbytes / sizeof(PyObject *) != (size_t)bigger)
+ new_allocated += allocated;
+ if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
goto nomemory;
- tmp = PyMem_Realloc(self->data, nbytes);
- if (tmp == NULL)
+ data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
+ if (data == NULL)
goto nomemory;
- self->data = tmp;
- self->size = bigger;
+
+ self->data = data;
+ self->allocated = new_allocated;
return 0;
nomemory:
@@ -241,21 +237,21 @@ Pdata_grow(Pdata *self)
static PyObject *
Pdata_pop(Pdata *self)
{
- if (self->length == 0) {
+ if (Py_SIZE(self) == 0) {
PyErr_SetString(UnpicklingError, "bad pickle data");
return NULL;
}
- return self->data[--(self->length)];
+ return self->data[--Py_SIZE(self)];
}
#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
static int
Pdata_push(Pdata *self, PyObject *obj)
{
- if (self->length == self->size && Pdata_grow(self) < 0) {
+ if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
return -1;
}
- self->data[self->length++] = obj;
+ self->data[Py_SIZE(self)++] = obj;
return 0;
}
@@ -274,14 +270,14 @@ Pdata_poptuple(Pdata *self, Py_ssize_t start)
PyObject *tuple;
Py_ssize_t len, i, j;
- len = self->length - start;
+ len = Py_SIZE(self) - start;
tuple = PyTuple_New(len);
if (tuple == NULL)
return NULL;
for (i = start, j = 0; j < len; i++, j++)
PyTuple_SET_ITEM(tuple, j, self->data[i]);
- self->length = start;
+ Py_SIZE(self) = start;
return tuple;
}
@@ -291,31 +287,45 @@ Pdata_poplist(Pdata *self, Py_ssize_t start)
PyObject *list;
Py_ssize_t len, i, j;
- len = self->length - start;
+ len = Py_SIZE(self) - start;
list = PyList_New(len);
if (list == NULL)
return NULL;
for (i = start, j = 0; j < len; i++, j++)
PyList_SET_ITEM(list, j, self->data[i]);
- self->length = start;
+ Py_SIZE(self) = start;
return list;
}
+typedef struct {
+ PyObject *me_key;
+ long me_value;
+} PyMemoEntry;
+
+typedef struct {
+ Py_ssize_t mt_mask;
+ Py_ssize_t mt_used;
+ Py_ssize_t mt_allocated;
+ PyMemoEntry *mt_table;
+} PyMemoTable;
+
typedef struct PicklerObject {
PyObject_HEAD
- PyObject *write; /* write() method of the output stream */
- PyObject *memo; /* Memo dictionary, keep track of the seen
+ PyMemoTable *memo; /* Memo table, keep track of the seen
objects to support self-referential objects
- pickling. */
+ pickling. */
PyObject *pers_func; /* persistent_id() method, can be NULL */
PyObject *arg;
+
+ PyObject *write; /* write() method of the output stream. */
+ PyObject *output_buffer; /* Write into a local bytearray buffer before
+ flushing to the stream. */
+ Py_ssize_t output_len; /* Length of output_buffer. */
+ Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
int proto; /* Pickle protocol number, >= 0 */
int bin; /* Boolean, true if proto > 0 */
int buf_size; /* Size of the current buffered pickle data */
- char *write_buf; /* Write buffer, this is to avoid calling the
- write() method of the output stream too
- often. */
int fast; /* Enable fast mode if set to a true value.
The fast mode disable the usage of memo,
therefore speeding the pickling process by
@@ -331,15 +341,23 @@ typedef struct PicklerObject {
typedef struct UnpicklerObject {
PyObject_HEAD
Pdata *stack; /* Pickle data stack, store unpickled objects. */
- PyObject *readline; /* readline() method of the output stream */
- PyObject *read; /* read() method of the output stream */
- PyObject *memo; /* Memo dictionary, provide the objects stored
- using the PUT opcodes. */
+
+ /* The unpickler memo is just an array of PyObject *s. Using a dict
+ is unnecessary, since the keys are contiguous ints. */
+ PyObject **memo;
+ Py_ssize_t memo_size;
+
PyObject *arg;
PyObject *pers_func; /* persistent_load() method, can be NULL. */
- PyObject *last_string; /* Reference to the last string read by the
- readline() method. */
- char *buffer; /* Reading buffer. */
+
+ Py_buffer buffer;
+ char *input_buffer;
+ char *input_line;
+ Py_ssize_t input_len;
+ Py_ssize_t next_read_idx;
+ PyObject *read; /* read() method of the input stream. */
+ PyObject *readline; /* readline() method of the input stream. */
+
char *encoding; /* Name of the encoding to be used for
decoding strings pickled using Python
2.x. The default value is "ASCII" */
@@ -362,9 +380,230 @@ static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
+/*************************************************************************
+ A custom hashtable mapping void* to longs. This is used by the pickler for
+ memoization. Using a custom hashtable rather than PyDict allows us to skip
+ a bunch of unnecessary object creation. This makes a huge performance
+ difference. */
+
+#define MT_MINSIZE 8
+#define PERTURB_SHIFT 5
+
+
+static PyMemoTable *
+PyMemoTable_New(void)
+{
+ PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
+ if (memo == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ memo->mt_used = 0;
+ memo->mt_allocated = MT_MINSIZE;
+ memo->mt_mask = MT_MINSIZE - 1;
+ memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
+ if (memo->mt_table == NULL) {
+ PyMem_FREE(memo);
+ PyErr_NoMemory();
+ return NULL;
+ }
+ memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
+
+ return memo;
+}
+
+static PyMemoTable *
+PyMemoTable_Copy(PyMemoTable *self)
+{
+ Py_ssize_t i;
+ PyMemoTable *new = PyMemoTable_New();
+ if (new == NULL)
+ return NULL;
+
+ new->mt_used = self->mt_used;
+ new->mt_allocated = self->mt_allocated;
+ new->mt_mask = self->mt_mask;
+ /* The table we get from _New() is probably smaller than we wanted.
+ Free it and allocate one that's the right size. */
+ PyMem_FREE(new->mt_table);
+ new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
+ if (new->mt_table == NULL) {
+ PyMem_FREE(new);
+ return NULL;
+ }
+ for (i = 0; i < self->mt_allocated; i++) {
+ Py_XINCREF(self->mt_table[i].me_key);
+ }
+ memcpy(new->mt_table, self->mt_table,
+ sizeof(PyMemoEntry) * self->mt_allocated);
+
+ return new;
+}
+
+static Py_ssize_t
+PyMemoTable_Size(PyMemoTable *self)
+{
+ return self->mt_used;
+}
+
+static int
+PyMemoTable_Clear(PyMemoTable *self)
+{
+ Py_ssize_t i = self->mt_allocated;
+
+ while (--i >= 0) {
+ Py_XDECREF(self->mt_table[i].me_key);
+ }
+ self->mt_used = 0;
+ memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
+ return 0;
+}
+
+static void
+PyMemoTable_Del(PyMemoTable *self)
+{
+ if (self == NULL)
+ return;
+ PyMemoTable_Clear(self);
+
+ PyMem_FREE(self->mt_table);
+ PyMem_FREE(self);
+}
+
+/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
+ can be considerably simpler than dictobject.c's lookdict(). */
+static PyMemoEntry *
+_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
+{
+ size_t i;
+ size_t perturb;
+ size_t mask = (size_t)self->mt_mask;
+ PyMemoEntry *table = self->mt_table;
+ PyMemoEntry *entry;
+ long hash = (long)key >> 3;
+
+ i = hash & mask;
+ entry = &table[i];
+ if (entry->me_key == NULL || entry->me_key == key)
+ return entry;
+
+ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
+ i = (i << 2) + i + perturb + 1;
+ entry = &table[i & mask];
+ if (entry->me_key == NULL || entry->me_key == key)
+ return entry;
+ }
+ assert(0); /* Never reached */
+ return NULL;
+}
+
+/* Returns -1 on failure, 0 on success. */
+static int
+_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
+{
+ PyMemoEntry *oldtable = NULL;
+ PyMemoEntry *oldentry, *newentry;
+ Py_ssize_t new_size = MT_MINSIZE;
+ Py_ssize_t to_process;
+
+ assert(min_size > 0);
+
+ /* Find the smallest valid table size >= min_size. */
+ while (new_size < min_size && new_size > 0)
+ new_size <<= 1;
+ if (new_size <= 0) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ /* new_size needs to be a power of two. */
+ assert((new_size & (new_size - 1)) == 0);
+
+ /* Allocate new table. */
+ oldtable = self->mt_table;
+ self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
+ if (self->mt_table == NULL) {
+ PyMem_FREE(oldtable);
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->mt_allocated = new_size;
+ self->mt_mask = new_size - 1;
+ memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
+
+ /* Copy entries from the old table. */
+ to_process = self->mt_used;
+ for (oldentry = oldtable; to_process > 0; oldentry++) {
+ if (oldentry->me_key != NULL) {
+ to_process--;
+ /* newentry is a pointer to a chunk of the new
+ mt_table, so we're setting the key:value pair
+ in-place. */
+ newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
+ newentry->me_key = oldentry->me_key;
+ newentry->me_value = oldentry->me_value;
+ }
+ }
+
+ /* Deallocate the old table. */
+ PyMem_FREE(oldtable);
+ return 0;
+}
+
+/* Returns NULL on failure, a pointer to the value otherwise. */
+static long *
+PyMemoTable_Get(PyMemoTable *self, PyObject *key)
+{
+ PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
+ if (entry->me_key == NULL)
+ return NULL;
+ return &entry->me_value;
+}
+
+/* Returns -1 on failure, 0 on success. */
+static int
+PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value)
+{
+ PyMemoEntry *entry;
+
+ assert(key != NULL);
+
+ entry = _PyMemoTable_Lookup(self, key);
+ if (entry->me_key != NULL) {
+ entry->me_value = value;
+ return 0;
+ }
+ Py_INCREF(key);
+ entry->me_key = key;
+ entry->me_value = value;
+ self->mt_used++;
+
+ /* If we added a key, we can safely resize. Otherwise just return!
+ * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
+ *
+ * Quadrupling the size improves average table sparseness
+ * (reducing collisions) at the cost of some memory. It also halves
+ * the number of expensive resize operations in a growing memo table.
+ *
+ * Very large memo tables (over 50K items) use doubling instead.
+ * This may help applications with severe memory constraints.
+ */
+ if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
+ return 0;
+ return _PyMemoTable_ResizeTable(self,
+ (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
+}
+
+#undef MT_MINSIZE
+#undef PERTURB_SHIFT
+
+/*************************************************************************/
+
/* Helpers for creating the argument tuple passed to functions. This has the
- performance advantage of calling PyTuple_New() only once. */
+ performance advantage of calling PyTuple_New() only once.
+ XXX(avassalotti): Inline directly in _Pickler_FastCall() and
+ _Unpickler_FastCall(). */
#define ARG_TUP(self, obj) do { \
if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \
Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \
@@ -401,10 +640,9 @@ static PyTypeObject Unpickler_Type;
XXX: And, what is the reference behavior of these? Steal, borrow? At first
glance, it seems to steal the reference of 'arg' and borrow the reference
- of 'func'.
- */
+ of 'func'. */
static PyObject *
-pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
+_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
{
PyObject *result = NULL;
@@ -416,181 +654,541 @@ pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
return result;
}
+static int
+_Pickler_ClearBuffer(PicklerObject *self)
+{
+ Py_CLEAR(self->output_buffer);
+ self->output_buffer =
+ PyBytes_FromStringAndSize(NULL, self->max_output_len);
+ if (self->output_buffer == NULL)
+ return -1;
+ self->output_len = 0;
+ return 0;
+}
+
static PyObject *
-unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
+_Pickler_GetString(PicklerObject *self)
{
- PyObject *result = NULL;
+ PyObject *output_buffer = self->output_buffer;
- ARG_TUP(self, arg);
- if (self->arg) {
- result = PyObject_Call(func, self->arg, NULL);
- FREE_ARG_TUP(self);
- }
- return result;
+ assert(self->output_buffer != NULL);
+ self->output_buffer = NULL;
+ /* Resize down to exact size */
+ if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
+ return NULL;
+ return output_buffer;
}
-static Py_ssize_t
-pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
+static int
+_Pickler_FlushToFile(PicklerObject *self)
{
- PyObject *data, *result;
+ PyObject *output, *result;
+
+ assert(self->write != NULL);
- if (self->write_buf == NULL) {
- PyErr_SetString(PyExc_SystemError, "invalid write buffer");
+ output = _Pickler_GetString(self);
+ if (output == NULL)
return -1;
- }
- if (s == NULL) {
- if (!(self->buf_size))
- return 0;
- data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
- if (data == NULL)
- return -1;
- }
- else {
- if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
- if (pickler_write(self, NULL, 0) < 0)
+ result = _Pickler_FastCall(self, self->write, output);
+ Py_XDECREF(result);
+ return (result == NULL) ? -1 : 0;
+}
+
+static int
+_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
+{
+ Py_ssize_t i, required;
+ char *buffer;
+
+ assert(s != NULL);
+
+ required = self->output_len + n;
+ if (required > self->max_output_len) {
+ if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
+ /* XXX This reallocates a new buffer every time, which is a bit
+ wasteful. */
+ if (_Pickler_FlushToFile(self) < 0)
+ return -1;
+ if (_Pickler_ClearBuffer(self) < 0)
return -1;
}
-
- if (n > WRITE_BUF_SIZE) {
- if (!(data = PyBytes_FromStringAndSize(s, n)))
+ if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
+ /* we already flushed above, so the buffer is empty */
+ PyObject *result;
+ /* XXX we could spare an intermediate copy and pass
+ a memoryview instead */
+ PyObject *output = PyBytes_FromStringAndSize(s, n);
+ if (s == NULL)
return -1;
+ result = _Pickler_FastCall(self, self->write, output);
+ Py_XDECREF(result);
+ return (result == NULL) ? -1 : 0;
}
else {
- memcpy(self->write_buf + self->buf_size, s, n);
- self->buf_size += n;
- return n;
+ if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->max_output_len = (self->output_len + n) * 2;
+ if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
+ return -1;
}
}
+ buffer = PyBytes_AS_STRING(self->output_buffer);
+ if (n < 8) {
+ /* This is faster than memcpy when the string is short. */
+ for (i = 0; i < n; i++) {
+ buffer[self->output_len + i] = s[i];
+ }
+ }
+ else {
+ memcpy(buffer + self->output_len, s, n);
+ }
+ self->output_len += n;
+ return n;
+}
+
+static PicklerObject *
+_Pickler_New(void)
+{
+ PicklerObject *self;
+
+ self = PyObject_GC_New(PicklerObject, &Pickler_Type);
+ if (self == NULL)
+ return NULL;
+
+ self->pers_func = NULL;
+ self->arg = NULL;
+ self->write = NULL;
+ self->proto = 0;
+ self->bin = 0;
+ self->fast = 0;
+ self->fast_nesting = 0;
+ self->fix_imports = 0;
+ self->fast_memo = NULL;
+
+ self->memo = PyMemoTable_New();
+ if (self->memo == NULL) {
+ Py_DECREF(self);
+ return NULL;
+ }
+ self->max_output_len = WRITE_BUF_SIZE;
+ self->output_len = 0;
+ self->output_buffer = PyBytes_FromStringAndSize(NULL,
+ self->max_output_len);
+ if (self->output_buffer == NULL) {
+ Py_DECREF(self);
+ return NULL;
+ }
+ return self;
+}
+
+static int
+_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
+ PyObject *fix_imports_obj)
+{
+ long proto = 0;
+ int fix_imports;
+
+ if (proto_obj == NULL || proto_obj == Py_None)
+ proto = DEFAULT_PROTOCOL;
+ else {
+ proto = PyLong_AsLong(proto_obj);
+ if (proto == -1 && PyErr_Occurred())
+ return -1;
+ }
+ if (proto < 0)
+ proto = HIGHEST_PROTOCOL;
+ if (proto > HIGHEST_PROTOCOL) {
+ PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
+ HIGHEST_PROTOCOL);
+ return -1;
+ }
+ fix_imports = PyObject_IsTrue(fix_imports_obj);
+ if (fix_imports == -1)
+ return -1;
+
+ self->proto = proto;
+ self->bin = proto > 0;
+ self->fix_imports = fix_imports && proto < 3;
+
+ return 0;
+}
- /* object with write method */
- result = pickler_call(self, self->write, data);
- if (result == NULL)
+/* Returns -1 (with an exception set) on failure, 0 on success. This may
+ be called once on a freshly created Pickler. */
+static int
+_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
+{
+ assert(file != NULL);
+ self->write = PyObject_GetAttrString(file, "write");
+ if (self->write == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_AttributeError))
+ PyErr_SetString(PyExc_TypeError,
+ "file must have a 'write' attribute");
return -1;
+ }
- Py_DECREF(result);
- self->buf_size = 0;
- return n;
+ return 0;
}
-/* XXX: These read/readline functions ought to be optimized. Buffered I/O
- might help a lot, especially with the new (but much slower) io library.
- On the other hand, the added complexity might not worth it.
- */
+/* See documentation for _Pickler_FastCall(). */
+static PyObject *
+_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
+{
+ PyObject *result = NULL;
+
+ ARG_TUP(self, arg);
+ if (self->arg) {
+ result = PyObject_Call(func, self->arg, NULL);
+ FREE_ARG_TUP(self);
+ }
+ return result;
+}
+
+/* Returns the size of the input on success, -1 on failure. This takes its
+ own reference to `input`. */
+static Py_ssize_t
+_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
+{
+ if (self->buffer.buf != NULL)
+ PyBuffer_Release(&self->buffer);
+ if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
+ return -1;
+ self->input_buffer = self->buffer.buf;
+ self->input_len = self->buffer.len;
+ self->next_read_idx = 0;
+ return self->input_len;
+}
+
+static const Py_ssize_t READ_WHOLE_LINE = -1;
+
+/* If reading from a file, we need to only pull the bytes we need, since there
+ may be multiple pickle objects arranged contiguously in the same input
+ buffer.
+
+ If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
+ bytes from the input stream/buffer.
+
+ Update the unpickler's input buffer with the newly-read data. Returns -1 on
+ failure; on success, returns the number of bytes read from the file.
-/* Read at least n characters from the input stream and set s to the current
- reading position. */
+ On success, self->input_len will be 0; this is intentional so that when
+ unpickling from a file, the "we've run out of data" code paths will trigger,
+ causing the Unpickler to go back to the file for more data. Use the returned
+ size to tell you how much data you can process. */
static Py_ssize_t
-unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
+_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
{
- PyObject *len;
PyObject *data;
+ Py_ssize_t read_size;
- len = PyLong_FromSsize_t(n);
- if (len == NULL)
- return -1;
+ assert(self->read != NULL);
+ assert(self->next_read_idx == 0);
+
+ if (n == READ_WHOLE_LINE)
+ data = PyObject_Call(self->readline, empty_tuple, NULL);
+ else {
+ PyObject *len = PyLong_FromSsize_t(n);
+ if (len == NULL)
+ return -1;
+ data = _Unpickler_FastCall(self, self->read, len);
+ }
- data = unpickler_call(self, self->read, len);
if (data == NULL)
return -1;
- /* XXX: Should bytearray be supported too? */
- if (!PyBytes_Check(data)) {
- PyErr_SetString(PyExc_ValueError,
- "read() from the underlying stream did not "
- "return bytes");
- Py_DECREF(data);
- return -1;
+ read_size = _Unpickler_SetStringInput(self, data);
+ self->input_len = 0;
+ Py_DECREF(data);
+ return read_size;
+}
+
+/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
+
+ This should be used for all data reads, rather than accessing the unpickler's
+ input buffer directly. This method deals correctly with reading from input
+ streams, which the input buffer doesn't deal with.
+
+ Note that when reading from a file-like object, self->next_read_idx won't
+ be updated (it should remain at 0 for the entire unpickling process). You
+ should use this function's return value to know how many bytes you can
+ consume.
+
+ Returns -1 (with an exception set) on failure. On success, return the
+ number of chars read. */
+static Py_ssize_t
+_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
+{
+ if (n == 0) {
+ *s = NULL;
+ return 0;
}
- if (PyBytes_GET_SIZE(data) != n) {
- PyErr_SetNone(PyExc_EOFError);
- Py_DECREF(data);
+ /* This condition will always be true if self->read. */
+ if (self->next_read_idx + n > self->input_len) {
+ if (self->read) {
+ Py_ssize_t num_read;
+ assert(self->next_read_idx == self->input_len);
+ num_read = _Unpickler_ReadFromFile(self, n);
+ if (n < 0)
+ return -1;
+ if (num_read == n) {
+ *s = self->input_buffer;
+ return num_read;
+ }
+ }
+ PyErr_Format(PyExc_EOFError, "Ran out of input");
return -1;
}
+ assert(self->read == NULL);
+ *s = self->input_buffer + self->next_read_idx;
+ self->next_read_idx += n;
+ return n;
+}
- Py_XDECREF(self->last_string);
- self->last_string = data;
-
- if (!(*s = PyBytes_AS_STRING(data)))
+static Py_ssize_t
+_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
+ char **result)
+{
+ char *input_line = PyMem_Realloc(self->input_line, len + 1);
+ if (input_line == NULL)
return -1;
- return n;
+ memcpy(input_line, line, len);
+ input_line[len] = '\0';
+ self->input_line = input_line;
+ *result = self->input_line;
+ return len;
}
+/* Read a line from the input stream/buffer. If we run off the end of the input
+ before hitting \n, return the data we found.
+
+ Returns the number of chars read, or -1 on failure. */
static Py_ssize_t
-unpickler_readline(UnpicklerObject *self, char **s)
+_Unpickler_Readline(UnpicklerObject *self, char **result)
{
- PyObject *data;
+ Py_ssize_t i, num_read;
+
+ /* This loop will never be entered if self->read is not NULL. */
+ for (i = self->next_read_idx; i < self->input_len; i++) {
+ assert(self->read == NULL);
+ if (self->input_buffer[i] == '\n') {
+ char *line_start = self->input_buffer + self->next_read_idx;
+ num_read = i - self->next_read_idx + 1;
+ self->next_read_idx = i + 1;
+ return _Unpickler_CopyLine(self, line_start, num_read, result);
+ }
+ }
+ if (self->read) {
+ assert(self->next_read_idx == self->input_len);
+ num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
+ if (num_read < 0)
+ return -1;
+ *result = self->input_buffer;
+ return num_read;
+ }
+
+ /* If we get here, we've run off the end of the input string. Return the
+ remaining string and let the caller figure it out. */
+ *result = self->input_buffer + self->next_read_idx;
+ num_read = i - self->next_read_idx;
+ self->next_read_idx = i;
+ return num_read;
+}
- data = PyObject_CallObject(self->readline, empty_tuple);
- if (data == NULL)
- return -1;
+/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
+ will be modified in place. */
+static int
+_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
+{
+ Py_ssize_t i;
+ PyObject **memo;
+
+ assert(new_size > self->memo_size);
- /* XXX: Should bytearray be supported too? */
- if (!PyBytes_Check(data)) {
- PyErr_SetString(PyExc_ValueError,
- "readline() from the underlying stream did not "
- "return bytes");
+ memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
+ if (memo == NULL) {
+ PyErr_NoMemory();
return -1;
}
+ self->memo = memo;
+ for (i = self->memo_size; i < new_size; i++)
+ self->memo[i] = NULL;
+ self->memo_size = new_size;
+ return 0;
+}
- Py_XDECREF(self->last_string);
- self->last_string = data;
-
- if (!(*s = PyBytes_AS_STRING(data)))
- return -1;
+/* Returns NULL if idx is out of bounds. */
+static PyObject *
+_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
+{
+ if (idx < 0 || idx >= self->memo_size)
+ return NULL;
- return PyBytes_GET_SIZE(data);
+ return self->memo[idx];
}
-/* Generate a GET opcode for an object stored in the memo. The 'key' argument
- should be the address of the object as returned by PyLong_FromVoidPtr(). */
+/* Returns -1 (with an exception set) on failure, 0 on success.
+ This takes its own reference to `value`. */
static int
-memo_get(PicklerObject *self, PyObject *key)
+_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
{
- PyObject *value;
- PyObject *memo_id;
- long x;
- char pdata[30];
- int len;
+ PyObject *old_item;
- value = PyDict_GetItemWithError(self->memo, key);
- if (value == NULL) {
- if (!PyErr_Occurred())
- PyErr_SetObject(PyExc_KeyError, key);
- return -1;
+ if (idx >= self->memo_size) {
+ if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
+ return -1;
+ assert(idx < self->memo_size);
+ }
+ Py_INCREF(value);
+ old_item = self->memo[idx];
+ self->memo[idx] = value;
+ Py_XDECREF(old_item);
+ return 0;
+}
+
+static PyObject **
+_Unpickler_NewMemo(Py_ssize_t new_size)
+{
+ PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
+ if (memo == NULL)
+ return NULL;
+ memset(memo, 0, new_size * sizeof(PyObject *));
+ return memo;
+}
+
+/* Free the unpickler's memo, taking care to decref any items left in it. */
+static void
+_Unpickler_MemoCleanup(UnpicklerObject *self)
+{
+ Py_ssize_t i;
+ PyObject **memo = self->memo;
+
+ if (self->memo == NULL)
+ return;
+ self->memo = NULL;
+ i = self->memo_size;
+ while (--i >= 0) {
+ Py_XDECREF(memo[i]);
+ }
+ PyMem_FREE(memo);
+}
+
+static UnpicklerObject *
+_Unpickler_New(void)
+{
+ UnpicklerObject *self;
+
+ self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
+ if (self == NULL)
+ return NULL;
+
+ self->stack = (Pdata *)Pdata_New();
+ if (self->stack == NULL) {
+ Py_DECREF(self);
+ return NULL;
+ }
+ memset(&self->buffer, 0, sizeof(Py_buffer));
+
+ self->memo_size = 32;
+ self->memo = _Unpickler_NewMemo(self->memo_size);
+ if (self->memo == NULL) {
+ Py_DECREF(self);
+ return NULL;
}
- memo_id = PyTuple_GetItem(value, 0);
- if (memo_id == NULL)
+ self->arg = NULL;
+ self->pers_func = NULL;
+ self->input_buffer = NULL;
+ self->input_line = NULL;
+ self->input_len = 0;
+ self->next_read_idx = 0;
+ self->read = NULL;
+ self->readline = NULL;
+ self->encoding = NULL;
+ self->errors = NULL;
+ self->marks = NULL;
+ self->num_marks = 0;
+ self->marks_size = 0;
+ self->proto = 0;
+ self->fix_imports = 0;
+
+ return self;
+}
+
+/* Returns -1 (with an exception set) on failure, 0 on success. This may
+ be called once on a freshly created Pickler. */
+static int
+_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
+{
+ self->read = PyObject_GetAttrString(file, "read");
+ self->readline = PyObject_GetAttrString(file, "readline");
+ if (self->readline == NULL || self->read == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_AttributeError))
+ PyErr_SetString(PyExc_TypeError,
+ "file must have 'read' and 'readline' attributes");
+ Py_CLEAR(self->read);
+ Py_CLEAR(self->readline);
return -1;
+ }
+ return 0;
+}
- if (!PyLong_Check(memo_id)) {
- PyErr_SetString(PicklingError, "memo id must be an integer");
+/* Returns -1 (with an exception set) on failure, 0 on success. This may
+ be called once on a freshly created Pickler. */
+static int
+_Unpickler_SetInputEncoding(UnpicklerObject *self,
+ const char *encoding,
+ const char *errors)
+{
+ if (encoding == NULL)
+ encoding = "ASCII";
+ if (errors == NULL)
+ errors = "strict";
+
+ self->encoding = strdup(encoding);
+ self->errors = strdup(errors);
+ if (self->encoding == NULL || self->errors == NULL) {
+ PyErr_NoMemory();
return -1;
}
- x = PyLong_AsLong(memo_id);
- if (x == -1 && PyErr_Occurred())
+ return 0;
+}
+
+/* Generate a GET opcode for an object stored in the memo. */
+static int
+memo_get(PicklerObject *self, PyObject *key)
+{
+ long *value;
+ char pdata[30];
+ int len;
+
+ value = PyMemoTable_Get(self->memo, key);
+ if (value == NULL) {
+ PyErr_SetObject(PyExc_KeyError, key);
return -1;
+ }
if (!self->bin) {
pdata[0] = GET;
- PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
+ PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value);
len = (int)strlen(pdata);
}
else {
- if (x < 256) {
+ if (*value < 256) {
pdata[0] = BINGET;
- pdata[1] = (unsigned char)(x & 0xff);
+ pdata[1] = (unsigned char)(*value & 0xff);
len = 2;
}
- else if (x <= 0xffffffffL) {
+ else if (*value <= 0xffffffffL) {
pdata[0] = LONG_BINGET;
- pdata[1] = (unsigned char)(x & 0xff);
- pdata[2] = (unsigned char)((x >> 8) & 0xff);
- pdata[3] = (unsigned char)((x >> 16) & 0xff);
- pdata[4] = (unsigned char)((x >> 24) & 0xff);
+ pdata[1] = (unsigned char)(*value & 0xff);
+ pdata[2] = (unsigned char)((*value >> 8) & 0xff);
+ pdata[3] = (unsigned char)((*value >> 16) & 0xff);
+ pdata[4] = (unsigned char)((*value >> 24) & 0xff);
len = 5;
}
else { /* unlikely */
@@ -600,7 +1198,7 @@ memo_get(PicklerObject *self, PyObject *key)
}
}
- if (pickler_write(self, pdata, len) < 0)
+ if (_Pickler_Write(self, pdata, len) < 0)
return -1;
return 0;
@@ -611,9 +1209,6 @@ memo_get(PicklerObject *self, PyObject *key)
static int
memo_put(PicklerObject *self, PyObject *obj)
{
- PyObject *key = NULL;
- PyObject *memo_id = NULL;
- PyObject *tuple = NULL;
long x;
char pdata[30];
int len;
@@ -622,23 +1217,8 @@ memo_put(PicklerObject *self, PyObject *obj)
if (self->fast)
return 0;
- key = PyLong_FromVoidPtr(obj);
- if (key == NULL)
- goto error;
- if ((x = PyDict_Size(self->memo)) < 0)
- goto error;
- memo_id = PyLong_FromLong(x);
- if (memo_id == NULL)
- goto error;
- tuple = PyTuple_New(2);
- if (tuple == NULL)
- goto error;
-
- Py_INCREF(memo_id);
- PyTuple_SET_ITEM(tuple, 0, memo_id);
- Py_INCREF(obj);
- PyTuple_SET_ITEM(tuple, 1, obj);
- if (PyDict_SetItem(self->memo, key, tuple) < 0)
+ x = PyMemoTable_Size(self->memo);
+ if (PyMemoTable_Set(self->memo, obj, x) < 0)
goto error;
if (!self->bin) {
@@ -667,7 +1247,7 @@ memo_put(PicklerObject *self, PyObject *obj)
}
}
- if (pickler_write(self, pdata, len) < 0)
+ if (_Pickler_Write(self, pdata, len) < 0)
goto error;
if (0) {
@@ -675,10 +1255,6 @@ memo_put(PicklerObject *self, PyObject *obj)
status = -1;
}
- Py_XDECREF(key);
- Py_XDECREF(memo_id);
- Py_XDECREF(tuple);
-
return status;
}
@@ -821,7 +1397,7 @@ static int
save_none(PicklerObject *self, PyObject *obj)
{
const char none_op = NONE;
- if (pickler_write(self, &none_op, 1) < 0)
+ if (_Pickler_Write(self, &none_op, 1) < 0)
return -1;
return 0;
@@ -836,10 +1412,10 @@ save_bool(PicklerObject *self, PyObject *obj)
if (self->proto >= 2) {
const char bool_op = p ? NEWTRUE : NEWFALSE;
- if (pickler_write(self, &bool_op, 1) < 0)
+ if (_Pickler_Write(self, &bool_op, 1) < 0)
return -1;
}
- else if (pickler_write(self, buf[p], len[p]) < 0)
+ else if (_Pickler_Write(self, buf[p], len[p]) < 0)
return -1;
return 0;
@@ -861,7 +1437,7 @@ save_int(PicklerObject *self, long x)
*/
pdata[0] = LONG; /* use LONG for consistency with pickle.py */
PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
- if (pickler_write(self, pdata, strlen(pdata)) < 0)
+ if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
return -1;
}
else {
@@ -886,7 +1462,7 @@ save_int(PicklerObject *self, long x)
len = 5;
}
- if (pickler_write(self, pdata, len) < 0)
+ if (_Pickler_Write(self, pdata, len) < 0)
return -1;
}
@@ -922,7 +1498,7 @@ save_long(PicklerObject *self, PyObject *obj)
if (sign == 0) {
header[0] = LONG1;
header[1] = 0; /* It's 0 -- an empty bytestring. */
- if (pickler_write(self, header, 2) < 0)
+ if (_Pickler_Write(self, header, 2) < 0)
goto error;
return 0;
}
@@ -983,8 +1559,8 @@ save_long(PicklerObject *self, PyObject *obj)
}
size = 5;
}
- if (pickler_write(self, header, size) < 0 ||
- pickler_write(self, (char *)pdata, (int)nbytes) < 0)
+ if (_Pickler_Write(self, header, size) < 0 ||
+ _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
goto error;
}
else {
@@ -1002,9 +1578,9 @@ save_long(PicklerObject *self, PyObject *obj)
if (string == NULL)
goto error;
- if (pickler_write(self, &long_op, 1) < 0 ||
- pickler_write(self, string, size) < 0 ||
- pickler_write(self, "L\n", 2) < 0)
+ if (_Pickler_Write(self, &long_op, 1) < 0 ||
+ _Pickler_Write(self, string, size) < 0 ||
+ _Pickler_Write(self, "L\n", 2) < 0)
goto error;
}
@@ -1027,7 +1603,7 @@ save_float(PicklerObject *self, PyObject *obj)
pdata[0] = BINFLOAT;
if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
return -1;
- if (pickler_write(self, pdata, 9) < 0)
+ if (_Pickler_Write(self, pdata, 9) < 0)
return -1;
}
else {
@@ -1035,7 +1611,7 @@ save_float(PicklerObject *self, PyObject *obj)
char *buf = NULL;
char op = FLOAT;
- if (pickler_write(self, &op, 1) < 0)
+ if (_Pickler_Write(self, &op, 1) < 0)
goto done;
buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
@@ -1044,10 +1620,10 @@ save_float(PicklerObject *self, PyObject *obj)
goto done;
}
- if (pickler_write(self, buf, strlen(buf)) < 0)
+ if (_Pickler_Write(self, buf, strlen(buf)) < 0)
goto done;
- if (pickler_write(self, "\n", 1) < 0)
+ if (_Pickler_Write(self, "\n", 1) < 0)
goto done;
result = 0;
@@ -1113,10 +1689,10 @@ save_bytes(PicklerObject *self, PyObject *obj)
return -1; /* string too large */
}
- if (pickler_write(self, header, len) < 0)
+ if (_Pickler_Write(self, header, len) < 0)
return -1;
- if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
+ if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
return -1;
if (memo_put(self, obj) < 0)
@@ -1243,10 +1819,10 @@ save_unicode(PicklerObject *self, PyObject *obj)
pdata[3] = (unsigned char)((size >> 16) & 0xff);
pdata[4] = (unsigned char)((size >> 24) & 0xff);
- if (pickler_write(self, pdata, 5) < 0)
+ if (_Pickler_Write(self, pdata, 5) < 0)
goto error;
- if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
+ if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
goto error;
}
else {
@@ -1257,14 +1833,14 @@ save_unicode(PicklerObject *self, PyObject *obj)
if (encoded == NULL)
goto error;
- if (pickler_write(self, &unicode_op, 1) < 0)
+ if (_Pickler_Write(self, &unicode_op, 1) < 0)
goto error;
size = PyBytes_GET_SIZE(encoded);
- if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
+ if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
goto error;
- if (pickler_write(self, "\n", 1) < 0)
+ if (_Pickler_Write(self, "\n", 1) < 0)
goto error;
}
if (memo_put(self, obj) < 0)
@@ -1307,9 +1883,7 @@ store_tuple_elements(PicklerObject *self, PyObject *t, int len)
static int
save_tuple(PicklerObject *self, PyObject *obj)
{
- PyObject *memo_key = NULL;
int len, i;
- int status = 0;
const char mark_op = MARK;
const char tuple_op = TUPLE;
@@ -1332,40 +1906,35 @@ save_tuple(PicklerObject *self, PyObject *obj)
pdata[1] = TUPLE;
len = 2;
}
- if (pickler_write(self, pdata, len) < 0)
+ if (_Pickler_Write(self, pdata, len) < 0)
return -1;
return 0;
}
- /* id(tuple) isn't in the memo now. If it shows up there after
+ /* The tuple isn't in the memo now. If it shows up there after
* saving the tuple elements, the tuple must be recursive, in
* which case we'll pop everything we put on the stack, and fetch
* its value from the memo.
*/
- memo_key = PyLong_FromVoidPtr(obj);
- if (memo_key == NULL)
- return -1;
-
if (len <= 3 && self->proto >= 2) {
/* Use TUPLE{1,2,3} opcodes. */
if (store_tuple_elements(self, obj, len) < 0)
- goto error;
+ return -1;
- if (PyDict_GetItem(self->memo, memo_key)) {
+ if (PyMemoTable_Get(self->memo, obj)) {
/* pop the len elements */
for (i = 0; i < len; i++)
- if (pickler_write(self, &pop_op, 1) < 0)
- goto error;
+ if (_Pickler_Write(self, &pop_op, 1) < 0)
+ return -1;
/* fetch from memo */
- if (memo_get(self, memo_key) < 0)
- goto error;
+ if (memo_get(self, obj) < 0)
+ return -1;
- Py_DECREF(memo_key);
return 0;
}
else { /* Not recursive. */
- if (pickler_write(self, len2opcode + len, 1) < 0)
- goto error;
+ if (_Pickler_Write(self, len2opcode + len, 1) < 0)
+ return -1;
}
goto memoize;
}
@@ -1373,49 +1942,42 @@ save_tuple(PicklerObject *self, PyObject *obj)
/* proto < 2 and len > 0, or proto >= 2 and len > 3.
* Generate MARK e1 e2 ... TUPLE
*/
- if (pickler_write(self, &mark_op, 1) < 0)
- goto error;
+ if (_Pickler_Write(self, &mark_op, 1) < 0)
+ return -1;
if (store_tuple_elements(self, obj, len) < 0)
- goto error;
+ return -1;
- if (PyDict_GetItem(self->memo, memo_key)) {
+ if (PyMemoTable_Get(self->memo, obj)) {
/* pop the stack stuff we pushed */
if (self->bin) {
- if (pickler_write(self, &pop_mark_op, 1) < 0)
- goto error;
+ if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
+ return -1;
}
else {
/* Note that we pop one more than len, to remove
* the MARK too.
*/
for (i = 0; i <= len; i++)
- if (pickler_write(self, &pop_op, 1) < 0)
- goto error;
+ if (_Pickler_Write(self, &pop_op, 1) < 0)
+ return -1;
}
/* fetch from memo */
- if (memo_get(self, memo_key) < 0)
- goto error;
+ if (memo_get(self, obj) < 0)
+ return -1;
- Py_DECREF(memo_key);
return 0;
}
else { /* Not recursive. */
- if (pickler_write(self, &tuple_op, 1) < 0)
- goto error;
+ if (_Pickler_Write(self, &tuple_op, 1) < 0)
+ return -1;
}
memoize:
if (memo_put(self, obj) < 0)
- goto error;
-
- if (0) {
- error:
- status = -1;
- }
+ return -1;
- Py_DECREF(memo_key);
- return status;
+ return 0;
}
/* iter is an iterator giving items, and we batch up chunks of
@@ -1455,7 +2017,7 @@ batch_list(PicklerObject *self, PyObject *iter)
Py_DECREF(obj);
if (i < 0)
return -1;
- if (pickler_write(self, &append_op, 1) < 0)
+ if (_Pickler_Write(self, &append_op, 1) < 0)
return -1;
}
return 0;
@@ -1482,7 +2044,7 @@ batch_list(PicklerObject *self, PyObject *iter)
/* Only one item to write */
if (save(self, firstitem, 0) < 0)
goto error;
- if (pickler_write(self, &append_op, 1) < 0)
+ if (_Pickler_Write(self, &append_op, 1) < 0)
goto error;
Py_CLEAR(firstitem);
break;
@@ -1491,7 +2053,7 @@ batch_list(PicklerObject *self, PyObject *iter)
/* More than one item to write */
/* Pump out MARK, items, APPENDS. */
- if (pickler_write(self, &mark_op, 1) < 0)
+ if (_Pickler_Write(self, &mark_op, 1) < 0)
goto error;
if (save(self, firstitem, 0) < 0)
@@ -1517,7 +2079,7 @@ batch_list(PicklerObject *self, PyObject *iter)
}
}
- if (pickler_write(self, &appends_op, 1) < 0)
+ if (_Pickler_Write(self, &appends_op, 1) < 0)
goto error;
} while (n == BATCHSIZE);
@@ -1529,10 +2091,65 @@ batch_list(PicklerObject *self, PyObject *iter)
return -1;
}
+/* This is a variant of batch_list() above, specialized for lists (with no
+ * support for list subclasses). Like batch_list(), we batch up chunks of
+ * MARK item item ... item APPENDS
+ * opcode sequences. Calling code should have arranged to first create an
+ * empty list, or list-like object, for the APPENDS to operate on.
+ * Returns 0 on success, -1 on error.
+ *
+ * This version is considerably faster than batch_list(), if less general.
+ *
+ * Note that this only works for protocols > 0.
+ */
+static int
+batch_list_exact(PicklerObject *self, PyObject *obj)
+{
+ PyObject *item = NULL;
+ int this_batch, total;
+
+ const char append_op = APPEND;
+ const char appends_op = APPENDS;
+ const char mark_op = MARK;
+
+ assert(obj != NULL);
+ assert(self->proto > 0);
+ assert(PyList_CheckExact(obj));
+
+ if (PyList_GET_SIZE(obj) == 1) {
+ item = PyList_GET_ITEM(obj, 0);
+ if (save(self, item, 0) < 0)
+ return -1;
+ if (_Pickler_Write(self, &append_op, 1) < 0)
+ return -1;
+ return 0;
+ }
+
+ /* Write in batches of BATCHSIZE. */
+ total = 0;
+ do {
+ this_batch = 0;
+ if (_Pickler_Write(self, &mark_op, 1) < 0)
+ return -1;
+ while (total < PyList_GET_SIZE(obj)) {
+ item = PyList_GET_ITEM(obj, total);
+ if (save(self, item, 0) < 0)
+ return -1;
+ total++;
+ if (++this_batch == BATCHSIZE)
+ break;
+ }
+ if (_Pickler_Write(self, &appends_op, 1) < 0)
+ return -1;
+
+ } while (total < PyList_GET_SIZE(obj));
+
+ return 0;
+}
+
static int
save_list(PicklerObject *self, PyObject *obj)
{
- PyObject *iter;
char header[3];
int len;
int status = 0;
@@ -1551,7 +2168,7 @@ save_list(PicklerObject *self, PyObject *obj)
len = 2;
}
- if (pickler_write(self, header, len) < 0)
+ if (_Pickler_Write(self, header, len) < 0)
goto error;
/* Get list length, and bow out early if empty. */
@@ -1562,14 +2179,24 @@ save_list(PicklerObject *self, PyObject *obj)
goto error;
if (len != 0) {
- /* Save the list elements. */
- iter = PyObject_GetIter(obj);
- if (iter == NULL)
- goto error;
- status = batch_list(self, iter);
- Py_DECREF(iter);
- }
+ /* Materialize the list elements. */
+ if (PyList_CheckExact(obj) && self->proto > 0) {
+ if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
+ status = batch_list_exact(self, obj);
+ Py_LeaveRecursiveCall();
+ }
+ } else {
+ PyObject *iter = PyObject_GetIter(obj);
+ if (iter == NULL)
+ goto error;
+ if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
+ status = batch_list(self, iter);
+ Py_LeaveRecursiveCall();
+ }
+ Py_DECREF(iter);
+ }
+ }
if (0) {
error:
status = -1;
@@ -1625,7 +2252,7 @@ batch_dict(PicklerObject *self, PyObject *iter)
Py_DECREF(obj);
if (i < 0)
return -1;
- if (pickler_write(self, &setitem_op, 1) < 0)
+ if (_Pickler_Write(self, &setitem_op, 1) < 0)
return -1;
}
return 0;
@@ -1659,7 +2286,7 @@ batch_dict(PicklerObject *self, PyObject *iter)
goto error;
if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
goto error;
- if (pickler_write(self, &setitem_op, 1) < 0)
+ if (_Pickler_Write(self, &setitem_op, 1) < 0)
goto error;
Py_CLEAR(firstitem);
break;
@@ -1668,7 +2295,7 @@ batch_dict(PicklerObject *self, PyObject *iter)
/* More than one item to write */
/* Pump out MARK, items, SETITEMS. */
- if (pickler_write(self, &mark_op, 1) < 0)
+ if (_Pickler_Write(self, &mark_op, 1) < 0)
goto error;
if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
@@ -1684,7 +2311,7 @@ batch_dict(PicklerObject *self, PyObject *iter)
PyErr_SetString(PyExc_TypeError, "dict items "
"iterator must return 2-tuples");
goto error;
- }
+ }
if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
goto error;
@@ -1702,7 +2329,7 @@ batch_dict(PicklerObject *self, PyObject *iter)
}
}
- if (pickler_write(self, &setitems_op, 1) < 0)
+ if (_Pickler_Write(self, &setitems_op, 1) < 0)
goto error;
} while (n == BATCHSIZE);
@@ -1746,7 +2373,7 @@ batch_dict_exact(PicklerObject *self, PyObject *obj)
return -1;
if (save(self, value, 0) < 0)
return -1;
- if (pickler_write(self, &setitem_op, 1) < 0)
+ if (_Pickler_Write(self, &setitem_op, 1) < 0)
return -1;
return 0;
}
@@ -1754,7 +2381,7 @@ batch_dict_exact(PicklerObject *self, PyObject *obj)
/* Write in batches of BATCHSIZE. */
do {
i = 0;
- if (pickler_write(self, &mark_op, 1) < 0)
+ if (_Pickler_Write(self, &mark_op, 1) < 0)
return -1;
while (PyDict_Next(obj, &ppos, &key, &value)) {
if (save(self, key, 0) < 0)
@@ -1764,7 +2391,7 @@ batch_dict_exact(PicklerObject *self, PyObject *obj)
if (++i == BATCHSIZE)
break;
}
- if (pickler_write(self, &setitems_op, 1) < 0)
+ if (_Pickler_Write(self, &setitems_op, 1) < 0)
return -1;
if (PyDict_Size(obj) != dict_size) {
PyErr_Format(
@@ -1799,7 +2426,7 @@ save_dict(PicklerObject *self, PyObject *obj)
len = 2;
}
- if (pickler_write(self, header, len) < 0)
+ if (_Pickler_Write(self, header, len) < 0)
goto error;
/* Get dict size, and bow out early if empty. */
@@ -1962,7 +2589,7 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name)
n = 5;
}
- if (pickler_write(self, pdata, n) < 0)
+ if (_Pickler_Write(self, pdata, n) < 0)
goto error;
}
else {
@@ -1973,7 +2600,7 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name)
PyObject *(*unicode_encoder)(PyObject *);
gen_global:
- if (pickler_write(self, &global_op, 1) < 0)
+ if (_Pickler_Write(self, &global_op, 1) < 0)
goto error;
/* Since Python 3.0 now supports non-ASCII identifiers, we encode both
@@ -2053,13 +2680,13 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name)
"pickle protocol %i", module_name, self->proto);
goto error;
}
- if (pickler_write(self, PyBytes_AS_STRING(encoded),
+ if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
PyBytes_GET_SIZE(encoded)) < 0) {
Py_DECREF(encoded);
goto error;
}
Py_DECREF(encoded);
- if(pickler_write(self, "\n", 1) < 0)
+ if(_Pickler_Write(self, "\n", 1) < 0)
goto error;
/* Save the name of the module. */
@@ -2071,13 +2698,13 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name)
"pickle protocol %i", global_name, self->proto);
goto error;
}
- if (pickler_write(self, PyBytes_AS_STRING(encoded),
+ if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
PyBytes_GET_SIZE(encoded)) < 0) {
Py_DECREF(encoded);
goto error;
}
Py_DECREF(encoded);
- if(pickler_write(self, "\n", 1) < 0)
+ if(_Pickler_Write(self, "\n", 1) < 0)
goto error;
/* Memoize the object. */
@@ -2106,14 +2733,14 @@ save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
const char binpersid_op = BINPERSID;
Py_INCREF(obj);
- pid = pickler_call(self, func, obj);
+ pid = _Pickler_FastCall(self, func, obj);
if (pid == NULL)
return -1;
if (pid != Py_None) {
if (self->bin) {
if (save(self, pid, 1) < 0 ||
- pickler_write(self, &binpersid_op, 1) < 0)
+ _Pickler_Write(self, &binpersid_op, 1) < 0)
goto error;
}
else {
@@ -2133,9 +2760,9 @@ save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
if (pid_ascii_bytes == NULL)
goto error;
- if (pickler_write(self, &persid_op, 1) < 0 ||
- pickler_write(self, pid_ascii_bytes, size) < 0 ||
- pickler_write(self, "\n", 1) < 0)
+ if (_Pickler_Write(self, &persid_op, 1) < 0 ||
+ _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
+ _Pickler_Write(self, "\n", 1) < 0)
goto error;
}
status = 1;
@@ -2220,6 +2847,8 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
if (newobj_str == NULL) {
newobj_str = PyUnicode_InternFromString("__newobj__");
+ if (newobj_str == NULL)
+ return -1;
}
name_str = PyObject_GetAttrString(callable, "__name__");
@@ -2312,13 +2941,13 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
return -1;
/* Add NEWOBJ opcode. */
- if (pickler_write(self, &newobj_op, 1) < 0)
+ if (_Pickler_Write(self, &newobj_op, 1) < 0)
return -1;
}
else { /* Not using NEWOBJ. */
if (save(self, callable, 0) < 0 ||
save(self, argtup, 0) < 0 ||
- pickler_write(self, &reduce_op, 1) < 0)
+ _Pickler_Write(self, &reduce_op, 1) < 0)
return -1;
}
@@ -2337,7 +2966,7 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
if (state) {
if (save(self, state, 0) < 0 ||
- pickler_write(self, &build_op, 1) < 0)
+ _Pickler_Write(self, &build_op, 1) < 0)
return -1;
}
@@ -2350,7 +2979,6 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
PyTypeObject *type;
PyObject *reduce_func = NULL;
PyObject *reduce_value = NULL;
- PyObject *memo_key = NULL;
int status = 0;
if (Py_EnterRecursiveCall(" while pickling an object") < 0)
@@ -2370,11 +2998,10 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
type = Py_TYPE(obj);
- /* XXX: The old cPickle had an optimization that used switch-case
- statement dispatching on the first letter of the type name. It was
- probably not a bad idea after all. If benchmarks shows that particular
- optimization had some real benefits, it would be nice to add it
- back. */
+ /* The old cPickle had an optimization that used switch-case statement
+ dispatching on the first letter of the type name. This has was removed
+ since benchmarks shown that this optimization was actually slowing
+ things down. */
/* Atom types; these aren't memoized, so don't check the memo. */
@@ -2398,11 +3025,8 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
/* Check the memo to see if it has the object. If so, generate
a GET (or BINGET) opcode, instead of pickling the object
once again. */
- memo_key = PyLong_FromVoidPtr(obj);
- if (memo_key == NULL)
- goto error;
- if (PyDict_GetItem(self->memo, memo_key)) {
- if (memo_get(self, memo_key) < 0)
+ if (PyMemoTable_Get(self->memo, obj)) {
+ if (memo_get(self, obj) < 0)
goto error;
goto done;
}
@@ -2465,7 +3089,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
*/
Py_INCREF(reduce_func);
Py_INCREF(obj);
- reduce_value = pickler_call(self, reduce_func, obj);
+ reduce_value = _Pickler_FastCall(self, reduce_func, obj);
}
else {
static PyObject *reduce_str = NULL;
@@ -2495,7 +3119,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
PyObject *proto;
proto = PyLong_FromLong(self->proto);
if (proto != NULL) {
- reduce_value = pickler_call(self, reduce_func, proto);
+ reduce_value = _Pickler_FastCall(self, reduce_func, proto);
}
}
else {
@@ -2538,7 +3162,6 @@ save(PicklerObject *self, PyObject *obj, int pers_save)
}
done:
Py_LeaveRecursiveCall();
- Py_XDECREF(memo_key);
Py_XDECREF(reduce_func);
Py_XDECREF(reduce_value);
@@ -2556,13 +3179,12 @@ dump(PicklerObject *self, PyObject *obj)
header[0] = PROTO;
assert(self->proto >= 0 && self->proto < 256);
header[1] = (unsigned char)self->proto;
- if (pickler_write(self, header, 2) < 0)
+ if (_Pickler_Write(self, header, 2) < 0)
return -1;
}
if (save(self, obj, 0) < 0 ||
- pickler_write(self, &stop_op, 1) < 0 ||
- pickler_write(self, NULL, 0) < 0)
+ _Pickler_Write(self, &stop_op, 1) < 0)
return -1;
return 0;
@@ -2580,7 +3202,7 @@ static PyObject *
Pickler_clear_memo(PicklerObject *self)
{
if (self->memo)
- PyDict_Clear(self->memo);
+ PyMemoTable_Clear(self->memo);
Py_RETURN_NONE;
}
@@ -2606,9 +3228,15 @@ Pickler_dump(PicklerObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "O:dump", &obj))
return NULL;
+ if (_Pickler_ClearBuffer(self) < 0)
+ return NULL;
+
if (dump(self, obj) < 0)
return NULL;
+ if (_Pickler_FlushToFile(self) < 0)
+ return NULL;
+
Py_RETURN_NONE;
}
@@ -2625,13 +3253,13 @@ Pickler_dealloc(PicklerObject *self)
{
PyObject_GC_UnTrack(self);
+ Py_XDECREF(self->output_buffer);
Py_XDECREF(self->write);
- Py_XDECREF(self->memo);
Py_XDECREF(self->pers_func);
Py_XDECREF(self->arg);
Py_XDECREF(self->fast_memo);
- PyMem_Free(self->write_buf);
+ PyMemoTable_Del(self->memo);
Py_TYPE(self)->tp_free((PyObject *)self);
}
@@ -2640,7 +3268,6 @@ static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
Py_VISIT(self->write);
- Py_VISIT(self->memo);
Py_VISIT(self->pers_func);
Py_VISIT(self->arg);
Py_VISIT(self->fast_memo);
@@ -2650,18 +3277,21 @@ Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
static int
Pickler_clear(PicklerObject *self)
{
+ Py_CLEAR(self->output_buffer);
Py_CLEAR(self->write);
- Py_CLEAR(self->memo);
Py_CLEAR(self->pers_func);
Py_CLEAR(self->arg);
Py_CLEAR(self->fast_memo);
- PyMem_Free(self->write_buf);
- self->write_buf = NULL;
-
+ if (self->memo != NULL) {
+ PyMemoTable *memo = self->memo;
+ self->memo = NULL;
+ PyMemoTable_Del(memo);
+ }
return 0;
}
+
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None)"
"\n"
@@ -2692,10 +3322,9 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
PyObject *file;
PyObject *proto_obj = NULL;
- long proto = 0;
- int fix_imports = 1;
+ PyObject *fix_imports = Py_True;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler",
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
kwlist, &file, &proto_obj, &fix_imports))
return -1;
@@ -2703,44 +3332,31 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
if (self->write != NULL)
(void)Pickler_clear(self);
- if (proto_obj == NULL || proto_obj == Py_None)
- proto = DEFAULT_PROTOCOL;
- else {
- proto = PyLong_AsLong(proto_obj);
- if (proto == -1 && PyErr_Occurred())
- return -1;
- }
+ if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
+ return -1;
- if (proto < 0)
- proto = HIGHEST_PROTOCOL;
- if (proto > HIGHEST_PROTOCOL) {
- PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
- HIGHEST_PROTOCOL);
+ if (_Pickler_SetOutputStream(self, file) < 0)
return -1;
+
+ /* memo and output_buffer may have already been created in _Pickler_New */
+ if (self->memo == NULL) {
+ self->memo = PyMemoTable_New();
+ if (self->memo == NULL)
+ return -1;
+ }
+ self->output_len = 0;
+ if (self->output_buffer == NULL) {
+ self->max_output_len = WRITE_BUF_SIZE;
+ self->output_buffer = PyBytes_FromStringAndSize(NULL,
+ self->max_output_len);
+ if (self->output_buffer == NULL)
+ return -1;
}
- self->proto = proto;
- self->bin = proto > 0;
self->arg = NULL;
self->fast = 0;
self->fast_nesting = 0;
self->fast_memo = NULL;
- self->fix_imports = fix_imports && proto < 3;
-
- if (!PyObject_HasAttrString(file, "write")) {
- PyErr_SetString(PyExc_TypeError,
- "file must have a 'write' attribute");
- return -1;
- }
- self->write = PyObject_GetAttrString(file, "write");
- if (self->write == NULL)
- return -1;
- self->buf_size = 0;
- self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
- if (self->write_buf == NULL) {
- PyErr_NoMemory();
- return -1;
- }
self->pers_func = NULL;
if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
self->pers_func = PyObject_GetAttrString((PyObject *)self,
@@ -2748,44 +3364,248 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
if (self->pers_func == NULL)
return -1;
}
- self->memo = PyDict_New();
- if (self->memo == NULL)
- return -1;
+ return 0;
+}
+
+/* Define a proxy object for the Pickler's internal memo object. This is to
+ * avoid breaking code like:
+ * pickler.memo.clear()
+ * and
+ * pickler.memo = saved_memo
+ * Is this a good idea? Not really, but we don't want to break code that uses
+ * it. Note that we don't implement the entire mapping API here. This is
+ * intentional, as these should be treated as black-box implementation details.
+ */
+
+typedef struct {
+ PyObject_HEAD
+ PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
+} PicklerMemoProxyObject;
+
+PyDoc_STRVAR(pmp_clear_doc,
+"memo.clear() -> None. Remove all items from memo.");
+
+static PyObject *
+pmp_clear(PicklerMemoProxyObject *self)
+{
+ if (self->pickler->memo)
+ PyMemoTable_Clear(self->pickler->memo);
+ Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(pmp_copy_doc,
+"memo.copy() -> new_memo. Copy the memo to a new object.");
+
+static PyObject *
+pmp_copy(PicklerMemoProxyObject *self)
+{
+ Py_ssize_t i;
+ PyMemoTable *memo;
+ PyObject *new_memo = PyDict_New();
+ if (new_memo == NULL)
+ return NULL;
+
+ memo = self->pickler->memo;
+ for (i = 0; i < memo->mt_allocated; ++i) {
+ PyMemoEntry entry = memo->mt_table[i];
+ if (entry.me_key != NULL) {
+ int status;
+ PyObject *key, *value;
+
+ key = PyLong_FromVoidPtr(entry.me_key);
+ value = Py_BuildValue("lO", entry.me_value, entry.me_key);
+
+ if (key == NULL || value == NULL) {
+ Py_XDECREF(key);
+ Py_XDECREF(value);
+ goto error;
+ }
+ status = PyDict_SetItem(new_memo, key, value);
+ Py_DECREF(key);
+ Py_DECREF(value);
+ if (status < 0)
+ goto error;
+ }
+ }
+ return new_memo;
+
+ error:
+ Py_XDECREF(new_memo);
+ return NULL;
+}
+
+PyDoc_STRVAR(pmp_reduce_doc,
+"memo.__reduce__(). Pickling support.");
+
+static PyObject *
+pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
+{
+ PyObject *reduce_value, *dict_args;
+ PyObject *contents = pmp_copy(self);
+ if (contents == NULL)
+ return NULL;
+
+ reduce_value = PyTuple_New(2);
+ if (reduce_value == NULL) {
+ Py_DECREF(contents);
+ return NULL;
+ }
+ dict_args = PyTuple_New(1);
+ if (dict_args == NULL) {
+ Py_DECREF(contents);
+ Py_DECREF(reduce_value);
+ return NULL;
+ }
+ PyTuple_SET_ITEM(dict_args, 0, contents);
+ Py_INCREF((PyObject *)&PyDict_Type);
+ PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
+ PyTuple_SET_ITEM(reduce_value, 1, dict_args);
+ return reduce_value;
+}
+
+static PyMethodDef picklerproxy_methods[] = {
+ {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
+ {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
+ {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
+ {NULL, NULL} /* sentinel */
+};
+
+static void
+PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
+{
+ PyObject_GC_UnTrack(self);
+ Py_XDECREF(self->pickler);
+ PyObject_GC_Del((PyObject *)self);
+}
+static int
+PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
+ visitproc visit, void *arg)
+{
+ Py_VISIT(self->pickler);
return 0;
}
+static int
+PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
+{
+ Py_CLEAR(self->pickler);
+ return 0;
+}
+
+static PyTypeObject PicklerMemoProxyType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_pickle.PicklerMemoProxy", /*tp_name*/
+ sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
+ 0,
+ (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ PyObject_GenericSetAttr, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+ 0, /* tp_doc */
+ (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
+ (inquiry)PicklerMemoProxy_clear, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ picklerproxy_methods, /* tp_methods */
+};
+
+static PyObject *
+PicklerMemoProxy_New(PicklerObject *pickler)
+{
+ PicklerMemoProxyObject *self;
+
+ self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
+ if (self == NULL)
+ return NULL;
+ Py_INCREF(pickler);
+ self->pickler = pickler;
+ PyObject_GC_Track(self);
+ return (PyObject *)self;
+}
+
+/*****************************************************************************/
+
static PyObject *
Pickler_get_memo(PicklerObject *self)
{
- if (self->memo == NULL)
- PyErr_SetString(PyExc_AttributeError, "memo");
- else
- Py_INCREF(self->memo);
- return self->memo;
+ return PicklerMemoProxy_New(self);
}
static int
-Pickler_set_memo(PicklerObject *self, PyObject *value)
+Pickler_set_memo(PicklerObject *self, PyObject *obj)
{
- PyObject *tmp;
+ PyMemoTable *new_memo = NULL;
- if (value == NULL) {
+ if (obj == NULL) {
PyErr_SetString(PyExc_TypeError,
"attribute deletion is not supported");
return -1;
}
- if (!PyDict_Check(value)) {
- PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
+
+ if (Py_TYPE(obj) == &PicklerMemoProxyType) {
+ PicklerObject *pickler =
+ ((PicklerMemoProxyObject *)obj)->pickler;
+
+ new_memo = PyMemoTable_Copy(pickler->memo);
+ if (new_memo == NULL)
+ return -1;
+ }
+ else if (PyDict_Check(obj)) {
+ Py_ssize_t i = 0;
+ PyObject *key, *value;
+
+ new_memo = PyMemoTable_New();
+ if (new_memo == NULL)
+ return -1;
+
+ while (PyDict_Next(obj, &i, &key, &value)) {
+ long memo_id;
+ PyObject *memo_obj;
+
+ if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
+ PyErr_SetString(PyExc_TypeError,
+ "'memo' values must be 2-item tuples");
+ goto error;
+ }
+ memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0));
+ if (memo_id == -1 && PyErr_Occurred())
+ goto error;
+ memo_obj = PyTuple_GET_ITEM(value, 1);
+ if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
+ goto error;
+ }
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "'memo' attribute must be an PicklerMemoProxy object"
+ "or dict, not %.200s", Py_TYPE(obj)->tp_name);
return -1;
}
- tmp = self->memo;
- Py_INCREF(value);
- self->memo = value;
- Py_XDECREF(tmp);
+ PyMemoTable_Del(self->memo);
+ self->memo = new_memo;
return 0;
+
+ error:
+ if (new_memo)
+ PyMemoTable_Del(new_memo);
+ return -1;
}
static PyObject *
@@ -2926,21 +3746,22 @@ load_int(UnpicklerObject *self)
Py_ssize_t len;
long x;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
errno = 0;
- /* XXX: Should the base argument of strtol() be explicitly set to 10? */
+ /* XXX: Should the base argument of strtol() be explicitly set to 10?
+ XXX(avassalotti): Should this uses PyOS_strtol()? */
x = strtol(s, &endptr, 0);
- if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
+ if (errno || (*endptr != '\n' && *endptr != '\0')) {
/* Hm, maybe we've got something long. Let's try reading
* it as a Python long object. */
errno = 0;
/* XXX: Same thing about the base here. */
- value = PyLong_FromString(s, NULL, 0);
+ value = PyLong_FromString(s, NULL, 0);
if (value == NULL) {
PyErr_SetString(PyExc_ValueError,
"could not convert string to int");
@@ -3017,7 +3838,7 @@ load_binint(UnpicklerObject *self)
{
char *s;
- if (unpickler_read(self, &s, 4) < 0)
+ if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
return load_binintx(self, s, 4);
@@ -3028,7 +3849,7 @@ load_binint1(UnpicklerObject *self)
{
char *s;
- if (unpickler_read(self, &s, 1) < 0)
+ if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
return load_binintx(self, s, 1);
@@ -3039,7 +3860,7 @@ load_binint2(UnpicklerObject *self)
{
char *s;
- if (unpickler_read(self, &s, 2) < 0)
+ if (_Unpickler_Read(self, &s, 2) < 0)
return -1;
return load_binintx(self, s, 2);
@@ -3052,7 +3873,7 @@ load_long(UnpicklerObject *self)
char *s;
Py_ssize_t len;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
@@ -3061,9 +3882,8 @@ load_long(UnpicklerObject *self)
the 'L' before calling PyLong_FromString. In order to maintain
compatibility with Python 3.0.0, we don't actually *require*
the 'L' to be present. */
- if (s[len-2] == 'L') {
+ if (s[len-2] == 'L')
s[len-2] = '\0';
- }
/* XXX: Should the base argument explicitly set to 10? */
value = PyLong_FromString(s, NULL, 0);
if (value == NULL)
@@ -3084,7 +3904,7 @@ load_counted_long(UnpicklerObject *self, int size)
char *pdata;
assert(size == 1 || size == 4);
- if (unpickler_read(self, &nbytes, size) < 0)
+ if (_Unpickler_Read(self, &nbytes, size) < 0)
return -1;
size = calc_binint(nbytes, size);
@@ -3099,7 +3919,7 @@ load_counted_long(UnpicklerObject *self, int size)
value = PyLong_FromLong(0L);
else {
/* Read the raw little-endian bytes and convert. */
- if (unpickler_read(self, &pdata, size) < 0)
+ if (_Unpickler_Read(self, &pdata, size) < 0)
return -1;
value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
1 /* little endian */ , 1 /* signed */ );
@@ -3118,7 +3938,7 @@ load_float(UnpicklerObject *self)
Py_ssize_t len;
double d;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
@@ -3127,7 +3947,7 @@ load_float(UnpicklerObject *self)
d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
if (d == -1.0 && PyErr_Occurred())
return -1;
- if ((endptr[0] != '\n') || (endptr[1] != '\0')) {
+ if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
PyErr_SetString(PyExc_ValueError, "could not convert string to float");
return -1;
}
@@ -3137,7 +3957,7 @@ load_float(UnpicklerObject *self)
PDATA_PUSH(self->stack, value, -1);
return 0;
- }
+}
static int
load_binfloat(UnpicklerObject *self)
@@ -3146,7 +3966,7 @@ load_binfloat(UnpicklerObject *self)
double x;
char *s;
- if (unpickler_read(self, &s, 8) < 0)
+ if (_Unpickler_Read(self, &s, 8) < 0)
return -1;
x = _PyFloat_Unpack8((unsigned char *)s, 0);
@@ -3168,7 +3988,7 @@ load_string(UnpicklerObject *self)
Py_ssize_t len;
char *s, *p;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 3)
return bad_readline();
@@ -3218,7 +4038,7 @@ load_binbytes(UnpicklerObject *self)
long x;
char *s;
- if (unpickler_read(self, &s, 4) < 0)
+ if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
x = calc_binint(s, 4);
@@ -3228,7 +4048,7 @@ load_binbytes(UnpicklerObject *self)
return -1;
}
- if (unpickler_read(self, &s, x) < 0)
+ if (_Unpickler_Read(self, &s, x) < 0)
return -1;
bytes = PyBytes_FromStringAndSize(s, x);
if (bytes == NULL)
@@ -3245,12 +4065,12 @@ load_short_binbytes(UnpicklerObject *self)
unsigned char x;
char *s;
- if (unpickler_read(self, &s, 1) < 0)
+ if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
x = (unsigned char)s[0];
- if (unpickler_read(self, &s, x) < 0)
+ if (_Unpickler_Read(self, &s, x) < 0)
return -1;
bytes = PyBytes_FromStringAndSize(s, x);
@@ -3268,7 +4088,7 @@ load_binstring(UnpicklerObject *self)
long x;
char *s;
- if (unpickler_read(self, &s, 4) < 0)
+ if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
x = calc_binint(s, 4);
@@ -3278,7 +4098,7 @@ load_binstring(UnpicklerObject *self)
return -1;
}
- if (unpickler_read(self, &s, x) < 0)
+ if (_Unpickler_Read(self, &s, x) < 0)
return -1;
/* Convert Python 2.x strings to unicode. */
@@ -3297,12 +4117,12 @@ load_short_binstring(UnpicklerObject *self)
unsigned char x;
char *s;
- if (unpickler_read(self, &s, 1) < 0)
+ if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
x = (unsigned char)s[0];
- if (unpickler_read(self, &s, x) < 0)
+ if (_Unpickler_Read(self, &s, x) < 0)
return -1;
/* Convert Python 2.x strings to unicode. */
@@ -3321,7 +4141,7 @@ load_unicode(UnpicklerObject *self)
Py_ssize_t len;
char *s;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 1)
return bad_readline();
@@ -3341,7 +4161,7 @@ load_binunicode(UnpicklerObject *self)
long size;
char *s;
- if (unpickler_read(self, &s, 4) < 0)
+ if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
size = calc_binint(s, 4);
@@ -3351,7 +4171,7 @@ load_binunicode(UnpicklerObject *self)
return -1;
}
- if (unpickler_read(self, &s, size) < 0)
+ if (_Unpickler_Read(self, &s, size) < 0)
return -1;
str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
@@ -3445,7 +4265,7 @@ load_dict(UnpicklerObject *self)
if ((i = marker(self)) < 0)
return -1;
- j = self->stack->length;
+ j = Py_SIZE(self->stack);
if ((dict = PyDict_New()) == NULL)
return -1;
@@ -3521,7 +4341,7 @@ load_inst(UnpicklerObject *self)
if ((i = marker(self)) < 0)
return -1;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
@@ -3533,7 +4353,7 @@ load_inst(UnpicklerObject *self)
if (module_name == NULL)
return -1;
- if ((len = unpickler_readline(self, &s)) >= 0) {
+ if ((len = _Unpickler_Readline(self, &s)) >= 0) {
if (len < 2)
return bad_readline();
class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
@@ -3619,7 +4439,7 @@ load_global(UnpicklerObject *self)
Py_ssize_t len;
char *s;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
@@ -3627,7 +4447,7 @@ load_global(UnpicklerObject *self)
if (!module_name)
return -1;
- if ((len = unpickler_readline(self, &s)) >= 0) {
+ if ((len = _Unpickler_Readline(self, &s)) >= 0) {
if (len < 2) {
Py_DECREF(module_name);
return bad_readline();
@@ -3654,7 +4474,7 @@ load_persid(UnpicklerObject *self)
char *s;
if (self->pers_func) {
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
@@ -3663,9 +4483,9 @@ load_persid(UnpicklerObject *self)
if (pid == NULL)
return -1;
- /* Ugh... this does not leak since unpickler_call() steals the
+ /* Ugh... this does not leak since _Unpickler_FastCall() steals the
reference to pid first. */
- pid = unpickler_call(self, self->pers_func, pid);
+ pid = _Unpickler_FastCall(self, self->pers_func, pid);
if (pid == NULL)
return -1;
@@ -3690,9 +4510,9 @@ load_binpersid(UnpicklerObject *self)
if (pid == NULL)
return -1;
- /* Ugh... this does not leak since unpickler_call() steals the
+ /* Ugh... this does not leak since _Unpickler_FastCall() steals the
reference to pid first. */
- pid = unpickler_call(self, self->pers_func, pid);
+ pid = _Unpickler_FastCall(self, self->pers_func, pid);
if (pid == NULL)
return -1;
@@ -3710,7 +4530,7 @@ load_binpersid(UnpicklerObject *self)
static int
load_pop(UnpicklerObject *self)
{
- int len = self->stack->length;
+ int len = Py_SIZE(self->stack);
/* Note that we split the (pickle.py) stack into two stacks,
* an object stack and a mark stack. We have to be clever and
@@ -3724,7 +4544,7 @@ load_pop(UnpicklerObject *self)
} else if (len > 0) {
len--;
Py_DECREF(self->stack->data[len]);
- self->stack->length = len;
+ Py_SIZE(self->stack) = len;
} else {
return stack_underflow();
}
@@ -3750,7 +4570,7 @@ load_dup(UnpicklerObject *self)
PyObject *last;
int len;
- if ((len = self->stack->length) <= 0)
+ if ((len = Py_SIZE(self->stack)) <= 0)
return stack_underflow();
last = self->stack->data[len - 1];
PDATA_APPEND(self->stack, last, -1);
@@ -3761,10 +4581,11 @@ static int
load_get(UnpicklerObject *self)
{
PyObject *key, *value;
+ Py_ssize_t idx;
Py_ssize_t len;
char *s;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
@@ -3772,8 +4593,13 @@ load_get(UnpicklerObject *self)
key = PyLong_FromString(s, NULL, 10);
if (key == NULL)
return -1;
+ idx = PyLong_AsSsize_t(key);
+ if (idx == -1 && PyErr_Occurred()) {
+ Py_DECREF(key);
+ return -1;
+ }
- value = PyDict_GetItemWithError(self->memo, key);
+ value = _Unpickler_MemoGet(self, idx);
if (value == NULL) {
if (!PyErr_Occurred())
PyErr_SetObject(PyExc_KeyError, key);
@@ -3789,25 +4615,23 @@ load_get(UnpicklerObject *self)
static int
load_binget(UnpicklerObject *self)
{
- PyObject *key, *value;
+ PyObject *value;
+ Py_ssize_t idx;
char *s;
- if (unpickler_read(self, &s, 1) < 0)
+ if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
- /* Here, the unsigned cast is necessary to avoid negative values. */
- key = PyLong_FromLong((long)(unsigned char)s[0]);
- if (key == NULL)
- return -1;
+ idx = Py_CHARMASK(s[0]);
- value = PyDict_GetItemWithError(self->memo, key);
+ value = _Unpickler_MemoGet(self, idx);
if (value == NULL) {
+ PyObject *key = PyLong_FromSsize_t(idx);
if (!PyErr_Occurred())
PyErr_SetObject(PyExc_KeyError, key);
Py_DECREF(key);
return -1;
}
- Py_DECREF(key);
PDATA_APPEND(self->stack, value, -1);
return 0;
@@ -3816,30 +4640,26 @@ load_binget(UnpicklerObject *self)
static int
load_long_binget(UnpicklerObject *self)
{
- PyObject *key, *value;
+ PyObject *value;
+ Py_ssize_t idx;
char *s;
- long k;
- if (unpickler_read(self, &s, 4) < 0)
+ if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
- k = (long)(unsigned char)s[0];
- k |= (long)(unsigned char)s[1] << 8;
- k |= (long)(unsigned char)s[2] << 16;
- k |= (long)(unsigned char)s[3] << 24;
-
- key = PyLong_FromLong(k);
- if (key == NULL)
- return -1;
+ idx = (long)Py_CHARMASK(s[0]);
+ idx |= (long)Py_CHARMASK(s[1]) << 8;
+ idx |= (long)Py_CHARMASK(s[2]) << 16;
+ idx |= (long)Py_CHARMASK(s[3]) << 24;
- value = PyDict_GetItemWithError(self->memo, key);
+ value = _Unpickler_MemoGet(self, idx);
if (value == NULL) {
+ PyObject *key = PyLong_FromSsize_t(idx);
if (!PyErr_Occurred())
PyErr_SetObject(PyExc_KeyError, key);
Py_DECREF(key);
return -1;
}
- Py_DECREF(key);
PDATA_APPEND(self->stack, value, -1);
return 0;
@@ -3859,7 +4679,7 @@ load_extension(UnpicklerObject *self, int nbytes)
PyObject *module_name, *class_name;
assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
- if (unpickler_read(self, &codebytes, nbytes) < 0)
+ if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
return -1;
code = calc_binint(codebytes, nbytes);
if (code <= 0) { /* note that 0 is forbidden */
@@ -3920,75 +4740,68 @@ static int
load_put(UnpicklerObject *self)
{
PyObject *key, *value;
+ Py_ssize_t idx;
Py_ssize_t len;
char *s;
- int x;
- if ((len = unpickler_readline(self, &s)) < 0)
+ if ((len = _Unpickler_Readline(self, &s)) < 0)
return -1;
if (len < 2)
return bad_readline();
- if ((x = self->stack->length) <= 0)
+ if (Py_SIZE(self->stack) <= 0)
return stack_underflow();
+ value = self->stack->data[Py_SIZE(self->stack) - 1];
key = PyLong_FromString(s, NULL, 10);
if (key == NULL)
return -1;
- value = self->stack->data[x - 1];
-
- x = PyDict_SetItem(self->memo, key, value);
+ idx = PyLong_AsSsize_t(key);
Py_DECREF(key);
- return x;
+ if (idx == -1 && PyErr_Occurred())
+ return -1;
+
+ return _Unpickler_MemoPut(self, idx, value);
}
static int
load_binput(UnpicklerObject *self)
{
- PyObject *key, *value;
+ PyObject *value;
+ Py_ssize_t idx;
char *s;
- int x;
- if (unpickler_read(self, &s, 1) < 0)
+ if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
- if ((x = self->stack->length) <= 0)
+
+ if (Py_SIZE(self->stack) <= 0)
return stack_underflow();
+ value = self->stack->data[Py_SIZE(self->stack) - 1];
- key = PyLong_FromLong((long)(unsigned char)s[0]);
- if (key == NULL)
- return -1;
- value = self->stack->data[x - 1];
+ idx = Py_CHARMASK(s[0]);
- x = PyDict_SetItem(self->memo, key, value);
- Py_DECREF(key);
- return x;
+ return _Unpickler_MemoPut(self, idx, value);
}
static int
load_long_binput(UnpicklerObject *self)
{
- PyObject *key, *value;
- long k;
+ PyObject *value;
+ Py_ssize_t idx;
char *s;
- int x;
- if (unpickler_read(self, &s, 4) < 0)
+ if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
- if ((x = self->stack->length) <= 0)
- return stack_underflow();
- k = (long)(unsigned char)s[0];
- k |= (long)(unsigned char)s[1] << 8;
- k |= (long)(unsigned char)s[2] << 16;
- k |= (long)(unsigned char)s[3] << 24;
+ if (Py_SIZE(self->stack) <= 0)
+ return stack_underflow();
+ value = self->stack->data[Py_SIZE(self->stack) - 1];
- key = PyLong_FromLong(k);
- if (key == NULL)
- return -1;
- value = self->stack->data[x - 1];
+ idx = (long)Py_CHARMASK(s[0]);
+ idx |= (long)Py_CHARMASK(s[1]) << 8;
+ idx |= (long)Py_CHARMASK(s[2]) << 16;
+ idx |= (long)Py_CHARMASK(s[3]) << 24;
- x = PyDict_SetItem(self->memo, key, value);
- Py_DECREF(key);
- return x;
+ return _Unpickler_MemoPut(self, idx, value);
}
static int
@@ -3998,7 +4811,7 @@ do_append(UnpicklerObject *self, int x)
PyObject *list;
int len, i;
- len = self->stack->length;
+ len = Py_SIZE(self->stack);
if (x > len || x <= 0)
return stack_underflow();
if (len == x) /* nothing to do */
@@ -4028,15 +4841,15 @@ do_append(UnpicklerObject *self, int x)
PyObject *result;
value = self->stack->data[i];
- result = unpickler_call(self, append_func, value);
+ result = _Unpickler_FastCall(self, append_func, value);
if (result == NULL) {
Pdata_clear(self->stack, i + 1);
- self->stack->length = x;
+ Py_SIZE(self->stack) = x;
return -1;
}
Py_DECREF(result);
}
- self->stack->length = x;
+ Py_SIZE(self->stack) = x;
}
return 0;
@@ -4045,7 +4858,7 @@ do_append(UnpicklerObject *self, int x)
static int
load_append(UnpicklerObject *self)
{
- return do_append(self, self->stack->length - 1);
+ return do_append(self, Py_SIZE(self->stack) - 1);
}
static int
@@ -4062,7 +4875,7 @@ do_setitems(UnpicklerObject *self, int x)
int len, i;
int status = 0;
- len = self->stack->length;
+ len = Py_SIZE(self->stack);
if (x > len || x <= 0)
return stack_underflow();
if (len == x) /* nothing to do */
@@ -4093,7 +4906,7 @@ do_setitems(UnpicklerObject *self, int x)
static int
load_setitem(UnpicklerObject *self)
{
- return do_setitems(self, self->stack->length - 2);
+ return do_setitems(self, Py_SIZE(self->stack) - 2);
}
static int
@@ -4112,14 +4925,14 @@ load_build(UnpicklerObject *self)
/* Stack is ... instance, state. We want to leave instance at
* the stack top, possibly mutated via instance.__setstate__(state).
*/
- if (self->stack->length < 2)
+ if (Py_SIZE(self->stack) < 2)
return stack_underflow();
PDATA_POP(self->stack, state);
if (state == NULL)
return -1;
- inst = self->stack->data[self->stack->length - 1];
+ inst = self->stack->data[Py_SIZE(self->stack) - 1];
setstate = PyObject_GetAttrString(inst, "__setstate__");
if (setstate == NULL) {
@@ -4134,9 +4947,9 @@ load_build(UnpicklerObject *self)
PyObject *result;
/* The explicit __setstate__ is responsible for everything. */
- /* Ugh... this does not leak since unpickler_call() steals the
+ /* Ugh... this does not leak since _Unpickler_FastCall() steals the
reference to state first. */
- result = unpickler_call(self, setstate, state);
+ result = _Unpickler_FastCall(self, setstate, state);
Py_DECREF(setstate);
if (result == NULL)
return -1;
@@ -4249,7 +5062,7 @@ load_mark(UnpicklerObject *self)
self->marks_size = (Py_ssize_t)alloc;
}
- self->marks[self->num_marks++] = self->stack->length;
+ self->marks[self->num_marks++] = Py_SIZE(self->stack);
return 0;
}
@@ -4287,7 +5100,7 @@ load_proto(UnpicklerObject *self)
char *s;
int i;
- if (unpickler_read(self, &s, 1) < 0)
+ if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
i = (unsigned char)s[0];
@@ -4308,7 +5121,7 @@ load(UnpicklerObject *self)
char *s;
self->num_marks = 0;
- if (self->stack->length)
+ if (Py_SIZE(self->stack))
Pdata_clear(self->stack, 0);
/* Convenient macros for the dispatch while-switch loop just below. */
@@ -4319,7 +5132,7 @@ load(UnpicklerObject *self)
case opcode: if (load_func(self, (arg)) < 0) break; continue;
while (1) {
- if (unpickler_read(self, &s, 1) < 0)
+ if (_Unpickler_Read(self, &s, 1) < 0)
break;
switch ((enum opcode)s[0]) {
@@ -4543,13 +5356,17 @@ Unpickler_dealloc(UnpicklerObject *self)
PyObject_GC_UnTrack((PyObject *)self);
Py_XDECREF(self->readline);
Py_XDECREF(self->read);
- Py_XDECREF(self->memo);
Py_XDECREF(self->stack);
Py_XDECREF(self->pers_func);
Py_XDECREF(self->arg);
- Py_XDECREF(self->last_string);
+ if (self->buffer.buf != NULL) {
+ PyBuffer_Release(&self->buffer);
+ self->buffer.buf = NULL;
+ }
+ _Unpickler_MemoCleanup(self);
PyMem_Free(self->marks);
+ PyMem_Free(self->input_line);
free(self->encoding);
free(self->errors);
@@ -4561,11 +5378,9 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
Py_VISIT(self->readline);
Py_VISIT(self->read);
- Py_VISIT(self->memo);
Py_VISIT(self->stack);
Py_VISIT(self->pers_func);
Py_VISIT(self->arg);
- Py_VISIT(self->last_string);
return 0;
}
@@ -4574,14 +5389,19 @@ Unpickler_clear(UnpicklerObject *self)
{
Py_CLEAR(self->readline);
Py_CLEAR(self->read);
- Py_CLEAR(self->memo);
Py_CLEAR(self->stack);
Py_CLEAR(self->pers_func);
Py_CLEAR(self->arg);
- Py_CLEAR(self->last_string);
+ if (self->buffer.buf != NULL) {
+ PyBuffer_Release(&self->buffer);
+ self->buffer.buf = NULL;
+ }
+ _Unpickler_MemoCleanup(self);
PyMem_Free(self->marks);
self->marks = NULL;
+ PyMem_Free(self->input_line);
+ self->input_line = NULL;
free(self->encoding);
self->encoding = NULL;
free(self->errors);
@@ -4618,7 +5438,7 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
PyObject *file;
- int fix_imports = 1;
+ PyObject *fix_imports = Py_True;
char *encoding = NULL;
char *errors = NULL;
@@ -4637,7 +5457,7 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
extra careful in the other Unpickler methods, since a subclass could
forget to call Unpickler.__init__() thus breaking our internal
invariants. */
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist,
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
&file, &fix_imports, &encoding, &errors))
return -1;
@@ -4645,22 +5465,15 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
if (self->read != NULL)
(void)Unpickler_clear(self);
- self->read = PyObject_GetAttrString(file, "read");
- self->readline = PyObject_GetAttrString(file, "readline");
- if (self->readline == NULL || self->read == NULL)
+ if (_Unpickler_SetInputStream(self, file) < 0)
return -1;
- if (encoding == NULL)
- encoding = "ASCII";
- if (errors == NULL)
- errors = "strict";
+ if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
+ return -1;
- self->encoding = strdup(encoding);
- self->errors = strdup(errors);
- if (self->encoding == NULL || self->errors == NULL) {
- PyErr_NoMemory();
+ self->fix_imports = PyObject_IsTrue(fix_imports);
+ if (self->fix_imports == -1)
return -1;
- }
if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
self->pers_func = PyObject_GetAttrString((PyObject *)self,
@@ -4676,49 +5489,269 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
if (self->stack == NULL)
return -1;
- self->memo = PyDict_New();
+ self->memo_size = 32;
+ self->memo = _Unpickler_NewMemo(self->memo_size);
if (self->memo == NULL)
return -1;
- self->last_string = NULL;
self->arg = NULL;
self->proto = 0;
- self->fix_imports = fix_imports;
return 0;
}
+/* Define a proxy object for the Unpickler's internal memo object. This is to
+ * avoid breaking code like:
+ * unpickler.memo.clear()
+ * and
+ * unpickler.memo = saved_memo
+ * Is this a good idea? Not really, but we don't want to break code that uses
+ * it. Note that we don't implement the entire mapping API here. This is
+ * intentional, as these should be treated as black-box implementation details.
+ *
+ * We do, however, have to implement pickling/unpickling support because of
+ * real-world code like cvs2svn.
+ */
+
+typedef struct {
+ PyObject_HEAD
+ UnpicklerObject *unpickler;
+} UnpicklerMemoProxyObject;
+
+PyDoc_STRVAR(ump_clear_doc,
+"memo.clear() -> None. Remove all items from memo.");
+
+static PyObject *
+ump_clear(UnpicklerMemoProxyObject *self)
+{
+ _Unpickler_MemoCleanup(self->unpickler);
+ self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
+ if (self->unpickler->memo == NULL)
+ return NULL;
+ Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(ump_copy_doc,
+"memo.copy() -> new_memo. Copy the memo to a new object.");
+
+static PyObject *
+ump_copy(UnpicklerMemoProxyObject *self)
+{
+ Py_ssize_t i;
+ PyObject *new_memo = PyDict_New();
+ if (new_memo == NULL)
+ return NULL;
+
+ for (i = 0; i < self->unpickler->memo_size; i++) {
+ int status;
+ PyObject *key, *value;
+
+ value = self->unpickler->memo[i];
+ if (value == NULL)
+ continue;
+
+ key = PyLong_FromSsize_t(i);
+ if (key == NULL)
+ goto error;
+ status = PyDict_SetItem(new_memo, key, value);
+ Py_DECREF(key);
+ if (status < 0)
+ goto error;
+ }
+ return new_memo;
+
+error:
+ Py_DECREF(new_memo);
+ return NULL;
+}
+
+PyDoc_STRVAR(ump_reduce_doc,
+"memo.__reduce__(). Pickling support.");
+
+static PyObject *
+ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
+{
+ PyObject *reduce_value;
+ PyObject *constructor_args;
+ PyObject *contents = ump_copy(self);
+ if (contents == NULL)
+ return NULL;
+
+ reduce_value = PyTuple_New(2);
+ if (reduce_value == NULL) {
+ Py_DECREF(contents);
+ return NULL;
+ }
+ constructor_args = PyTuple_New(1);
+ if (constructor_args == NULL) {
+ Py_DECREF(contents);
+ Py_DECREF(reduce_value);
+ return NULL;
+ }
+ PyTuple_SET_ITEM(constructor_args, 0, contents);
+ Py_INCREF((PyObject *)&PyDict_Type);
+ PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
+ PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
+ return reduce_value;
+}
+
+static PyMethodDef unpicklerproxy_methods[] = {
+ {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
+ {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
+ {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
+ {NULL, NULL} /* sentinel */
+};
+
+static void
+UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
+{
+ PyObject_GC_UnTrack(self);
+ Py_XDECREF(self->unpickler);
+ PyObject_GC_Del((PyObject *)self);
+}
+
+static int
+UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
+ visitproc visit, void *arg)
+{
+ Py_VISIT(self->unpickler);
+ return 0;
+}
+
+static int
+UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
+{
+ Py_CLEAR(self->unpickler);
+ return 0;
+}
+
+static PyTypeObject UnpicklerMemoProxyType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_pickle.UnpicklerMemoProxy", /*tp_name*/
+ sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
+ 0,
+ (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ PyObject_GenericSetAttr, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+ 0, /* tp_doc */
+ (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
+ (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ unpicklerproxy_methods, /* tp_methods */
+};
+
+static PyObject *
+UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
+{
+ UnpicklerMemoProxyObject *self;
+
+ self = PyObject_GC_New(UnpicklerMemoProxyObject,
+ &UnpicklerMemoProxyType);
+ if (self == NULL)
+ return NULL;
+ Py_INCREF(unpickler);
+ self->unpickler = unpickler;
+ PyObject_GC_Track(self);
+ return (PyObject *)self;
+}
+
+/*****************************************************************************/
+
+
static PyObject *
Unpickler_get_memo(UnpicklerObject *self)
{
- if (self->memo == NULL)
- PyErr_SetString(PyExc_AttributeError, "memo");
- else
- Py_INCREF(self->memo);
- return self->memo;
+ return UnpicklerMemoProxy_New(self);
}
static int
-Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
+Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
{
- PyObject *tmp;
+ PyObject **new_memo;
+ Py_ssize_t new_memo_size = 0;
+ Py_ssize_t i;
- if (value == NULL) {
+ if (obj == NULL) {
PyErr_SetString(PyExc_TypeError,
"attribute deletion is not supported");
return -1;
}
- if (!PyDict_Check(value)) {
- PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
+
+ if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
+ UnpicklerObject *unpickler =
+ ((UnpicklerMemoProxyObject *)obj)->unpickler;
+
+ new_memo_size = unpickler->memo_size;
+ new_memo = _Unpickler_NewMemo(new_memo_size);
+ if (new_memo == NULL)
+ return -1;
+
+ for (i = 0; i < new_memo_size; i++) {
+ Py_XINCREF(unpickler->memo[i]);
+ new_memo[i] = unpickler->memo[i];
+ }
+ }
+ else if (PyDict_Check(obj)) {
+ Py_ssize_t i = 0;
+ PyObject *key, *value;
+
+ new_memo_size = PyDict_Size(obj);
+ new_memo = _Unpickler_NewMemo(new_memo_size);
+ if (new_memo == NULL)
+ return -1;
+
+ while (PyDict_Next(obj, &i, &key, &value)) {
+ Py_ssize_t idx;
+ if (!PyLong_Check(key)) {
+ PyErr_SetString(PyExc_TypeError,
+ "memo key must be integers");
+ goto error;
+ }
+ idx = PyLong_AsSsize_t(key);
+ if (idx == -1 && PyErr_Occurred())
+ goto error;
+ if (_Unpickler_MemoPut(self, idx, value) < 0)
+ goto error;
+ }
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "'memo' attribute must be an UnpicklerMemoProxy object"
+ "or dict, not %.200s", Py_TYPE(obj)->tp_name);
return -1;
}
- tmp = self->memo;
- Py_INCREF(value);
- self->memo = value;
- Py_XDECREF(tmp);
+ _Unpickler_MemoCleanup(self);
+ self->memo_size = new_memo_size;
+ self->memo = new_memo;
return 0;
+
+ error:
+ if (new_memo_size) {
+ i = new_memo_size;
+ while (--i >= 0) {
+ Py_XDECREF(new_memo[i]);
+ }
+ PyMem_FREE(new_memo);
+ }
+ return -1;
}
static PyObject *
@@ -4771,7 +5804,7 @@ static PyTypeObject Unpickler_Type = {
(destructor)Unpickler_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
- 0, /*tp_setattr*/
+ 0, /*tp_setattr*/
0, /*tp_reserved*/
0, /*tp_repr*/
0, /*tp_as_number*/
@@ -4806,6 +5839,280 @@ static PyTypeObject Unpickler_Type = {
0, /*tp_is_gc*/
};
+PyDoc_STRVAR(pickle_dump_doc,
+"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
+"\n"
+"Write a pickled representation of obj to the open file object file. This\n"
+"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
+"efficient.\n"
+"\n"
+"The optional protocol argument tells the pickler to use the given protocol;\n"
+"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
+"backward-incompatible protocol designed for Python 3.0.\n"
+"\n"
+"Specifying a negative protocol version selects the highest protocol version\n"
+"supported. The higher the protocol used, the more recent the version of\n"
+"Python needed to read the pickle produced.\n"
+"\n"
+"The file argument must have a write() method that accepts a single bytes\n"
+"argument. It can thus be a file object opened for binary writing, a\n"
+"io.BytesIO instance, or any other custom object that meets this interface.\n"
+"\n"
+"If fix_imports is True and protocol is less than 3, pickle will try to\n"
+"map the new Python 3.x names to the old module names used in Python 2.x,\n"
+"so that the pickle data stream is readable with Python 2.x.\n");
+
+static PyObject *
+pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
+ PyObject *obj;
+ PyObject *file;
+ PyObject *proto = NULL;
+ PyObject *fix_imports = Py_True;
+ PicklerObject *pickler;
+
+ /* fix_imports is a keyword-only argument. */
+ if (Py_SIZE(args) > 3) {
+ PyErr_Format(PyExc_TypeError,
+ "pickle.dump() takes at most 3 positional "
+ "argument (%zd given)", Py_SIZE(args));
+ return NULL;
+ }
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
+ &obj, &file, &proto, &fix_imports))
+ return NULL;
+
+ pickler = _Pickler_New();
+ if (pickler == NULL)
+ return NULL;
+
+ if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
+ goto error;
+
+ if (_Pickler_SetOutputStream(pickler, file) < 0)
+ goto error;
+
+ if (dump(pickler, obj) < 0)
+ goto error;
+
+ if (_Pickler_FlushToFile(pickler) < 0)
+ goto error;
+
+ Py_DECREF(pickler);
+ Py_RETURN_NONE;
+
+ error:
+ Py_XDECREF(pickler);
+ return NULL;
+}
+
+PyDoc_STRVAR(pickle_dumps_doc,
+"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
+"\n"
+"Return the pickled representation of the object as a bytes\n"
+"object, instead of writing it to a file.\n"
+"\n"
+"The optional protocol argument tells the pickler to use the given protocol;\n"
+"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
+"backward-incompatible protocol designed for Python 3.0.\n"
+"\n"
+"Specifying a negative protocol version selects the highest protocol version\n"
+"supported. The higher the protocol used, the more recent the version of\n"
+"Python needed to read the pickle produced.\n"
+"\n"
+"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
+"map the new Python 3.x names to the old module names used in Python 2.x,\n"
+"so that the pickle data stream is readable with Python 2.x.\n");
+
+static PyObject *
+pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
+ PyObject *obj;
+ PyObject *proto = NULL;
+ PyObject *result;
+ PyObject *fix_imports = Py_True;
+ PicklerObject *pickler;
+
+ /* fix_imports is a keyword-only argument. */
+ if (Py_SIZE(args) > 2) {
+ PyErr_Format(PyExc_TypeError,
+ "pickle.dumps() takes at most 2 positional "
+ "argument (%zd given)", Py_SIZE(args));
+ return NULL;
+ }
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
+ &obj, &proto, &fix_imports))
+ return NULL;
+
+ pickler = _Pickler_New();
+ if (pickler == NULL)
+ return NULL;
+
+ if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
+ goto error;
+
+ if (dump(pickler, obj) < 0)
+ goto error;
+
+ result = _Pickler_GetString(pickler);
+ Py_DECREF(pickler);
+ return result;
+
+ error:
+ Py_XDECREF(pickler);
+ return NULL;
+}
+
+PyDoc_STRVAR(pickle_load_doc,
+"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
+"\n"
+"Read a pickled object representation from the open file object file and\n"
+"return the reconstituted object hierarchy specified therein. This is\n"
+"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
+"\n"
+"The protocol version of the pickle is detected automatically, so no protocol\n"
+"argument is needed. Bytes past the pickled object's representation are\n"
+"ignored.\n"
+"\n"
+"The argument file must have two methods, a read() method that takes an\n"
+"integer argument, and a readline() method that requires no arguments. Both\n"
+"methods should return bytes. Thus *file* can be a binary file object opened\n"
+"for reading, a BytesIO object, or any other custom object that meets this\n"
+"interface.\n"
+"\n"
+"Optional keyword arguments are fix_imports, encoding and errors,\n"
+"which are used to control compatiblity support for pickle stream generated\n"
+"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
+"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
+"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
+"2.x; these default to 'ASCII' and 'strict', respectively.\n");
+
+static PyObject *
+pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
+ PyObject *file;
+ PyObject *fix_imports = Py_True;
+ PyObject *result;
+ char *encoding = NULL;
+ char *errors = NULL;
+ UnpicklerObject *unpickler;
+
+ /* fix_imports, encoding and errors are a keyword-only argument. */
+ if (Py_SIZE(args) != 1) {
+ PyErr_Format(PyExc_TypeError,
+ "pickle.load() takes exactly one positional "
+ "argument (%zd given)", Py_SIZE(args));
+ return NULL;
+ }
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
+ &file, &fix_imports, &encoding, &errors))
+ return NULL;
+
+ unpickler = _Unpickler_New();
+ if (unpickler == NULL)
+ return NULL;
+
+ if (_Unpickler_SetInputStream(unpickler, file) < 0)
+ goto error;
+
+ if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
+ goto error;
+
+ unpickler->fix_imports = PyObject_IsTrue(fix_imports);
+ if (unpickler->fix_imports == -1)
+ goto error;
+
+ result = load(unpickler);
+ Py_DECREF(unpickler);
+ return result;
+
+ error:
+ Py_XDECREF(unpickler);
+ return NULL;
+}
+
+PyDoc_STRVAR(pickle_loads_doc,
+"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
+"\n"
+"Read a pickled object hierarchy from a bytes object and return the\n"
+"reconstituted object hierarchy specified therein\n"
+"\n"
+"The protocol version of the pickle is detected automatically, so no protocol\n"
+"argument is needed. Bytes past the pickled object's representation are\n"
+"ignored.\n"
+"\n"
+"Optional keyword arguments are fix_imports, encoding and errors, which\n"
+"are used to control compatiblity support for pickle stream generated\n"
+"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
+"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
+"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
+"2.x; these default to 'ASCII' and 'strict', respectively.\n");
+
+static PyObject *
+pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
+ PyObject *input;
+ PyObject *fix_imports = Py_True;
+ PyObject *result;
+ char *encoding = NULL;
+ char *errors = NULL;
+ UnpicklerObject *unpickler;
+
+ /* fix_imports, encoding and errors are a keyword-only argument. */
+ if (Py_SIZE(args) != 1) {
+ PyErr_Format(PyExc_TypeError,
+ "pickle.loads() takes exactly one positional "
+ "argument (%zd given)", Py_SIZE(args));
+ return NULL;
+ }
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
+ &input, &fix_imports, &encoding, &errors))
+ return NULL;
+
+ unpickler = _Unpickler_New();
+ if (unpickler == NULL)
+ return NULL;
+
+ if (_Unpickler_SetStringInput(unpickler, input) < 0)
+ goto error;
+
+ if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
+ goto error;
+
+ unpickler->fix_imports = PyObject_IsTrue(fix_imports);
+ if (unpickler->fix_imports == -1)
+ goto error;
+
+ result = load(unpickler);
+ Py_DECREF(unpickler);
+ return result;
+
+ error:
+ Py_XDECREF(unpickler);
+ return NULL;
+}
+
+
+static struct PyMethodDef pickle_methods[] = {
+ {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
+ pickle_dump_doc},
+ {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
+ pickle_dumps_doc},
+ {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
+ pickle_load_doc},
+ {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
+ pickle_loads_doc},
+ {NULL, NULL} /* sentinel */
+};
+
static int
initmodule(void)
{
@@ -4915,7 +6222,7 @@ static struct PyModuleDef _picklemodule = {
"_pickle",
pickle_module_doc,
-1,
- NULL,
+ pickle_methods,
NULL,
NULL,
NULL,
@@ -4933,6 +6240,10 @@ PyInit__pickle(void)
return NULL;
if (PyType_Ready(&Pdata_Type) < 0)
return NULL;
+ if (PyType_Ready(&PicklerMemoProxyType) < 0)
+ return NULL;
+ if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
+ return NULL;
/* Create the module and add the functions. */
m = PyModule_Create(&_picklemodule);