diff options
Diffstat (limited to 'Modules/_pickle.c')
-rw-r--r-- | Modules/_pickle.c | 3366 |
1 files changed, 2187 insertions, 1179 deletions
diff --git a/Modules/_pickle.c b/Modules/_pickle.c index ba192fb..6445238 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -4,9 +4,33 @@ PyDoc_STRVAR(pickle_module_doc, "Optimized C implementation for the Python pickle module."); +/*[clinic] +module _pickle +class _pickle.Pickler +class _pickle.PicklerMemoProxy +class _pickle.Unpickler +class _pickle.UnpicklerMemoProxy +[clinic]*/ +/*[clinic checksum: da39a3ee5e6b4b0d3255bfef95601890afd80709]*/ + +/*[python] +class PicklerObject_converter(self_converter): + type = "PicklerObject *" + +class PicklerMemoProxyObject_converter(self_converter): + type = "PicklerMemoProxyObject *" + +class UnpicklerObject_converter(self_converter): + type = "UnpicklerObject *" + +class UnpicklerMemoProxyObject_converter(self_converter): + type = "UnpicklerMemoProxyObject *" +[python]*/ +/*[python checksum: da39a3ee5e6b4b0d3255bfef95601890afd80709]*/ + /* Bump this when new opcodes are added to the pickle protocol. */ enum { - HIGHEST_PROTOCOL = 3, + HIGHEST_PROTOCOL = 4, DEFAULT_PROTOCOL = 3 }; @@ -71,19 +95,21 @@ enum opcode { /* Protocol 3 (Python 3.x) */ BINBYTES = 'B', - SHORT_BINBYTES = 'C' + SHORT_BINBYTES = 'C', + + /* Protocol 4 */ + SHORT_BINUNICODE = '\x8c', + BINUNICODE8 = '\x8d', + BINBYTES8 = '\x8e', + EMPTY_SET = '\x8f', + ADDITEMS = '\x90', + FROZENSET = '\x91', + NEWOBJ_EX = '\x92', + STACK_GLOBAL = '\x93', + MEMOIZE = '\x94', + FRAME = '\x95' }; -/* These aren't opcodes -- they're ways to pickle bools before protocol 2 - * so that unpicklers written before bools were introduced unpickle them - * as ints, but unpicklers after can recognize that bools were intended. - * Note that protocol 2 added direct ways to pickle bools. - */ -#undef TRUE -#define TRUE "I01\n" -#undef FALSE -#define FALSE "I00\n" - enum { /* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will @@ -98,48 +124,246 @@ enum { /* Initial size of the write buffer of Pickler. */ WRITE_BUF_SIZE = 4096, - /* Maximum size of the write buffer of Pickler when pickling to a - stream. This is ignored for in-memory pickling. */ - MAX_WRITE_BUF_SIZE = 64 * 1024, - /* Prefetch size when unpickling (disabled on unpeekable streams) */ - PREFETCH = 8192 * 16 + PREFETCH = 8192 * 16, + + FRAME_SIZE_TARGET = 64 * 1024, + + FRAME_HEADER_SIZE = 9 }; -/* Exception classes for pickle. These should override the ones defined in - pickle.py, when the C-optimized Pickler and Unpickler are used. */ -static PyObject *PickleError = NULL; -static PyObject *PicklingError = NULL; -static PyObject *UnpicklingError = NULL; - -/* copyreg.dispatch_table, {type_object: pickling_function} */ -static PyObject *dispatch_table = NULL; -/* For EXT[124] opcodes. */ -/* copyreg._extension_registry, {(module_name, function_name): code} */ -static PyObject *extension_registry = NULL; -/* copyreg._inverted_registry, {code: (module_name, function_name)} */ -static PyObject *inverted_registry = NULL; -/* copyreg._extension_cache, {code: object} */ -static PyObject *extension_cache = NULL; - -/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */ -static PyObject *name_mapping_2to3 = NULL; -/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ -static PyObject *import_mapping_2to3 = NULL; -/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */ -static PyObject *name_mapping_3to2 = NULL; -static PyObject *import_mapping_3to2 = NULL; - -/* XXX: Are these really nescessary? */ -/* As the name says, an empty tuple. */ -static PyObject *empty_tuple = NULL; -/* For looking up name pairs in copyreg._extension_registry. */ -static PyObject *two_tuple = NULL; +/*************************************************************************/ + +/* State of the pickle module, per PEP 3121. */ +typedef struct { + /* Exception classes for pickle. */ + PyObject *PickleError; + PyObject *PicklingError; + PyObject *UnpicklingError; + + /* copyreg.dispatch_table, {type_object: pickling_function} */ + PyObject *dispatch_table; + + /* For the extension opcodes EXT1, EXT2 and EXT4. */ + + /* copyreg._extension_registry, {(module_name, function_name): code} */ + PyObject *extension_registry; + /* copyreg._extension_cache, {code: object} */ + PyObject *extension_cache; + /* copyreg._inverted_registry, {code: (module_name, function_name)} */ + PyObject *inverted_registry; + + /* Import mappings for compatibility with Python 2.x */ + + /* _compat_pickle.NAME_MAPPING, + {(oldmodule, oldname): (newmodule, newname)} */ + PyObject *name_mapping_2to3; + /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ + PyObject *import_mapping_2to3; + /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */ + PyObject *name_mapping_3to2; + PyObject *import_mapping_3to2; + + /* codecs.encode, used for saving bytes in older protocols */ + PyObject *codecs_encode; +} PickleState; + +/* Forward declaration of the _pickle module definition. */ +static struct PyModuleDef _picklemodule; + +/* Given a module object, get its per-module state. */ +static PickleState * +_Pickle_GetState(PyObject *module) +{ + return (PickleState *)PyModule_GetState(module); +} + +/* Find the module instance imported in the currently running sub-interpreter + and get its state. */ +static PickleState * +_Pickle_GetGlobalState(void) +{ + return _Pickle_GetState(PyState_FindModule(&_picklemodule)); +} + +/* Clear the given pickle module state. */ +static void +_Pickle_ClearState(PickleState *st) +{ + Py_CLEAR(st->PickleError); + Py_CLEAR(st->PicklingError); + Py_CLEAR(st->UnpicklingError); + Py_CLEAR(st->dispatch_table); + Py_CLEAR(st->extension_registry); + Py_CLEAR(st->extension_cache); + Py_CLEAR(st->inverted_registry); + Py_CLEAR(st->name_mapping_2to3); + Py_CLEAR(st->import_mapping_2to3); + Py_CLEAR(st->name_mapping_3to2); + Py_CLEAR(st->import_mapping_3to2); + Py_CLEAR(st->codecs_encode); +} + +/* Initialize the given pickle module state. */ +static int +_Pickle_InitState(PickleState *st) +{ + PyObject *copyreg = NULL; + PyObject *compat_pickle = NULL; + PyObject *codecs = NULL; + + copyreg = PyImport_ImportModule("copyreg"); + if (!copyreg) + goto error; + st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); + if (!st->dispatch_table) + goto error; + if (!PyDict_CheckExact(st->dispatch_table)) { + PyErr_Format(PyExc_RuntimeError, + "copyreg.dispatch_table should be a dict, not %.200s", + Py_TYPE(st->dispatch_table)->tp_name); + goto error; + } + st->extension_registry = \ + PyObject_GetAttrString(copyreg, "_extension_registry"); + if (!st->extension_registry) + goto error; + if (!PyDict_CheckExact(st->extension_registry)) { + PyErr_Format(PyExc_RuntimeError, + "copyreg._extension_registry should be a dict, " + "not %.200s", Py_TYPE(st->extension_registry)->tp_name); + goto error; + } + st->inverted_registry = \ + PyObject_GetAttrString(copyreg, "_inverted_registry"); + if (!st->inverted_registry) + goto error; + if (!PyDict_CheckExact(st->inverted_registry)) { + PyErr_Format(PyExc_RuntimeError, + "copyreg._inverted_registry should be a dict, " + "not %.200s", Py_TYPE(st->inverted_registry)->tp_name); + goto error; + } + st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); + if (!st->extension_cache) + goto error; + if (!PyDict_CheckExact(st->extension_cache)) { + PyErr_Format(PyExc_RuntimeError, + "copyreg._extension_cache should be a dict, " + "not %.200s", Py_TYPE(st->extension_cache)->tp_name); + goto error; + } + Py_CLEAR(copyreg); + + /* Load the 2.x -> 3.x stdlib module mapping tables */ + compat_pickle = PyImport_ImportModule("_compat_pickle"); + if (!compat_pickle) + goto error; + st->name_mapping_2to3 = \ + PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); + if (!st->name_mapping_2to3) + goto error; + if (!PyDict_CheckExact(st->name_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", + Py_TYPE(st->name_mapping_2to3)->tp_name); + goto error; + } + st->import_mapping_2to3 = \ + PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING"); + if (!st->import_mapping_2to3) + goto error; + if (!PyDict_CheckExact(st->import_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING should be a dict, " + "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name); + goto error; + } + /* ... and the 3.x -> 2.x mapping tables */ + st->name_mapping_3to2 = \ + PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING"); + if (!st->name_mapping_3to2) + goto error; + if (!PyDict_CheckExact(st->name_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, " + "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name); + goto error; + } + st->import_mapping_3to2 = \ + PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING"); + if (!st->import_mapping_3to2) + goto error; + if (!PyDict_CheckExact(st->import_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, " + "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name); + goto error; + } + Py_CLEAR(compat_pickle); + + codecs = PyImport_ImportModule("codecs"); + if (codecs == NULL) + goto error; + st->codecs_encode = PyObject_GetAttrString(codecs, "encode"); + if (st->codecs_encode == NULL) { + goto error; + } + if (!PyCallable_Check(st->codecs_encode)) { + PyErr_Format(PyExc_RuntimeError, + "codecs.encode should be a callable, not %.200s", + Py_TYPE(st->codecs_encode)->tp_name); + goto error; + } + Py_CLEAR(codecs); + + return 0; + + error: + Py_CLEAR(copyreg); + Py_CLEAR(compat_pickle); + Py_CLEAR(codecs); + _Pickle_ClearState(st); + return -1; +} + +/* Helper for calling a function with a single argument quickly. + + This function steals the reference of the given argument. */ +static PyObject * +_Pickle_FastCall(PyObject *func, PyObject *obj) +{ + PyObject *result; + PyObject *arg_tuple = PyTuple_New(1); + + /* Note: this function used to reuse the argument tuple. This used to give + a slight performance boost with older pickle implementations where many + unbuffered reads occurred (thus needing many function calls). + + However, this optimization was removed because it was too complicated + to get right. It abused the C API for tuples to mutate them which led + to subtle reference counting and concurrency bugs. Furthermore, the + introduction of protocol 4 and the prefetching optimization via peek() + significantly reduced the number of function calls we do. Thus, the + benefits became marginal at best. */ + + if (arg_tuple == NULL) { + Py_DECREF(obj); + return NULL; + } + PyTuple_SET_ITEM(arg_tuple, 0, obj); + result = PyObject_Call(func, arg_tuple, NULL); + Py_CLEAR(arg_tuple); + return result; +} + +/*************************************************************************/ static int stack_underflow(void) { - PyErr_SetString(UnpicklingError, "unpickling stack underflow"); + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "unpickling stack underflow"); return -1; } @@ -240,8 +464,9 @@ Pdata_grow(Pdata *self) static PyObject * Pdata_pop(Pdata *self) { + PickleState *st = _Pickle_GetGlobalState(); if (Py_SIZE(self) == 0) { - PyErr_SetString(UnpicklingError, "bad pickle data"); + PyErr_SetString(st->UnpicklingError, "bad pickle data"); return NULL; } return self->data[--Py_SIZE(self)]; @@ -320,7 +545,6 @@ typedef struct PicklerObject { pickling. */ PyObject *pers_func; /* persistent_id() method, can be NULL */ PyObject *dispatch_table; /* private dispatch_table, can be NULL */ - PyObject *arg; PyObject *write; /* write() method of the output stream. */ PyObject *output_buffer; /* Write into a local bytearray buffer before @@ -329,7 +553,12 @@ typedef struct PicklerObject { Py_ssize_t max_output_len; /* Allocation size of output_buffer. */ int proto; /* Pickle protocol number, >= 0 */ int bin; /* Boolean, true if proto > 0 */ - Py_ssize_t buf_size; /* Size of the current buffered pickle data */ + int framing; /* True when framing is enabled, proto >= 4 */ + Py_ssize_t frame_start; /* Position in output_buffer where the + where the current frame begins. -1 if there + is no frame currently open. */ + + Py_ssize_t buf_size; /* Size of the current buffered pickle data */ int fast; /* Enable fast mode if set to a true value. The fast mode disable the usage of memo, therefore speeding the pickling process by @@ -349,9 +578,9 @@ typedef struct UnpicklerObject { /* The unpickler memo is just an array of PyObject *s. Using a dict is unnecessary, since the keys are contiguous ints. */ PyObject **memo; - Py_ssize_t memo_size; + Py_ssize_t memo_size; /* Capacity of the memo array */ + Py_ssize_t memo_len; /* Number of objects in the memo */ - PyObject *arg; PyObject *pers_func; /* persistent_load() method, can be NULL. */ Py_buffer buffer; @@ -360,6 +589,7 @@ typedef struct UnpicklerObject { Py_ssize_t input_len; Py_ssize_t next_read_idx; Py_ssize_t prefetched_idx; /* index of first prefetched byte */ + PyObject *read; /* read() method of the input stream. */ PyObject *readline; /* readline() method of the input stream. */ PyObject *peek; /* peek() method of the input stream, or NULL */ @@ -436,6 +666,7 @@ PyMemoTable_Copy(PyMemoTable *self) new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry)); if (new->mt_table == NULL) { PyMem_FREE(new); + PyErr_NoMemory(); return NULL; } for (i = 0; i < self->mt_allocated; i++) { @@ -529,7 +760,7 @@ _PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size) oldtable = self->mt_table; self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry)); if (self->mt_table == NULL) { - PyMem_FREE(oldtable); + self->mt_table = oldtable; PyErr_NoMemory(); return -1; } @@ -605,60 +836,6 @@ PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) /*************************************************************************/ -/* Helpers for creating the argument tuple passed to functions. This has the - performance advantage of calling PyTuple_New() only once. - - XXX(avassalotti): Inline directly in _Pickler_FastCall() and - _Unpickler_FastCall(). */ -#define ARG_TUP(self, obj) do { \ - if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \ - Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \ - PyTuple_SET_ITEM((self)->arg, 0, (obj)); \ - } \ - else { \ - Py_DECREF((obj)); \ - } \ - } while (0) - -#define FREE_ARG_TUP(self) do { \ - if ((self)->arg->ob_refcnt > 1) \ - Py_CLEAR((self)->arg); \ - } while (0) - -/* A temporary cleaner API for fast single argument function call. - - XXX: Does caching the argument tuple provides any real performance benefits? - - A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with - glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls - when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then - immediately DECREF it) and 1,200,000 calls when allocating brand new tuples - (i.e, call PyTuple_New() and store the returned value in an array), to save - one second (wall clock time). Either ways, the loading time a pickle stream - large enough to generate this number of calls would be massively - overwhelmed by other factors, like I/O throughput, the GC traversal and - object allocation overhead. So, I really doubt these functions provide any - real benefits. - - On the other hand, oprofile reports that pickle spends a lot of time in - these functions. But, that is probably more related to the function call - overhead, than the argument tuple allocation. - - XXX: And, what is the reference behavior of these? Steal, borrow? At first - glance, it seems to steal the reference of 'arg' and borrow the reference - of 'func'. */ -static PyObject * -_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg) -{ - PyObject *result = NULL; - - ARG_TUP(self, arg); - if (self->arg) { - result = PyObject_Call(func, self->arg, NULL); - FREE_ARG_TUP(self); - } - return result; -} static int _Pickler_ClearBuffer(PicklerObject *self) @@ -669,15 +846,71 @@ _Pickler_ClearBuffer(PicklerObject *self) if (self->output_buffer == NULL) return -1; self->output_len = 0; + self->frame_start = -1; return 0; } +static void +_write_size64(char *out, size_t value) +{ + int i; + + assert(sizeof(size_t) <= 8); + + for (i = 0; i < sizeof(size_t); i++) { + out[i] = (unsigned char)((value >> (8 * i)) & 0xff); + } + for (i = sizeof(size_t); i < 8; i++) { + out[i] = 0; + } +} + +static void +_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len) +{ + qdata[0] = FRAME; + _write_size64(qdata + 1, frame_len); +} + +static int +_Pickler_CommitFrame(PicklerObject *self) +{ + size_t frame_len; + char *qdata; + + if (!self->framing || self->frame_start == -1) + return 0; + frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE; + qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start; + _Pickler_WriteFrameHeader(self, qdata, frame_len); + self->frame_start = -1; + return 0; +} + +static int +_Pickler_OpcodeBoundary(PicklerObject *self) +{ + Py_ssize_t frame_len; + + if (!self->framing || self->frame_start == -1) + return 0; + frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE; + if (frame_len >= FRAME_SIZE_TARGET) + return _Pickler_CommitFrame(self); + else + return 0; +} + static PyObject * _Pickler_GetString(PicklerObject *self) { PyObject *output_buffer = self->output_buffer; assert(self->output_buffer != NULL); + + if (_Pickler_CommitFrame(self)) + return NULL; + self->output_buffer = NULL; /* Resize down to exact size */ if (_PyBytes_Resize(&output_buffer, self->output_len) < 0) @@ -692,67 +925,64 @@ _Pickler_FlushToFile(PicklerObject *self) assert(self->write != NULL); + /* This will commit the frame first */ output = _Pickler_GetString(self); if (output == NULL) return -1; - result = _Pickler_FastCall(self, self->write, output); + result = _Pickle_FastCall(self->write, output); Py_XDECREF(result); return (result == NULL) ? -1 : 0; } static Py_ssize_t -_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n) +_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len) { - Py_ssize_t i, required; + Py_ssize_t i, n, required; char *buffer; + int need_new_frame; assert(s != NULL); + need_new_frame = (self->framing && self->frame_start == -1); + + if (need_new_frame) + n = data_len + FRAME_HEADER_SIZE; + else + n = data_len; required = self->output_len + n; if (required > self->max_output_len) { - if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) { - /* XXX This reallocates a new buffer every time, which is a bit - wasteful. */ - if (_Pickler_FlushToFile(self) < 0) - return -1; - if (_Pickler_ClearBuffer(self) < 0) - return -1; - } - if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) { - /* we already flushed above, so the buffer is empty */ - PyObject *result; - /* XXX we could spare an intermediate copy and pass - a memoryview instead */ - PyObject *output = PyBytes_FromStringAndSize(s, n); - if (s == NULL) - return -1; - result = _Pickler_FastCall(self, self->write, output); - Py_XDECREF(result); - return (result == NULL) ? -1 : 0; - } - else { - if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) { - PyErr_NoMemory(); - return -1; - } - self->max_output_len = (self->output_len + n) / 2 * 3; - if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0) - return -1; + /* Make place in buffer for the pickle chunk */ + if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) { + PyErr_NoMemory(); + return -1; } + self->max_output_len = (self->output_len + n) / 2 * 3; + if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0) + return -1; } buffer = PyBytes_AS_STRING(self->output_buffer); - if (n < 8) { + if (need_new_frame) { + /* Setup new frame */ + Py_ssize_t frame_start = self->output_len; + self->frame_start = frame_start; + for (i = 0; i < FRAME_HEADER_SIZE; i++) { + /* Write an invalid value, for debugging */ + buffer[frame_start + i] = 0xFE; + } + self->output_len += FRAME_HEADER_SIZE; + } + if (data_len < 8) { /* This is faster than memcpy when the string is short. */ - for (i = 0; i < n; i++) { + for (i = 0; i < data_len; i++) { buffer[self->output_len + i] = s[i]; } } else { - memcpy(buffer + self->output_len, s, n); + memcpy(buffer + self->output_len, s, data_len); } - self->output_len += n; - return n; + self->output_len += data_len; + return data_len; } static PicklerObject * @@ -766,25 +996,23 @@ _Pickler_New(void) self->pers_func = NULL; self->dispatch_table = NULL; - self->arg = NULL; self->write = NULL; self->proto = 0; self->bin = 0; + self->framing = 0; + self->frame_start = -1; self->fast = 0; self->fast_nesting = 0; self->fix_imports = 0; self->fast_memo = NULL; - - self->memo = PyMemoTable_New(); - if (self->memo == NULL) { - Py_DECREF(self); - return NULL; - } self->max_output_len = WRITE_BUF_SIZE; self->output_len = 0; + + self->memo = PyMemoTable_New(); self->output_buffer = PyBytes_FromStringAndSize(NULL, self->max_output_len); - if (self->output_buffer == NULL) { + + if (self->memo == NULL || self->output_buffer == NULL) { Py_DECREF(self); return NULL; } @@ -792,34 +1020,29 @@ _Pickler_New(void) } static int -_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj, - PyObject *fix_imports_obj) +_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports) { - long proto = 0; - int fix_imports; + long proto; - if (proto_obj == NULL || proto_obj == Py_None) + if (protocol == NULL || protocol == Py_None) { proto = DEFAULT_PROTOCOL; + } else { - proto = PyLong_AsLong(proto_obj); - if (proto == -1 && PyErr_Occurred()) + proto = PyLong_AsLong(protocol); + if (proto < 0) { + if (proto == -1 && PyErr_Occurred()) + return -1; + proto = HIGHEST_PROTOCOL; + } + else if (proto > HIGHEST_PROTOCOL) { + PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d", + HIGHEST_PROTOCOL); return -1; + } } - if (proto < 0) - proto = HIGHEST_PROTOCOL; - if (proto > HIGHEST_PROTOCOL) { - PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d", - HIGHEST_PROTOCOL); - return -1; - } - fix_imports = PyObject_IsTrue(fix_imports_obj); - if (fix_imports == -1) - return -1; - - self->proto = proto; + self->proto = (int)proto; self->bin = proto > 0; self->fix_imports = fix_imports && proto < 3; - return 0; } @@ -841,20 +1064,6 @@ _Pickler_SetOutputStream(PicklerObject *self, PyObject *file) return 0; } -/* See documentation for _Pickler_FastCall(). */ -static PyObject * -_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg) -{ - PyObject *result = NULL; - - ARG_TUP(self, arg); - if (self->arg) { - result = PyObject_Call(func, self->arg, NULL); - FREE_ARG_TUP(self); - } - return result; -} - /* Returns the size of the input on success, -1 on failure. This takes its own reference to `input`. */ static Py_ssize_t @@ -874,18 +1083,21 @@ _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input) static int _Unpickler_SkipConsumed(UnpicklerObject *self) { - Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx; + Py_ssize_t consumed; + PyObject *r; - if (consumed > 0) { - PyObject *r; - assert(self->peek); /* otherwise we did something wrong */ - /* This makes an useless copy... */ - r = PyObject_CallFunction(self->read, "n", consumed); - if (r == NULL) - return -1; - Py_DECREF(r); - self->prefetched_idx = self->next_read_idx; - } + consumed = self->next_read_idx - self->prefetched_idx; + if (consumed <= 0) + return 0; + + assert(self->peek); /* otherwise we did something wrong */ + /* This makes an useless copy... */ + r = PyObject_CallFunction(self->read, "n", consumed); + if (r == NULL) + return -1; + Py_DECREF(r); + + self->prefetched_idx = self->next_read_idx; return 0; } @@ -909,56 +1121,51 @@ static Py_ssize_t _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n) { PyObject *data; - Py_ssize_t read_size, prefetched_size = 0; + Py_ssize_t read_size; assert(self->read != NULL); if (_Unpickler_SkipConsumed(self) < 0) return -1; - if (n == READ_WHOLE_LINE) + if (n == READ_WHOLE_LINE) { + PyObject *empty_tuple = PyTuple_New(0); data = PyObject_Call(self->readline, empty_tuple, NULL); - else { - PyObject *len = PyLong_FromSsize_t(n); - if (len == NULL) - return -1; - data = _Unpickler_FastCall(self, self->read, len); + Py_DECREF(empty_tuple); } - if (data == NULL) - return -1; - - /* Prefetch some data without advancing the file pointer, if possible */ - if (self->peek) { - PyObject *len, *prefetched; - len = PyLong_FromSsize_t(PREFETCH); - if (len == NULL) { - Py_DECREF(data); - return -1; - } - prefetched = _Unpickler_FastCall(self, self->peek, len); - if (prefetched == NULL) { - if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) { + else { + PyObject *len; + /* Prefetch some data without advancing the file pointer, if possible */ + if (self->peek && n < PREFETCH) { + len = PyLong_FromSsize_t(PREFETCH); + if (len == NULL) + return -1; + data = _Pickle_FastCall(self->peek, len); + if (data == NULL) { + if (!PyErr_ExceptionMatches(PyExc_NotImplementedError)) + return -1; /* peek() is probably not supported by the given file object */ PyErr_Clear(); Py_CLEAR(self->peek); } else { + read_size = _Unpickler_SetStringInput(self, data); Py_DECREF(data); - return -1; + self->prefetched_idx = 0; + if (n <= read_size) + return n; } } - else { - assert(PyBytes_Check(prefetched)); - prefetched_size = PyBytes_GET_SIZE(prefetched); - PyBytes_ConcatAndDel(&data, prefetched); - if (data == NULL) - return -1; - } + len = PyLong_FromSsize_t(n); + if (len == NULL) + return -1; + data = _Pickle_FastCall(self->read, len); } + if (data == NULL) + return -1; - read_size = _Unpickler_SetStringInput(self, data) - prefetched_size; + read_size = _Unpickler_SetStringInput(self, data); Py_DECREF(data); - self->prefetched_idx = read_size; return read_size; } @@ -1006,8 +1213,10 @@ _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len, char **result) { char *input_line = PyMem_Realloc(self->input_line, len + 1); - if (input_line == NULL) + if (input_line == NULL) { + PyErr_NoMemory(); return -1; + } memcpy(input_line, line, len); input_line[len] = '\0'; @@ -1096,7 +1305,12 @@ _Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value) Py_INCREF(value); old_item = self->memo[idx]; self->memo[idx] = value; - Py_XDECREF(old_item); + if (old_item != NULL) { + Py_DECREF(old_item); + } + else { + self->memo_len++; + } return 0; } @@ -1104,8 +1318,10 @@ static PyObject ** _Unpickler_NewMemo(Py_ssize_t new_size) { PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *)); - if (memo == NULL) + if (memo == NULL) { + PyErr_NoMemory(); return NULL; + } memset(memo, 0, new_size * sizeof(PyObject *)); return memo; } @@ -1136,21 +1352,6 @@ _Unpickler_New(void) if (self == NULL) return NULL; - self->stack = (Pdata *)Pdata_New(); - if (self->stack == NULL) { - Py_DECREF(self); - return NULL; - } - memset(&self->buffer, 0, sizeof(Py_buffer)); - - self->memo_size = 32; - self->memo = _Unpickler_NewMemo(self->memo_size); - if (self->memo == NULL) { - Py_DECREF(self); - return NULL; - } - - self->arg = NULL; self->pers_func = NULL; self->input_buffer = NULL; self->input_line = NULL; @@ -1167,6 +1368,16 @@ _Unpickler_New(void) self->marks_size = 0; self->proto = 0; self->fix_imports = 0; + memset(&self->buffer, 0, sizeof(Py_buffer)); + self->memo_size = 32; + self->memo_len = 0; + self->memo = _Unpickler_NewMemo(self->memo_size); + self->stack = (Pdata *)Pdata_New(); + + if (self->memo == NULL || self->stack == NULL) { + Py_DECREF(self); + return NULL; + } return self; } @@ -1213,8 +1424,8 @@ _Unpickler_SetInputEncoding(UnpicklerObject *self, if (errors == NULL) errors = "strict"; - self->encoding = strdup(encoding); - self->errors = strdup(errors); + self->encoding = _PyMem_Strdup(encoding); + self->errors = _PyMem_Strdup(errors); if (self->encoding == NULL || self->errors == NULL) { PyErr_NoMemory(); return -1; @@ -1257,7 +1468,8 @@ memo_get(PicklerObject *self, PyObject *key) len = 5; } else { /* unlikely */ - PyErr_SetString(PicklingError, + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->PicklingError, "memo id too large for LONG_BINGET"); return -1; } @@ -1274,128 +1486,175 @@ memo_get(PicklerObject *self, PyObject *key) static int memo_put(PicklerObject *self, PyObject *obj) { - Py_ssize_t x; char pdata[30]; Py_ssize_t len; - int status = 0; + Py_ssize_t idx; + + const char memoize_op = MEMOIZE; if (self->fast) return 0; - x = PyMemoTable_Size(self->memo); - if (PyMemoTable_Set(self->memo, obj, x) < 0) - goto error; + idx = PyMemoTable_Size(self->memo); + if (PyMemoTable_Set(self->memo, obj, idx) < 0) + return -1; - if (!self->bin) { + if (self->proto >= 4) { + if (_Pickler_Write(self, &memoize_op, 1) < 0) + return -1; + return 0; + } + else if (!self->bin) { pdata[0] = PUT; PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, - "%" PY_FORMAT_SIZE_T "d\n", x); + "%" PY_FORMAT_SIZE_T "d\n", idx); len = strlen(pdata); } else { - if (x < 256) { + if (idx < 256) { pdata[0] = BINPUT; - pdata[1] = (unsigned char)x; + pdata[1] = (unsigned char)idx; len = 2; } - else if (x <= 0xffffffffL) { + else if (idx <= 0xffffffffL) { pdata[0] = LONG_BINPUT; - pdata[1] = (unsigned char)(x & 0xff); - pdata[2] = (unsigned char)((x >> 8) & 0xff); - pdata[3] = (unsigned char)((x >> 16) & 0xff); - pdata[4] = (unsigned char)((x >> 24) & 0xff); + pdata[1] = (unsigned char)(idx & 0xff); + pdata[2] = (unsigned char)((idx >> 8) & 0xff); + pdata[3] = (unsigned char)((idx >> 16) & 0xff); + pdata[4] = (unsigned char)((idx >> 24) & 0xff); len = 5; } else { /* unlikely */ - PyErr_SetString(PicklingError, + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->PicklingError, "memo id too large for LONG_BINPUT"); return -1; } } - if (_Pickler_Write(self, pdata, len) < 0) - goto error; + return -1; - if (0) { - error: - status = -1; - } + return 0; +} - return status; +static PyObject * +getattribute(PyObject *obj, PyObject *name, int allow_qualname) { + PyObject *dotted_path; + Py_ssize_t i; + _Py_static_string(PyId_dot, "."); + _Py_static_string(PyId_locals, "<locals>"); + + dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1); + if (dotted_path == NULL) { + return NULL; + } + assert(Py_SIZE(dotted_path) >= 1); + if (!allow_qualname && Py_SIZE(dotted_path) > 1) { + PyErr_Format(PyExc_AttributeError, + "Can't get qualified attribute %R on %R;" + "use protocols >= 4 to enable support", + name, obj); + Py_DECREF(dotted_path); + return NULL; + } + Py_INCREF(obj); + for (i = 0; i < Py_SIZE(dotted_path); i++) { + PyObject *subpath = PyList_GET_ITEM(dotted_path, i); + PyObject *tmp; + PyObject *result = PyUnicode_RichCompare( + subpath, _PyUnicode_FromId(&PyId_locals), Py_EQ); + int is_equal = (result == Py_True); + assert(PyBool_Check(result)); + Py_DECREF(result); + if (is_equal) { + PyErr_Format(PyExc_AttributeError, + "Can't get local attribute %R on %R", name, obj); + Py_DECREF(dotted_path); + Py_DECREF(obj); + return NULL; + } + tmp = PyObject_GetAttr(obj, subpath); + Py_DECREF(obj); + if (tmp == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + PyErr_Format(PyExc_AttributeError, + "Can't get attribute %R on %R", name, obj); + } + Py_DECREF(dotted_path); + return NULL; + } + obj = tmp; + } + Py_DECREF(dotted_path); + return obj; } static PyObject * -whichmodule(PyObject *global, PyObject *global_name) +whichmodule(PyObject *global, PyObject *global_name, int allow_qualname) { - Py_ssize_t i, j; - static PyObject *module_str = NULL; - static PyObject *main_str = NULL; PyObject *module_name; PyObject *modules_dict; PyObject *module; PyObject *obj; + Py_ssize_t i, j; + _Py_IDENTIFIER(__module__); + _Py_IDENTIFIER(modules); + _Py_IDENTIFIER(__main__); - if (module_str == NULL) { - module_str = PyUnicode_InternFromString("__module__"); - if (module_str == NULL) - return NULL; - main_str = PyUnicode_InternFromString("__main__"); - if (main_str == NULL) - return NULL; - } - - module_name = PyObject_GetAttr(global, module_str); + module_name = _PyObject_GetAttrId(global, &PyId___module__); - /* In some rare cases (e.g., bound methods of extension types), - __module__ can be None. If it is so, then search sys.modules - for the module of global. */ - if (module_name == Py_None) { - Py_DECREF(module_name); - goto search; - } - - if (module_name) { - return module_name; - } - if (PyErr_ExceptionMatches(PyExc_AttributeError)) + if (module_name == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) + return NULL; PyErr_Clear(); - else - return NULL; - - search: - modules_dict = PySys_GetObject("modules"); - if (modules_dict == NULL) + } + else { + /* In some rare cases (e.g., bound methods of extension types), + __module__ can be None. If it is so, then search sys.modules for + the module of global. */ + if (module_name != Py_None) + return module_name; + Py_CLEAR(module_name); + } + assert(module_name == NULL); + + modules_dict = _PySys_GetObjectId(&PyId_modules); + if (modules_dict == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); return NULL; + } i = 0; - module_name = NULL; while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) { - if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1) + PyObject *result = PyUnicode_RichCompare( + module_name, _PyUnicode_FromId(&PyId___main__), Py_EQ); + int is_equal = (result == Py_True); + assert(PyBool_Check(result)); + Py_DECREF(result); + if (is_equal) + continue; + if (module == Py_None) continue; - obj = PyObject_GetAttr(module, global_name); + obj = getattribute(module, global_name, allow_qualname); if (obj == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) - PyErr_Clear(); - else + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) return NULL; + PyErr_Clear(); continue; } - if (obj != global) { + if (obj == global) { Py_DECREF(obj); - continue; + Py_INCREF(module_name); + return module_name; } - Py_DECREF(obj); - break; } /* If no module is found, use __main__. */ - if (!j) { - module_name = main_str; - } - + module_name = _PyUnicode_FromId(&PyId___main__); Py_INCREF(module_name); return module_name; } @@ -1424,7 +1683,7 @@ fast_save_enter(PicklerObject *self, PyObject *obj) key = PyLong_FromVoidPtr(obj); if (key == NULL) return 0; - if (PyDict_GetItem(self->fast_memo, key)) { + if (PyDict_GetItemWithError(self->fast_memo, key)) { Py_DECREF(key); PyErr_Format(PyExc_ValueError, "fast mode: can't pickle cyclic objects " @@ -1433,6 +1692,9 @@ fast_save_enter(PicklerObject *self, PyObject *obj) self->fast_nesting = -1; return 0; } + if (PyErr_Occurred()) { + return 0; + } if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) { Py_DECREF(key); self->fast_nesting = -1; @@ -1472,46 +1734,58 @@ save_none(PicklerObject *self, PyObject *obj) static int save_bool(PicklerObject *self, PyObject *obj) { - static const char *buf[2] = { FALSE, TRUE }; - const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1}; - int p = (obj == Py_True); - if (self->proto >= 2) { - const char bool_op = p ? NEWTRUE : NEWFALSE; + const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE; if (_Pickler_Write(self, &bool_op, 1) < 0) return -1; } - else if (_Pickler_Write(self, buf[p], len[p]) < 0) - return -1; - + else { + /* These aren't opcodes -- they're ways to pickle bools before protocol 2 + * so that unpicklers written before bools were introduced unpickle them + * as ints, but unpicklers after can recognize that bools were intended. + * Note that protocol 2 added direct ways to pickle bools. + */ + const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n"; + if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0) + return -1; + } return 0; } static int -save_int(PicklerObject *self, long x) -{ - char pdata[32]; - Py_ssize_t len = 0; - - if (!self->bin -#if SIZEOF_LONG > 4 - || x > 0x7fffffffL || x < -0x80000000L -#endif - ) { - /* Text-mode pickle, or long too big to fit in the 4-byte - * signed BININT format: store as a string. - */ - pdata[0] = LONG; /* use LONG for consistency with pickle.py */ - PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x); - if (_Pickler_Write(self, pdata, strlen(pdata)) < 0) - return -1; +save_long(PicklerObject *self, PyObject *obj) +{ + PyObject *repr = NULL; + Py_ssize_t size; + long val; + int status = 0; + + const char long_op = LONG; + + val= PyLong_AsLong(obj); + if (val == -1 && PyErr_Occurred()) { + /* out of range for int pickling */ + PyErr_Clear(); } - else { - /* Binary pickle and x fits in a signed 4-byte int. */ - pdata[1] = (unsigned char)(x & 0xff); - pdata[2] = (unsigned char)((x >> 8) & 0xff); - pdata[3] = (unsigned char)((x >> 16) & 0xff); - pdata[4] = (unsigned char)((x >> 24) & 0xff); + else if (self->bin && + (sizeof(long) <= 4 || + (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) { + /* result fits in a signed 4-byte integer. + + Note: we can't use -0x80000000L in the above condition because some + compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type + before applying the unary minus when sizeof(long) <= 4. The + resulting value stays unsigned which is commonly not what we want, + so MSVC happily warns us about it. However, that result would have + been fine because we guard for sizeof(long) <= 4 which turns the + condition true in that particular case. */ + char pdata[32]; + Py_ssize_t len = 0; + + pdata[1] = (unsigned char)(val & 0xff); + pdata[2] = (unsigned char)((val >> 8) & 0xff); + pdata[3] = (unsigned char)((val >> 16) & 0xff); + pdata[4] = (unsigned char)((val >> 24) & 0xff); if ((pdata[4] == 0) && (pdata[3] == 0)) { if (pdata[2] == 0) { @@ -1530,30 +1804,9 @@ save_int(PicklerObject *self, long x) if (_Pickler_Write(self, pdata, len) < 0) return -1; - } - - return 0; -} - -static int -save_long(PicklerObject *self, PyObject *obj) -{ - PyObject *repr = NULL; - Py_ssize_t size; - long val = PyLong_AsLong(obj); - int status = 0; - const char long_op = LONG; - - if (val == -1 && PyErr_Occurred()) { - /* out of range for int pickling */ - PyErr_Clear(); + return 0; } - else -#if SIZEOF_LONG > 4 - if (val <= 0x7fffffffL && val >= -0x80000000L) -#endif - return save_int(self, val); if (self->proto >= 2) { /* Linear-time pickling. */ @@ -1719,40 +1972,25 @@ save_bytes(PicklerObject *self, PyObject *obj) Python 2 *and* the appropriate 'bytes' object when unpickled using Python 3. Again this is a hack and we don't need to do this with newer protocols. */ - static PyObject *codecs_encode = NULL; PyObject *reduce_value = NULL; int status; - if (codecs_encode == NULL) { - PyObject *codecs_module = PyImport_ImportModule("codecs"); - if (codecs_module == NULL) { - return -1; - } - codecs_encode = PyObject_GetAttrString(codecs_module, "encode"); - Py_DECREF(codecs_module); - if (codecs_encode == NULL) { - return -1; - } - } - if (PyBytes_GET_SIZE(obj) == 0) { reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type); } else { - static PyObject *latin1 = NULL; + PickleState *st = _Pickle_GetGlobalState(); PyObject *unicode_str = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), "strict"); + _Py_IDENTIFIER(latin1); + if (unicode_str == NULL) return -1; - if (latin1 == NULL) { - latin1 = PyUnicode_InternFromString("latin1"); - if (latin1 == NULL) - return -1; - } reduce_value = Py_BuildValue("(O(OO))", - codecs_encode, unicode_str, latin1); + st->codecs_encode, unicode_str, + _PyUnicode_FromId(&PyId_latin1)); Py_DECREF(unicode_str); } @@ -1766,14 +2004,14 @@ save_bytes(PicklerObject *self, PyObject *obj) } else { Py_ssize_t size; - char header[5]; + char header[9]; Py_ssize_t len; size = PyBytes_GET_SIZE(obj); if (size < 0) return -1; - if (size < 256) { + if (size <= 0xff) { header[0] = SHORT_BINBYTES; header[1] = (unsigned char)size; len = 2; @@ -1786,6 +2024,11 @@ save_bytes(PicklerObject *self, PyObject *obj) header[4] = (unsigned char)((size >> 24) & 0xff); len = 5; } + else if (self->proto >= 4) { + header[0] = BINBYTES8; + _write_size64(header + 1, size); + len = 8; + } else { PyErr_SetString(PyExc_OverflowError, "cannot serialize a bytes object larger than 4 GiB"); @@ -1873,63 +2116,106 @@ done: } static int -save_unicode(PicklerObject *self, PyObject *obj) +write_utf8(PicklerObject *self, char *data, Py_ssize_t size) { - Py_ssize_t size; - PyObject *encoded = NULL; + char header[9]; + Py_ssize_t len; - if (self->bin) { - char pdata[5]; + if (size <= 0xff && self->proto >= 4) { + header[0] = SHORT_BINUNICODE; + header[1] = (unsigned char)(size & 0xff); + len = 2; + } + else if (size <= 0xffffffffUL) { + header[0] = BINUNICODE; + header[1] = (unsigned char)(size & 0xff); + header[2] = (unsigned char)((size >> 8) & 0xff); + header[3] = (unsigned char)((size >> 16) & 0xff); + header[4] = (unsigned char)((size >> 24) & 0xff); + len = 5; + } + else if (self->proto >= 4) { + header[0] = BINUNICODE8; + _write_size64(header + 1, size); + len = 9; + } + else { + PyErr_SetString(PyExc_OverflowError, + "cannot serialize a string larger than 4GiB"); + return -1; + } - encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); - if (encoded == NULL) - goto error; + if (_Pickler_Write(self, header, len) < 0) + return -1; + if (_Pickler_Write(self, data, size) < 0) + return -1; - size = PyBytes_GET_SIZE(encoded); - if (size > 0xffffffffL) { - PyErr_SetString(PyExc_OverflowError, - "cannot serialize a string larger than 4 GiB"); - goto error; /* string too large */ - } + return 0; +} - pdata[0] = BINUNICODE; - pdata[1] = (unsigned char)(size & 0xff); - pdata[2] = (unsigned char)((size >> 8) & 0xff); - pdata[3] = (unsigned char)((size >> 16) & 0xff); - pdata[4] = (unsigned char)((size >> 24) & 0xff); +static int +write_unicode_binary(PicklerObject *self, PyObject *obj) +{ + PyObject *encoded = NULL; + Py_ssize_t size; + char *data; + int r; - if (_Pickler_Write(self, pdata, 5) < 0) - goto error; + if (PyUnicode_READY(obj)) + return -1; - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) - goto error; + data = PyUnicode_AsUTF8AndSize(obj, &size); + if (data != NULL) + return write_utf8(self, data, size); + + /* Issue #8383: for strings with lone surrogates, fallback on the + "surrogatepass" error handler. */ + PyErr_Clear(); + encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); + if (encoded == NULL) + return -1; + + r = write_utf8(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)); + Py_DECREF(encoded); + return r; +} + +static int +save_unicode(PicklerObject *self, PyObject *obj) +{ + if (self->bin) { + if (write_unicode_binary(self, obj) < 0) + return -1; } else { + PyObject *encoded; + Py_ssize_t size; const char unicode_op = UNICODE; encoded = raw_unicode_escape(obj); if (encoded == NULL) - goto error; + return -1; - if (_Pickler_Write(self, &unicode_op, 1) < 0) - goto error; + if (_Pickler_Write(self, &unicode_op, 1) < 0) { + Py_DECREF(encoded); + return -1; + } size = PyBytes_GET_SIZE(encoded); - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) - goto error; + if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) { + Py_DECREF(encoded); + return -1; + } + Py_DECREF(encoded); if (_Pickler_Write(self, "\n", 1) < 0) - goto error; + return -1; } if (memo_put(self, obj) < 0) - goto error; + return -1; - Py_DECREF(encoded); return 0; - - error: - Py_XDECREF(encoded); - return -1; } /* A helper for save_tuple. Push the len elements in tuple t on the stack. */ @@ -2557,34 +2843,252 @@ save_dict(PicklerObject *self, PyObject *obj) } static int +save_set(PicklerObject *self, PyObject *obj) +{ + PyObject *item; + int i; + Py_ssize_t set_size, ppos = 0; + Py_hash_t hash; + + const char empty_set_op = EMPTY_SET; + const char mark_op = MARK; + const char additems_op = ADDITEMS; + + if (self->proto < 4) { + PyObject *items; + PyObject *reduce_value; + int status; + + items = PySequence_List(obj); + if (items == NULL) { + return -1; + } + reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items); + Py_DECREF(items); + if (reduce_value == NULL) { + return -1; + } + /* save_reduce() will memoize the object automatically. */ + status = save_reduce(self, reduce_value, obj); + Py_DECREF(reduce_value); + return status; + } + + if (_Pickler_Write(self, &empty_set_op, 1) < 0) + return -1; + + if (memo_put(self, obj) < 0) + return -1; + + set_size = PySet_GET_SIZE(obj); + if (set_size == 0) + return 0; /* nothing to do */ + + /* Write in batches of BATCHSIZE. */ + do { + i = 0; + if (_Pickler_Write(self, &mark_op, 1) < 0) + return -1; + while (_PySet_NextEntry(obj, &ppos, &item, &hash)) { + if (save(self, item, 0) < 0) + return -1; + if (++i == BATCHSIZE) + break; + } + if (_Pickler_Write(self, &additems_op, 1) < 0) + return -1; + if (PySet_GET_SIZE(obj) != set_size) { + PyErr_Format( + PyExc_RuntimeError, + "set changed size during iteration"); + return -1; + } + } while (i == BATCHSIZE); + + return 0; +} + +static int +save_frozenset(PicklerObject *self, PyObject *obj) +{ + PyObject *iter; + + const char mark_op = MARK; + const char frozenset_op = FROZENSET; + + if (self->fast && !fast_save_enter(self, obj)) + return -1; + + if (self->proto < 4) { + PyObject *items; + PyObject *reduce_value; + int status; + + items = PySequence_List(obj); + if (items == NULL) { + return -1; + } + reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type, + items); + Py_DECREF(items); + if (reduce_value == NULL) { + return -1; + } + /* save_reduce() will memoize the object automatically. */ + status = save_reduce(self, reduce_value, obj); + Py_DECREF(reduce_value); + return status; + } + + if (_Pickler_Write(self, &mark_op, 1) < 0) + return -1; + + iter = PyObject_GetIter(obj); + if (iter == NULL) { + return -1; + } + for (;;) { + PyObject *item; + + item = PyIter_Next(iter); + if (item == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(iter); + return -1; + } + break; + } + if (save(self, item, 0) < 0) { + Py_DECREF(item); + Py_DECREF(iter); + return -1; + } + Py_DECREF(item); + } + Py_DECREF(iter); + + /* If the object is already in the memo, this means it is + recursive. In this case, throw away everything we put on the + stack, and fetch the object back from the memo. */ + if (PyMemoTable_Get(self->memo, obj)) { + const char pop_mark_op = POP_MARK; + + if (_Pickler_Write(self, &pop_mark_op, 1) < 0) + return -1; + if (memo_get(self, obj) < 0) + return -1; + return 0; + } + + if (_Pickler_Write(self, &frozenset_op, 1) < 0) + return -1; + if (memo_put(self, obj) < 0) + return -1; + + return 0; +} + +static int +fix_imports(PyObject **module_name, PyObject **global_name) +{ + PyObject *key; + PyObject *item; + PickleState *st = _Pickle_GetGlobalState(); + + key = PyTuple_Pack(2, *module_name, *global_name); + if (key == NULL) + return -1; + item = PyDict_GetItemWithError(st->name_mapping_3to2, key); + Py_DECREF(key); + if (item) { + PyObject *fixed_module_name; + PyObject *fixed_global_name; + + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be 2-tuples, not %.200s", + Py_TYPE(item)->tp_name); + return -1; + } + fixed_module_name = PyTuple_GET_ITEM(item, 0); + fixed_global_name = PyTuple_GET_ITEM(item, 1); + if (!PyUnicode_Check(fixed_module_name) || + !PyUnicode_Check(fixed_global_name)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be pairs of str, not (%.200s, %.200s)", + Py_TYPE(fixed_module_name)->tp_name, + Py_TYPE(fixed_global_name)->tp_name); + return -1; + } + + Py_CLEAR(*module_name); + Py_CLEAR(*global_name); + Py_INCREF(fixed_module_name); + Py_INCREF(fixed_global_name); + *module_name = fixed_module_name; + *global_name = fixed_global_name; + } + else if (PyErr_Occurred()) { + return -1; + } + + item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name); + if (item) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING values " + "should be strings, not %.200s", + Py_TYPE(item)->tp_name); + return -1; + } + Py_CLEAR(*module_name); + Py_INCREF(item); + *module_name = item; + } + else if (PyErr_Occurred()) { + return -1; + } + + return 0; +} + +static int save_global(PicklerObject *self, PyObject *obj, PyObject *name) { - static PyObject *name_str = NULL; PyObject *global_name = NULL; PyObject *module_name = NULL; PyObject *module = NULL; PyObject *cls; + PickleState *st = _Pickle_GetGlobalState(); int status = 0; + _Py_IDENTIFIER(__name__); + _Py_IDENTIFIER(__qualname__); const char global_op = GLOBAL; - if (name_str == NULL) { - name_str = PyUnicode_InternFromString("__name__"); - if (name_str == NULL) - goto error; - } - if (name) { + Py_INCREF(name); global_name = name; - Py_INCREF(global_name); } else { - global_name = PyObject_GetAttr(obj, name_str); - if (global_name == NULL) - goto error; + if (self->proto >= 4) { + global_name = _PyObject_GetAttrId(obj, &PyId___qualname__); + if (global_name == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) + goto error; + PyErr_Clear(); + } + } + if (global_name == NULL) { + global_name = _PyObject_GetAttrId(obj, &PyId___name__); + if (global_name == NULL) + goto error; + } } - module_name = whichmodule(obj, global_name); + module_name = whichmodule(obj, global_name, self->proto >= 4); if (module_name == NULL) goto error; @@ -2598,21 +3102,21 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) extra parameters of __import__ to fix that. */ module = PyImport_Import(module_name); if (module == NULL) { - PyErr_Format(PicklingError, + PyErr_Format(st->PicklingError, "Can't pickle %R: import of module %R failed", obj, module_name); goto error; } - cls = PyObject_GetAttr(module, global_name); + cls = getattribute(module, global_name, self->proto >= 4); if (cls == NULL) { - PyErr_Format(PicklingError, - "Can't pickle %R: attribute lookup %S.%S failed", - obj, module_name, global_name); + PyErr_Format(st->PicklingError, + "Can't pickle %R: attribute lookup %S on %S failed", + obj, global_name, module_name); goto error; } if (cls != obj) { Py_DECREF(cls); - PyErr_Format(PicklingError, + PyErr_Format(st->PicklingError, "Can't pickle %R: it's not the same object as %S.%S", obj, module_name, global_name); goto error; @@ -2623,18 +3127,27 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) /* See whether this is in the extension registry, and if * so generate an EXT opcode. */ + PyObject *extension_key; PyObject *code_obj; /* extension code as Python object */ long code; /* extension code as C value */ char pdata[5]; Py_ssize_t n; - PyTuple_SET_ITEM(two_tuple, 0, module_name); - PyTuple_SET_ITEM(two_tuple, 1, global_name); - code_obj = PyDict_GetItem(extension_registry, two_tuple); + extension_key = PyTuple_Pack(2, module_name, global_name); + if (extension_key == NULL) { + goto error; + } + code_obj = PyDict_GetItemWithError(st->extension_registry, + extension_key); + Py_DECREF(extension_key); /* The object is not registered in the extension registry. This is the most likely code path. */ - if (code_obj == NULL) + if (code_obj == NULL) { + if (PyErr_Occurred()) { + goto error; + } goto gen_global; + } /* XXX: pickle.py doesn't check neither the type, nor the range of the value returned by the extension_registry. It should for @@ -2642,7 +3155,7 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) /* Verify code_obj has the right type and value. */ if (!PyLong_Check(code_obj)) { - PyErr_Format(PicklingError, + PyErr_Format(st->PicklingError, "Can't pickle %R: extension code %R isn't an integer", obj, code_obj); goto error; @@ -2650,9 +3163,8 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) code = PyLong_AS_LONG(code_obj); if (code <= 0 || code > 0x7fffffffL) { if (!PyErr_Occurred()) - PyErr_Format(PicklingError, - "Can't pickle %R: extension code %ld is out of range", - obj, code); + PyErr_Format(st->PicklingError, "Can't pickle %R: extension " + "code %ld is out of range", obj, code); goto error; } @@ -2681,120 +3193,84 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) goto error; } else { - /* Generate a normal global opcode if we are using a pickle - protocol <= 2, or if the object is not registered in the - extension registry. */ - PyObject *encoded; - PyObject *(*unicode_encoder)(PyObject *); - gen_global: - if (_Pickler_Write(self, &global_op, 1) < 0) - goto error; + if (self->proto >= 4) { + const char stack_global_op = STACK_GLOBAL; + + if (save(self, module_name, 0) < 0) + goto error; + if (save(self, global_name, 0) < 0) + goto error; - /* Since Python 3.0 now supports non-ASCII identifiers, we encode both - the module name and the global name using UTF-8. We do so only when - we are using the pickle protocol newer than version 3. This is to - ensure compatibility with older Unpickler running on Python 2.x. */ - if (self->proto >= 3) { - unicode_encoder = PyUnicode_AsUTF8String; + if (_Pickler_Write(self, &stack_global_op, 1) < 0) + goto error; } else { - unicode_encoder = PyUnicode_AsASCIIString; - } + /* Generate a normal global opcode if we are using a pickle + protocol < 4, or if the object is not registered in the + extension registry. */ + PyObject *encoded; + PyObject *(*unicode_encoder)(PyObject *); - /* For protocol < 3 and if the user didn't request against doing so, - we convert module names to the old 2.x module names. */ - if (self->fix_imports) { - PyObject *key; - PyObject *item; - - key = PyTuple_Pack(2, module_name, global_name); - if (key == NULL) + if (_Pickler_Write(self, &global_op, 1) < 0) goto error; - item = PyDict_GetItemWithError(name_mapping_3to2, key); - Py_DECREF(key); - if (item) { - if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_NAME_MAPPING values " - "should be 2-tuples, not %.200s", - Py_TYPE(item)->tp_name); - goto error; - } - Py_CLEAR(module_name); - Py_CLEAR(global_name); - module_name = PyTuple_GET_ITEM(item, 0); - global_name = PyTuple_GET_ITEM(item, 1); - if (!PyUnicode_Check(module_name) || - !PyUnicode_Check(global_name)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_NAME_MAPPING values " - "should be pairs of str, not (%.200s, %.200s)", - Py_TYPE(module_name)->tp_name, - Py_TYPE(global_name)->tp_name); + + /* For protocol < 3 and if the user didn't request against doing + so, we convert module names to the old 2.x module names. */ + if (self->proto < 3 && self->fix_imports) { + if (fix_imports(&module_name, &global_name) < 0) { goto error; } - Py_INCREF(module_name); - Py_INCREF(global_name); - } - else if (PyErr_Occurred()) { - goto error; } - item = PyDict_GetItemWithError(import_mapping_3to2, module_name); - if (item) { - if (!PyUnicode_Check(item)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_IMPORT_MAPPING values " - "should be strings, not %.200s", - Py_TYPE(item)->tp_name); - goto error; - } - Py_CLEAR(module_name); - module_name = item; - Py_INCREF(module_name); + /* Since Python 3.0 now supports non-ASCII identifiers, we encode + both the module name and the global name using UTF-8. We do so + only when we are using the pickle protocol newer than version + 3. This is to ensure compatibility with older Unpickler running + on Python 2.x. */ + if (self->proto == 3) { + unicode_encoder = PyUnicode_AsUTF8String; + } + else { + unicode_encoder = PyUnicode_AsASCIIString; } - else if (PyErr_Occurred()) { + encoded = unicode_encoder(module_name); + if (encoded == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + PyErr_Format(st->PicklingError, + "can't pickle module identifier '%S' using " + "pickle protocol %i", + module_name, self->proto); + goto error; + } + if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)) < 0) { + Py_DECREF(encoded); goto error; } - } - - /* Save the name of the module. */ - encoded = unicode_encoder(module_name); - if (encoded == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) - PyErr_Format(PicklingError, - "can't pickle module identifier '%S' using " - "pickle protocol %i", module_name, self->proto); - goto error; - } - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), - PyBytes_GET_SIZE(encoded)) < 0) { Py_DECREF(encoded); - goto error; - } - Py_DECREF(encoded); - if(_Pickler_Write(self, "\n", 1) < 0) - goto error; + if(_Pickler_Write(self, "\n", 1) < 0) + goto error; - /* Save the name of the module. */ - encoded = unicode_encoder(global_name); - if (encoded == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) - PyErr_Format(PicklingError, - "can't pickle global identifier '%S' using " - "pickle protocol %i", global_name, self->proto); - goto error; - } - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), - PyBytes_GET_SIZE(encoded)) < 0) { + /* Save the name of the module. */ + encoded = unicode_encoder(global_name); + if (encoded == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + PyErr_Format(st->PicklingError, + "can't pickle global identifier '%S' using " + "pickle protocol %i", + global_name, self->proto); + goto error; + } + if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)) < 0) { + Py_DECREF(encoded); + goto error; + } Py_DECREF(encoded); - goto error; + if (_Pickler_Write(self, "\n", 1) < 0) + goto error; } - Py_DECREF(encoded); - if(_Pickler_Write(self, "\n", 1) < 0) - goto error; - /* Memoize the object. */ if (memo_put(self, obj) < 0) goto error; @@ -2812,30 +3288,6 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) } static int -save_ellipsis(PicklerObject *self, PyObject *obj) -{ - PyObject *str = PyUnicode_FromString("Ellipsis"); - int res; - if (str == NULL) - return -1; - res = save_global(self, Py_Ellipsis, str); - Py_DECREF(str); - return res; -} - -static int -save_notimplemented(PicklerObject *self, PyObject *obj) -{ - PyObject *str = PyUnicode_FromString("NotImplemented"); - int res; - if (str == NULL) - return -1; - res = save_global(self, Py_NotImplemented, str); - Py_DECREF(str); - return res; -} - -static int save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton) { PyObject *reduce_value; @@ -2875,7 +3327,7 @@ save_pers(PicklerObject *self, PyObject *obj, PyObject *func) const char binpersid_op = BINPERSID; Py_INCREF(obj); - pid = _Pickler_FastCall(self, func, obj); + pid = _Pickle_FastCall(func, obj); if (pid == NULL) return -1; @@ -2923,14 +3375,9 @@ static PyObject * get_class(PyObject *obj) { PyObject *cls; - static PyObject *str_class; + _Py_IDENTIFIER(__class__); - if (str_class == NULL) { - str_class = PyUnicode_InternFromString("__class__"); - if (str_class == NULL) - return NULL; - } - cls = PyObject_GetAttr(obj, str_class); + cls = _PyObject_GetAttrId(obj, &PyId___class__); if (cls == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Clear(); @@ -2952,17 +3399,18 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) PyObject *state = NULL; PyObject *listitems = Py_None; PyObject *dictitems = Py_None; + PickleState *st = _Pickle_GetGlobalState(); Py_ssize_t size; - - int use_newobj = self->proto >= 2; + int use_newobj = 0, use_newobj_ex = 0; const char reduce_op = REDUCE; const char build_op = BUILD; const char newobj_op = NEWOBJ; + const char newobj_ex_op = NEWOBJ_EX; size = PyTuple_Size(args); if (size < 2 || size > 5) { - PyErr_SetString(PicklingError, "tuple returned by " + PyErr_SetString(st->PicklingError, "tuple returned by " "__reduce__ must contain 2 through 5 elements"); return -1; } @@ -2972,12 +3420,12 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) return -1; if (!PyCallable_Check(callable)) { - PyErr_SetString(PicklingError, "first item of the tuple " + PyErr_SetString(st->PicklingError, "first item of the tuple " "returned by __reduce__ must be callable"); return -1; } if (!PyTuple_Check(argtup)) { - PyErr_SetString(PicklingError, "second item of the tuple " + PyErr_SetString(st->PicklingError, "second item of the tuple " "returned by __reduce__ must be a tuple"); return -1; } @@ -2988,7 +3436,7 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) if (listitems == Py_None) listitems = NULL; else if (!PyIter_Check(listitems)) { - PyErr_Format(PicklingError, "fourth element of the tuple " + PyErr_Format(st->PicklingError, "fourth element of the tuple " "returned by __reduce__ must be an iterator, not %s", Py_TYPE(listitems)->tp_name); return -1; @@ -2997,40 +3445,81 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) if (dictitems == Py_None) dictitems = NULL; else if (!PyIter_Check(dictitems)) { - PyErr_Format(PicklingError, "fifth element of the tuple " + PyErr_Format(st->PicklingError, "fifth element of the tuple " "returned by __reduce__ must be an iterator, not %s", Py_TYPE(dictitems)->tp_name); return -1; } - /* Protocol 2 special case: if callable's name is __newobj__, use - NEWOBJ. */ - if (use_newobj) { - static PyObject *newobj_str = NULL, *name_str = NULL; + if (self->proto >= 2) { PyObject *name; + _Py_IDENTIFIER(__name__); - if (newobj_str == NULL) { - newobj_str = PyUnicode_InternFromString("__newobj__"); - name_str = PyUnicode_InternFromString("__name__"); - if (newobj_str == NULL || name_str == NULL) - return -1; - } - - name = PyObject_GetAttr(callable, name_str); + name = _PyObject_GetAttrId(callable, &PyId___name__); if (name == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) - PyErr_Clear(); - else + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { return -1; - use_newobj = 0; + } + PyErr_Clear(); + } + else if (self->proto >= 4) { + _Py_IDENTIFIER(__newobj_ex__); + use_newobj_ex = PyUnicode_Check(name) && + PyUnicode_Compare( + name, _PyUnicode_FromId(&PyId___newobj_ex__)) == 0; + Py_DECREF(name); } else { + _Py_IDENTIFIER(__newobj__); use_newobj = PyUnicode_Check(name) && - PyUnicode_Compare(name, newobj_str) == 0; + PyUnicode_Compare( + name, _PyUnicode_FromId(&PyId___newobj__)) == 0; Py_DECREF(name); } } - if (use_newobj) { + + if (use_newobj_ex) { + PyObject *cls; + PyObject *args; + PyObject *kwargs; + + if (Py_SIZE(argtup) != 3) { + PyErr_Format(st->PicklingError, + "length of the NEWOBJ_EX argument tuple must be " + "exactly 3, not %zd", Py_SIZE(argtup)); + return -1; + } + + cls = PyTuple_GET_ITEM(argtup, 0); + if (!PyType_Check(cls)) { + PyErr_Format(st->PicklingError, + "first item from NEWOBJ_EX argument tuple must " + "be a class, not %.200s", Py_TYPE(cls)->tp_name); + return -1; + } + args = PyTuple_GET_ITEM(argtup, 1); + if (!PyTuple_Check(args)) { + PyErr_Format(st->PicklingError, + "second item from NEWOBJ_EX argument tuple must " + "be a tuple, not %.200s", Py_TYPE(args)->tp_name); + return -1; + } + kwargs = PyTuple_GET_ITEM(argtup, 2); + if (!PyDict_Check(kwargs)) { + PyErr_Format(st->PicklingError, + "third item from NEWOBJ_EX argument tuple must " + "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name); + return -1; + } + + if (save(self, cls, 0) < 0 || + save(self, args, 0) < 0 || + save(self, kwargs, 0) < 0 || + _Pickler_Write(self, &newobj_ex_op, 1) < 0) { + return -1; + } + } + else if (use_newobj) { PyObject *cls; PyObject *newargtup; PyObject *obj_class; @@ -3038,13 +3527,13 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) /* Sanity checks. */ if (Py_SIZE(argtup) < 1) { - PyErr_SetString(PicklingError, "__newobj__ arglist is empty"); + PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty"); return -1; } cls = PyTuple_GET_ITEM(argtup, 0); if (!PyType_Check(cls)) { - PyErr_SetString(PicklingError, "args[0] from " + PyErr_SetString(st->PicklingError, "args[0] from " "__newobj__ args is not a type"); return -1; } @@ -3054,7 +3543,7 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) p = obj_class != cls; /* true iff a problem */ Py_DECREF(obj_class); if (p) { - PyErr_SetString(PicklingError, "args[0] from " + PyErr_SetString(st->PicklingError, "args[0] from " "__newobj__ args has the wrong class"); return -1; } @@ -3114,8 +3603,23 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) the caller do not want to memoize the object. Not particularly useful, but that is to mimic the behavior save_reduce() in pickle.py when obj is None. */ - if (obj && memo_put(self, obj) < 0) - return -1; + if (obj != NULL) { + /* If the object is already in the memo, this means it is + recursive. In this case, throw away everything we put on the + stack, and fetch the object back from the memo. */ + if (PyMemoTable_Get(self->memo, obj)) { + const char pop_op = POP; + + if (_Pickler_Write(self, &pop_op, 1) < 0) + return -1; + if (memo_get(self, obj) < 0) + return -1; + + return 0; + } + else if (memo_put(self, obj) < 0) + return -1; + } if (listitems && batch_list(self, listitems) < 0) return -1; @@ -3140,6 +3644,9 @@ save(PicklerObject *self, PyObject *obj, int pers_save) PyObject *reduce_value = NULL; int status = 0; + if (_Pickler_OpcodeBoundary(self) < 0) + return -1; + if (Py_EnterRecursiveCall(" while pickling an object")) return -1; @@ -3168,14 +3675,6 @@ save(PicklerObject *self, PyObject *obj, int pers_save) status = save_none(self, obj); goto done; } - else if (obj == Py_Ellipsis) { - status = save_ellipsis(self, obj); - goto done; - } - else if (obj == Py_NotImplemented) { - status = save_notimplemented(self, obj); - goto done; - } else if (obj == Py_False || obj == Py_True) { status = save_bool(self, obj); goto done; @@ -3210,6 +3709,14 @@ save(PicklerObject *self, PyObject *obj, int pers_save) status = save_dict(self, obj); goto done; } + else if (type == &PySet_Type) { + status = save_set(self, obj); + goto done; + } + else if (type == &PyFrozenSet_Type) { + status = save_frozenset(self, obj); + goto done; + } else if (type == &PyList_Type) { status = save_list(self, obj); goto done; @@ -3224,16 +3731,6 @@ save(PicklerObject *self, PyObject *obj, int pers_save) } else if (type == &PyFunction_Type) { status = save_global(self, obj, NULL); - if (status < 0 && PyErr_ExceptionMatches(PickleError)) { - /* fall back to reduce */ - PyErr_Clear(); - } - else { - goto done; - } - } - else if (type == &PyCFunction_Type) { - status = save_global(self, obj, NULL); goto done; } @@ -3244,12 +3741,22 @@ save(PicklerObject *self, PyObject *obj, int pers_save) * __reduce_ex__ method, or the object's __reduce__ method. */ if (self->dispatch_table == NULL) { - reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type); - /* PyDict_GetItem() unlike PyObject_GetItem() and - PyObject_GetAttr() returns a borrowed ref */ - Py_XINCREF(reduce_func); + PickleState *st = _Pickle_GetGlobalState(); + reduce_func = PyDict_GetItemWithError(st->dispatch_table, + (PyObject *)type); + if (reduce_func == NULL) { + if (PyErr_Occurred()) { + goto error; + } + } else { + /* PyDict_GetItemWithError() returns a borrowed reference. + Increase the reference count to be consistent with + PyObject_GetItem and _PyObject_GetAttrId used below. */ + Py_INCREF(reduce_func); + } } else { - reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type); + reduce_func = PyObject_GetItem(self->dispatch_table, + (PyObject *)type); if (reduce_func == NULL) { if (PyErr_ExceptionMatches(PyExc_KeyError)) PyErr_Clear(); @@ -3259,25 +3766,16 @@ save(PicklerObject *self, PyObject *obj, int pers_save) } if (reduce_func != NULL) { Py_INCREF(obj); - reduce_value = _Pickler_FastCall(self, reduce_func, obj); + reduce_value = _Pickle_FastCall(reduce_func, obj); } else if (PyType_IsSubtype(type, &PyType_Type)) { status = save_global(self, obj, NULL); goto done; } else { - static PyObject *reduce_str = NULL; - static PyObject *reduce_ex_str = NULL; + _Py_IDENTIFIER(__reduce__); + _Py_IDENTIFIER(__reduce_ex__); - /* Cache the name of the reduce methods. */ - if (reduce_str == NULL) { - reduce_str = PyUnicode_InternFromString("__reduce__"); - if (reduce_str == NULL) - goto error; - reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__"); - if (reduce_ex_str == NULL) - goto error; - } /* XXX: If the __reduce__ method is defined, __reduce_ex__ is automatically defined as __reduce__. While this is convenient, this @@ -3288,26 +3786,34 @@ save(PicklerObject *self, PyObject *obj, int pers_save) don't actually have to check for a __reduce__ method. */ /* Check for a __reduce_ex__ method. */ - reduce_func = PyObject_GetAttr(obj, reduce_ex_str); + reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce_ex__); if (reduce_func != NULL) { PyObject *proto; proto = PyLong_FromLong(self->proto); if (proto != NULL) { - reduce_value = _Pickler_FastCall(self, reduce_func, proto); + reduce_value = _Pickle_FastCall(reduce_func, proto); } } else { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PickleState *st = _Pickle_GetGlobalState(); + + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Clear(); - else + } + else { goto error; + } /* Check for a __reduce__ method. */ - reduce_func = PyObject_GetAttr(obj, reduce_str); + reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__); if (reduce_func != NULL) { - reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL); + PyObject *empty_tuple = PyTuple_New(0); + reduce_value = PyObject_Call(reduce_func, empty_tuple, + NULL); + Py_DECREF(empty_tuple); } else { - PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R", + PyErr_Format(st->PicklingError, + "can't pickle '%.200s' object: %R", type->tp_name, obj); goto error; } @@ -3323,7 +3829,8 @@ save(PicklerObject *self, PyObject *obj, int pers_save) } if (!PyTuple_Check(reduce_value)) { - PyErr_SetString(PicklingError, + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->PicklingError, "__reduce__ must return a string or tuple"); goto error; } @@ -3335,6 +3842,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save) status = -1; } done: + Py_LeaveRecursiveCall(); Py_XDECREF(reduce_func); Py_XDECREF(reduce_value); @@ -3355,6 +3863,8 @@ dump(PicklerObject *self, PyObject *obj) header[1] = (unsigned char)self->proto; if (_Pickler_Write(self, header, 2) < 0) return -1; + if (self->proto >= 4) + self->framing = 1; } if (save(self, obj, 0) < 0 || @@ -3364,16 +3874,35 @@ dump(PicklerObject *self, PyObject *obj) return 0; } -PyDoc_STRVAR(Pickler_clear_memo_doc, -"clear_memo() -> None. Clears the pickler's \"memo\"." +/*[clinic] + +_pickle.Pickler.clear_memo + + self: PicklerObject + +Clears the pickler's "memo". + +The memo is the data structure that remembers which objects the +pickler has already seen, so that shared or recursive objects are +pickled by reference and not by value. This method is useful when +re-using picklers. +[clinic]*/ + +PyDoc_STRVAR(_pickle_Pickler_clear_memo__doc__, +"clear_memo()\n" +"Clears the pickler\'s \"memo\".\n" "\n" "The memo is the data structure that remembers which objects the\n" "pickler has already seen, so that shared or recursive objects are\n" "pickled by reference and not by value. This method is useful when\n" "re-using picklers."); +#define _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF \ + {"clear_memo", (PyCFunction)_pickle_Pickler_clear_memo, METH_NOARGS, _pickle_Pickler_clear_memo__doc__}, + static PyObject * -Pickler_clear_memo(PicklerObject *self) +_pickle_Pickler_clear_memo(PicklerObject *self) +/*[clinic checksum: 9c32be7e7a17ff82a81aae409d0d4f469033a5b2]*/ { if (self->memo) PyMemoTable_Clear(self->memo); @@ -3381,27 +3910,39 @@ Pickler_clear_memo(PicklerObject *self) Py_RETURN_NONE; } -PyDoc_STRVAR(Pickler_dump_doc, -"dump(obj) -> None. Write a pickled representation of obj to the open file."); +/*[clinic] + +_pickle.Pickler.dump + + self: PicklerObject + obj: object + / + +Write a pickled representation of the given object to the open file. +[clinic]*/ + +PyDoc_STRVAR(_pickle_Pickler_dump__doc__, +"dump(obj)\n" +"Write a pickled representation of the given object to the open file."); + +#define _PICKLE_PICKLER_DUMP_METHODDEF \ + {"dump", (PyCFunction)_pickle_Pickler_dump, METH_O, _pickle_Pickler_dump__doc__}, static PyObject * -Pickler_dump(PicklerObject *self, PyObject *args) +_pickle_Pickler_dump(PicklerObject *self, PyObject *obj) +/*[clinic checksum: b72a69ec98737fabf66dae7c5a3210178bdbd3e6]*/ { - PyObject *obj; - /* Check whether the Pickler was initialized correctly (issue3664). Developers often forget to call __init__() in their subclasses, which would trigger a segfault without this check. */ if (self->write == NULL) { - PyErr_Format(PicklingError, + PickleState *st = _Pickle_GetGlobalState(); + PyErr_Format(st->PicklingError, "Pickler.__init__() was not called by %s.__init__()", Py_TYPE(self)->tp_name); return NULL; } - if (!PyArg_ParseTuple(args, "O:dump", &obj)) - return NULL; - if (_Pickler_ClearBuffer(self) < 0) return NULL; @@ -3415,10 +3956,8 @@ Pickler_dump(PicklerObject *self, PyObject *args) } static struct PyMethodDef Pickler_methods[] = { - {"dump", (PyCFunction)Pickler_dump, METH_VARARGS, - Pickler_dump_doc}, - {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS, - Pickler_clear_memo_doc}, + _PICKLE_PICKLER_DUMP_METHODDEF + _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -3431,7 +3970,6 @@ Pickler_dealloc(PicklerObject *self) Py_XDECREF(self->write); Py_XDECREF(self->pers_func); Py_XDECREF(self->dispatch_table); - Py_XDECREF(self->arg); Py_XDECREF(self->fast_memo); PyMemoTable_Del(self->memo); @@ -3445,7 +3983,6 @@ Pickler_traverse(PicklerObject *self, visitproc visit, void *arg) Py_VISIT(self->write); Py_VISIT(self->pers_func); Py_VISIT(self->dispatch_table); - Py_VISIT(self->arg); Py_VISIT(self->fast_memo); return 0; } @@ -3457,7 +3994,6 @@ Pickler_clear(PicklerObject *self) Py_CLEAR(self->write); Py_CLEAR(self->pers_func); Py_CLEAR(self->dispatch_table); - Py_CLEAR(self->arg); Py_CLEAR(self->fast_memo); if (self->memo != NULL) { @@ -3469,15 +4005,45 @@ Pickler_clear(PicklerObject *self) } -PyDoc_STRVAR(Pickler_doc, -"Pickler(file, protocol=None)" -"\n" +/*[clinic] + +_pickle.Pickler.__init__ + + self: PicklerObject + file: object + protocol: object = NULL + fix_imports: bool = True + +This takes a binary file for writing a pickle data stream. + +The optional protocol argument tells the pickler to use the +given protocol; supported protocols are 0, 1, 2, 3 and 4. The +default protocol is 3; a backward-incompatible protocol designed for +Python 3. + +Specifying a negative protocol version selects the highest +protocol version supported. The higher the protocol used, the +more recent the version of Python needed to read the pickle +produced. + +The file argument must have a write() method that accepts a single +bytes argument. It can thus be a file object opened for binary +writing, a io.BytesIO instance, or any other custom object that +meets this interface. + +If fix_imports is True and protocol is less than 3, pickle will try to +map the new Python 3 names to the old module names used in Python 2, +so that the pickle data stream is readable with Python 2. +[clinic]*/ + +PyDoc_STRVAR(_pickle_Pickler___init____doc__, +"__init__(file, protocol=None, fix_imports=True)\n" "This takes a binary file for writing a pickle data stream.\n" "\n" "The optional protocol argument tells the pickler to use the\n" -"given protocol; supported protocols are 0, 1, 2, 3. The default\n" -"protocol is 3; a backward-incompatible protocol designed for\n" -"Python 3.0.\n" +"given protocol; supported protocols are 0, 1, 2, 3 and 4. The\n" +"default protocol is 3; a backward-incompatible protocol designed for\n" +"Python 3.\n" "\n" "Specifying a negative protocol version selects the highest\n" "protocol version supported. The higher the protocol used, the\n" @@ -3490,38 +4056,56 @@ PyDoc_STRVAR(Pickler_doc, "meets this interface.\n" "\n" "If fix_imports is True and protocol is less than 3, pickle will try to\n" -"map the new Python 3.x names to the old module names used in Python\n" -"2.x, so that the pickle data stream is readable with Python 2.x.\n"); +"map the new Python 3 names to the old module names used in Python 2,\n" +"so that the pickle data stream is readable with Python 2."); -static int -Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) +#define _PICKLE_PICKLER___INIT___METHODDEF \ + {"__init__", (PyCFunction)_pickle_Pickler___init__, METH_VARARGS|METH_KEYWORDS, _pickle_Pickler___init____doc__}, + +static PyObject * +_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file, PyObject *protocol, int fix_imports); + +static PyObject * +_pickle_Pickler___init__(PyObject *self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"file", "protocol", "fix_imports", 0}; + PyObject *return_value = NULL; + static char *_keywords[] = {"file", "protocol", "fix_imports", NULL}; PyObject *file; - PyObject *proto_obj = NULL; - PyObject *fix_imports = Py_True; + PyObject *protocol = NULL; + int fix_imports = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|Op:__init__", _keywords, + &file, &protocol, &fix_imports)) + goto exit; + return_value = _pickle_Pickler___init___impl((PicklerObject *)self, file, protocol, fix_imports); + +exit: + return return_value; +} + +static PyObject * +_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file, PyObject *protocol, int fix_imports) +/*[clinic checksum: c99ff417bd703a74affc4b708167e56e135e8969]*/ +{ _Py_IDENTIFIER(persistent_id); _Py_IDENTIFIER(dispatch_table); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler", - kwlist, &file, &proto_obj, &fix_imports)) - return -1; - /* In case of multiple __init__() calls, clear previous content. */ if (self->write != NULL) (void)Pickler_clear(self); - if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0) - return -1; + if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0) + return NULL; if (_Pickler_SetOutputStream(self, file) < 0) - return -1; + return NULL; /* memo and output_buffer may have already been created in _Pickler_New */ if (self->memo == NULL) { self->memo = PyMemoTable_New(); if (self->memo == NULL) - return -1; + return NULL; } self->output_len = 0; if (self->output_buffer == NULL) { @@ -3529,10 +4113,9 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) self->output_buffer = PyBytes_FromStringAndSize(NULL, self->max_output_len); if (self->output_buffer == NULL) - return -1; + return NULL; } - self->arg = NULL; self->fast = 0; self->fast_nesting = 0; self->fast_memo = NULL; @@ -3541,15 +4124,28 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) self->pers_func = _PyObject_GetAttrId((PyObject *)self, &PyId_persistent_id); if (self->pers_func == NULL) - return -1; + return NULL; } self->dispatch_table = NULL; if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) { self->dispatch_table = _PyObject_GetAttrId((PyObject *)self, &PyId_dispatch_table); if (self->dispatch_table == NULL) - return -1; + return NULL; + } + + Py_RETURN_NONE; +} + +/* Wrap the Clinic generated signature to slot it in tp_init. */ +static int +Pickler_init(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *result = _pickle_Pickler___init__(self, args, kwargs); + if (result == NULL) { + return -1; } + Py_DECREF(result); return 0; } @@ -3568,22 +4164,48 @@ typedef struct { PicklerObject *pickler; /* Pickler whose memo table we're proxying. */ } PicklerMemoProxyObject; -PyDoc_STRVAR(pmp_clear_doc, -"memo.clear() -> None. Remove all items from memo."); +/*[clinic] +_pickle.PicklerMemoProxy.clear + + self: PicklerMemoProxyObject + +Remove all items from memo. +[clinic]*/ + +PyDoc_STRVAR(_pickle_PicklerMemoProxy_clear__doc__, +"clear()\n" +"Remove all items from memo."); + +#define _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF \ + {"clear", (PyCFunction)_pickle_PicklerMemoProxy_clear, METH_NOARGS, _pickle_PicklerMemoProxy_clear__doc__}, static PyObject * -pmp_clear(PicklerMemoProxyObject *self) +_pickle_PicklerMemoProxy_clear(PicklerMemoProxyObject *self) +/*[clinic checksum: 507f13938721992e175a3e58b5ad02620045a1cc]*/ { if (self->pickler->memo) PyMemoTable_Clear(self->pickler->memo); Py_RETURN_NONE; } -PyDoc_STRVAR(pmp_copy_doc, -"memo.copy() -> new_memo. Copy the memo to a new object."); +/*[clinic] +_pickle.PicklerMemoProxy.copy + + self: PicklerMemoProxyObject + +Copy the memo to a new object. +[clinic]*/ + +PyDoc_STRVAR(_pickle_PicklerMemoProxy_copy__doc__, +"copy()\n" +"Copy the memo to a new object."); + +#define _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF \ + {"copy", (PyCFunction)_pickle_PicklerMemoProxy_copy, METH_NOARGS, _pickle_PicklerMemoProxy_copy__doc__}, static PyObject * -pmp_copy(PicklerMemoProxyObject *self) +_pickle_PicklerMemoProxy_copy(PicklerMemoProxyObject *self) +/*[clinic checksum: 73a5117ab354290ebdbe07bd0bf7232d0936a69d]*/ { Py_ssize_t i; PyMemoTable *memo; @@ -3620,14 +4242,27 @@ pmp_copy(PicklerMemoProxyObject *self) return NULL; } -PyDoc_STRVAR(pmp_reduce_doc, -"memo.__reduce__(). Pickling support."); +/*[clinic] +_pickle.PicklerMemoProxy.__reduce__ + + self: PicklerMemoProxyObject + +Implement pickle support. +[clinic]*/ + +PyDoc_STRVAR(_pickle_PicklerMemoProxy___reduce____doc__, +"__reduce__()\n" +"Implement pickle support."); + +#define _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF \ + {"__reduce__", (PyCFunction)_pickle_PicklerMemoProxy___reduce__, METH_NOARGS, _pickle_PicklerMemoProxy___reduce____doc__}, static PyObject * -pmp_reduce(PicklerMemoProxyObject *self, PyObject *args) +_pickle_PicklerMemoProxy___reduce__(PicklerMemoProxyObject *self) +/*[clinic checksum: 40f0bf7a9b161e77130674f0481bda0a0184dcce]*/ { PyObject *reduce_value, *dict_args; - PyObject *contents = pmp_copy(self); + PyObject *contents = _pickle_PicklerMemoProxy_copy(self); if (contents == NULL) return NULL; @@ -3650,9 +4285,9 @@ pmp_reduce(PicklerMemoProxyObject *self, PyObject *args) } static PyMethodDef picklerproxy_methods[] = { - {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc}, - {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc}, - {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc}, + _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF + _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF + _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF {NULL, NULL} /* sentinel */ }; @@ -3864,7 +4499,7 @@ static PyTypeObject Pickler_Type = { 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, - Pickler_doc, /*tp_doc*/ + _pickle_Pickler___init____doc__, /*tp_doc*/ (traverseproc)Pickler_traverse, /*tp_traverse*/ (inquiry)Pickler_clear, /*tp_clear*/ 0, /*tp_richcompare*/ @@ -3879,7 +4514,7 @@ static PyTypeObject Pickler_Type = { 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ - (initproc)Pickler_init, /*tp_init*/ + Pickler_init, /*tp_init*/ PyType_GenericAlloc, /*tp_alloc*/ PyType_GenericNew, /*tp_new*/ PyObject_GC_Del, /*tp_free*/ @@ -3905,8 +4540,9 @@ find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name) static Py_ssize_t marker(UnpicklerObject *self) { + PickleState *st = _Pickle_GetGlobalState(); if (self->num_marks < 1) { - PyErr_SetString(UnpicklingError, "could not find MARK"); + PyErr_SetString(st->UnpicklingError, "could not find MARK"); return -1; } @@ -3923,7 +4559,8 @@ load_none(UnpicklerObject *self) static int bad_readline(void) { - PyErr_SetString(UnpicklingError, "pickle data was truncated"); + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "pickle data was truncated"); return -1; } @@ -3984,17 +4621,15 @@ load_bool(UnpicklerObject *self, PyObject *boolean) * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX. */ static Py_ssize_t -calc_binsize(char *bytes, int size) +calc_binsize(char *bytes, int nbytes) { unsigned char *s = (unsigned char *)bytes; + int i; size_t x = 0; - assert(size == 4); - - x = (size_t) s[0]; - x |= (size_t) s[1] << 8; - x |= (size_t) s[2] << 16; - x |= (size_t) s[3] << 24; + for (i = 0; i < nbytes && i < sizeof(size_t); i++) { + x |= (size_t) s[i] << (8 * i); + } if (x > PY_SSIZE_T_MAX) return -1; @@ -4008,21 +4643,21 @@ calc_binsize(char *bytes, int size) * of x-platform bugs. */ static long -calc_binint(char *bytes, int size) +calc_binint(char *bytes, int nbytes) { unsigned char *s = (unsigned char *)bytes; - int i = size; + int i; long x = 0; - for (i = 0; i < size; i++) { - x |= (long)s[i] << (i * 8); + for (i = 0; i < nbytes; i++) { + x |= (long)s[i] << (8 * i); } /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) * is signed, so on a box with longs bigger than 4 bytes we need * to extend a BININT's sign bit to the full width. */ - if (SIZEOF_LONG > 4 && size == 4) { + if (SIZEOF_LONG > 4 && nbytes == 4) { x |= -(x & (1L << 31)); } @@ -4120,8 +4755,9 @@ load_counted_long(UnpicklerObject *self, int size) size = calc_binint(nbytes, size); if (size < 0) { + PickleState *st = _Pickle_GetGlobalState(); /* Corrupt or hostile pickle -- we never write one like this */ - PyErr_SetString(UnpicklingError, + PyErr_SetString(st->UnpicklingError, "LONG pickle has negative byte count"); return -1; } @@ -4201,36 +4837,24 @@ load_string(UnpicklerObject *self) if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; - if (len < 2) - return bad_readline(); - if ((s = strdup(s)) == NULL) { - PyErr_NoMemory(); - return -1; - } - + /* Strip the newline */ + len--; /* Strip outermost quotes */ - while (len > 0 && s[len - 1] <= ' ') - len--; - if (len > 1 && s[0] == '"' && s[len - 1] == '"') { - s[len - 1] = '\0'; - p = s + 1; - len -= 2; - } - else if (len > 1 && s[0] == '\'' && s[len - 1] == '\'') { - s[len - 1] = '\0'; + if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) { p = s + 1; len -= 2; } else { - free(s); - PyErr_SetString(PyExc_ValueError, "insecure string pickle"); + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, + "the STRING opcode argument must be quoted"); return -1; } + assert(len >= 0); /* Use the PyBytes API to decode the string, since that is what is used to encode, and then coerce the result to Unicode. */ bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL); - free(s); if (bytes == NULL) return -1; str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors); @@ -4243,50 +4867,27 @@ load_string(UnpicklerObject *self) } static int -load_binbytes(UnpicklerObject *self) +load_counted_binbytes(UnpicklerObject *self, int nbytes) { PyObject *bytes; - Py_ssize_t x; + Py_ssize_t size; char *s; - if (_Unpickler_Read(self, &s, 4) < 0) + if (_Unpickler_Read(self, &s, nbytes) < 0) return -1; - x = calc_binsize(s, 4); - if (x < 0) { + size = calc_binsize(s, nbytes); + if (size < 0) { PyErr_Format(PyExc_OverflowError, "BINBYTES exceeds system's maximum size of %zd bytes", - PY_SSIZE_T_MAX - ); + PY_SSIZE_T_MAX); return -1; } - if (_Unpickler_Read(self, &s, x) < 0) - return -1; - bytes = PyBytes_FromStringAndSize(s, x); - if (bytes == NULL) - return -1; - - PDATA_PUSH(self->stack, bytes, -1); - return 0; -} - -static int -load_short_binbytes(UnpicklerObject *self) -{ - PyObject *bytes; - Py_ssize_t x; - char *s; - - if (_Unpickler_Read(self, &s, 1) < 0) - return -1; - - x = (unsigned char)s[0]; - - if (_Unpickler_Read(self, &s, x) < 0) + if (_Unpickler_Read(self, &s, size) < 0) return -1; - bytes = PyBytes_FromStringAndSize(s, x); + bytes = PyBytes_FromStringAndSize(s, size); if (bytes == NULL) return -1; @@ -4295,51 +4896,28 @@ load_short_binbytes(UnpicklerObject *self) } static int -load_binstring(UnpicklerObject *self) +load_counted_binstring(UnpicklerObject *self, int nbytes) { PyObject *str; - Py_ssize_t x; + Py_ssize_t size; char *s; - if (_Unpickler_Read(self, &s, 4) < 0) + if (_Unpickler_Read(self, &s, nbytes) < 0) return -1; - x = calc_binint(s, 4); - if (x < 0) { - PyErr_SetString(UnpicklingError, - "BINSTRING pickle has negative byte count"); + size = calc_binsize(s, nbytes); + if (size < 0) { + PickleState *st = _Pickle_GetGlobalState(); + PyErr_Format(st->UnpicklingError, + "BINSTRING exceeds system's maximum size of %zd bytes", + PY_SSIZE_T_MAX); return -1; } - if (_Unpickler_Read(self, &s, x) < 0) - return -1; - - /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); - if (str == NULL) - return -1; - - PDATA_PUSH(self->stack, str, -1); - return 0; -} - -static int -load_short_binstring(UnpicklerObject *self) -{ - PyObject *str; - Py_ssize_t x; - char *s; - - if (_Unpickler_Read(self, &s, 1) < 0) - return -1; - - x = (unsigned char)s[0]; - - if (_Unpickler_Read(self, &s, x) < 0) + if (_Unpickler_Read(self, &s, size) < 0) return -1; - /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); + str = PyUnicode_Decode(s, size, self->encoding, self->errors); if (str == NULL) return -1; @@ -4368,25 +4946,23 @@ load_unicode(UnpicklerObject *self) } static int -load_binunicode(UnpicklerObject *self) +load_counted_binunicode(UnpicklerObject *self, int nbytes) { PyObject *str; Py_ssize_t size; char *s; - if (_Unpickler_Read(self, &s, 4) < 0) + if (_Unpickler_Read(self, &s, nbytes) < 0) return -1; - size = calc_binsize(s, 4); + size = calc_binsize(s, nbytes); if (size < 0) { PyErr_Format(PyExc_OverflowError, "BINUNICODE exceeds system's maximum size of %zd bytes", - PY_SSIZE_T_MAX - ); + PY_SSIZE_T_MAX); return -1; } - if (_Unpickler_Read(self, &s, size) < 0) return -1; @@ -4458,6 +5034,17 @@ load_empty_dict(UnpicklerObject *self) } static int +load_empty_set(UnpicklerObject *self) +{ + PyObject *set; + + if ((set = PySet_New(NULL)) == NULL) + return -1; + PDATA_PUSH(self->stack, set, -1); + return 0; +} + +static int load_list(UnpicklerObject *self) { PyObject *list; @@ -4499,6 +5086,29 @@ load_dict(UnpicklerObject *self) return 0; } +static int +load_frozenset(UnpicklerObject *self) +{ + PyObject *items; + PyObject *frozenset; + Py_ssize_t i; + + if ((i = marker(self)) < 0) + return -1; + + items = Pdata_poptuple(self->stack, i); + if (items == NULL) + return -1; + + frozenset = PyFrozenSet_New(items); + Py_DECREF(items); + if (frozenset == NULL) + return -1; + + PDATA_PUSH(self->stack, frozenset, -1); + return 0; +} + static PyObject * instantiate(PyObject *cls, PyObject *args) { @@ -4606,6 +5216,7 @@ load_newobj(UnpicklerObject *self) PyObject *clsraw = NULL; PyTypeObject *cls; /* clsraw cast to its true type */ PyObject *obj; + PickleState *st = _Pickle_GetGlobalState(); /* Stack is ... cls argtuple, and we want to call * cls.__new__(cls, *argtuple). @@ -4614,7 +5225,8 @@ load_newobj(UnpicklerObject *self) if (args == NULL) goto error; if (!PyTuple_Check(args)) { - PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple."); + PyErr_SetString(st->UnpicklingError, + "NEWOBJ expected an arg " "tuple."); goto error; } @@ -4623,12 +5235,12 @@ load_newobj(UnpicklerObject *self) if (cls == NULL) goto error; if (!PyType_Check(cls)) { - PyErr_SetString(UnpicklingError, "NEWOBJ class argument " + PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument " "isn't a type object"); goto error; } if (cls->tp_new == NULL) { - PyErr_SetString(UnpicklingError, "NEWOBJ class argument " + PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument " "has NULL tp_new"); goto error; } @@ -4650,6 +5262,58 @@ load_newobj(UnpicklerObject *self) } static int +load_newobj_ex(UnpicklerObject *self) +{ + PyObject *cls, *args, *kwargs; + PyObject *obj; + PickleState *st = _Pickle_GetGlobalState(); + + PDATA_POP(self->stack, kwargs); + if (kwargs == NULL) { + return -1; + } + PDATA_POP(self->stack, args); + if (args == NULL) { + Py_DECREF(kwargs); + return -1; + } + PDATA_POP(self->stack, cls); + if (cls == NULL) { + Py_DECREF(kwargs); + Py_DECREF(args); + return -1; + } + + if (!PyType_Check(cls)) { + Py_DECREF(kwargs); + Py_DECREF(args); + Py_DECREF(cls); + PyErr_Format(st->UnpicklingError, + "NEWOBJ_EX class argument must be a type, not %.200s", + Py_TYPE(cls)->tp_name); + return -1; + } + + if (((PyTypeObject *)cls)->tp_new == NULL) { + Py_DECREF(kwargs); + Py_DECREF(args); + Py_DECREF(cls); + PyErr_SetString(st->UnpicklingError, + "NEWOBJ_EX class argument doesn't have __new__"); + return -1; + } + obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs); + Py_DECREF(kwargs); + Py_DECREF(args); + Py_DECREF(cls); + if (obj == NULL) { + return -1; + } + PDATA_PUSH(self->stack, obj, -1); + return 0; +} + +static int load_global(UnpicklerObject *self) { PyObject *global = NULL; @@ -4686,6 +5350,32 @@ load_global(UnpicklerObject *self) } static int +load_stack_global(UnpicklerObject *self) +{ + PyObject *global; + PyObject *module_name; + PyObject *global_name; + + PDATA_POP(self->stack, global_name); + PDATA_POP(self->stack, module_name); + if (module_name == NULL || !PyUnicode_CheckExact(module_name) || + global_name == NULL || !PyUnicode_CheckExact(global_name)) { + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str"); + Py_XDECREF(global_name); + Py_XDECREF(module_name); + return -1; + } + global = find_class(self, module_name, global_name); + Py_DECREF(global_name); + Py_DECREF(module_name); + if (global == NULL) + return -1; + PDATA_PUSH(self->stack, global, -1); + return 0; +} + +static int load_persid(UnpicklerObject *self) { PyObject *pid; @@ -4702,9 +5392,9 @@ load_persid(UnpicklerObject *self) if (pid == NULL) return -1; - /* Ugh... this does not leak since _Unpickler_FastCall() steals the - reference to pid first. */ - pid = _Unpickler_FastCall(self, self->pers_func, pid); + /* This does not leak since _Pickle_FastCall() steals the reference + to pid first. */ + pid = _Pickle_FastCall(self->pers_func, pid); if (pid == NULL) return -1; @@ -4712,7 +5402,8 @@ load_persid(UnpicklerObject *self) return 0; } else { - PyErr_SetString(UnpicklingError, + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "A load persistent id instruction was encountered,\n" "but no persistent_load function was specified."); return -1; @@ -4729,9 +5420,9 @@ load_binpersid(UnpicklerObject *self) if (pid == NULL) return -1; - /* Ugh... this does not leak since _Unpickler_FastCall() steals the + /* This does not leak since _Pickle_FastCall() steals the reference to pid first. */ - pid = _Unpickler_FastCall(self, self->pers_func, pid); + pid = _Pickle_FastCall(self->pers_func, pid); if (pid == NULL) return -1; @@ -4739,7 +5430,8 @@ load_binpersid(UnpicklerObject *self) return 0; } else { - PyErr_SetString(UnpicklingError, + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "A load persistent id instruction was encountered,\n" "but no persistent_load function was specified."); return -1; @@ -4895,6 +5587,7 @@ load_extension(UnpicklerObject *self, int nbytes) PyObject *obj; /* the object to push */ PyObject *pair; /* (module_name, class_name) */ PyObject *module_name, *class_name; + PickleState *st = _Pickle_GetGlobalState(); assert(nbytes == 1 || nbytes == 2 || nbytes == 4); if (_Unpickler_Read(self, &codebytes, nbytes) < 0) @@ -4902,7 +5595,7 @@ load_extension(UnpicklerObject *self, int nbytes) code = calc_binint(codebytes, nbytes); if (code <= 0) { /* note that 0 is forbidden */ /* Corrupt or hostile pickle. */ - PyErr_SetString(UnpicklingError, "EXT specifies code <= 0"); + PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0"); return -1; } @@ -4910,20 +5603,26 @@ load_extension(UnpicklerObject *self, int nbytes) py_code = PyLong_FromLong(code); if (py_code == NULL) return -1; - obj = PyDict_GetItem(extension_cache, py_code); + obj = PyDict_GetItemWithError(st->extension_cache, py_code); if (obj != NULL) { /* Bingo. */ Py_DECREF(py_code); PDATA_APPEND(self->stack, obj, -1); return 0; } + if (PyErr_Occurred()) { + Py_DECREF(py_code); + return -1; + } /* Look up the (module_name, class_name) pair. */ - pair = PyDict_GetItem(inverted_registry, py_code); + pair = PyDict_GetItemWithError(st->inverted_registry, py_code); if (pair == NULL) { Py_DECREF(py_code); - PyErr_Format(PyExc_ValueError, "unregistered extension " - "code %ld", code); + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, "unregistered extension " + "code %ld", code); + } return -1; } /* Since the extension registry is manipulable via Python code, @@ -4944,7 +5643,7 @@ load_extension(UnpicklerObject *self, int nbytes) return -1; } /* Cache code -> obj. */ - code = PyDict_SetItem(extension_cache, py_code, obj); + code = PyDict_SetItem(st->extension_cache, py_code, obj); Py_DECREF(py_code); if (code < 0) { Py_DECREF(obj); @@ -5029,6 +5728,18 @@ load_long_binput(UnpicklerObject *self) } static int +load_memoize(UnpicklerObject *self) +{ + PyObject *value; + + if (Py_SIZE(self->stack) <= 0) + return stack_underflow(); + value = self->stack->data[Py_SIZE(self->stack) - 1]; + + return _Unpickler_MemoPut(self, self->memo_len, value); +} + +static int do_append(UnpicklerObject *self, Py_ssize_t x) { PyObject *value; @@ -5067,7 +5778,7 @@ do_append(UnpicklerObject *self, Py_ssize_t x) PyObject *result; value = self->stack->data[i]; - result = _Unpickler_FastCall(self, append_func, value); + result = _Pickle_FastCall(append_func, value); if (result == NULL) { Pdata_clear(self->stack, i + 1); Py_SIZE(self->stack) = x; @@ -5109,8 +5820,10 @@ do_setitems(UnpicklerObject *self, Py_ssize_t x) if (len == x) /* nothing to do */ return 0; if ((len - x) % 2 != 0) { + PickleState *st = _Pickle_GetGlobalState(); /* Currupt or hostile pickle -- we never write one like this. */ - PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS"); + PyErr_SetString(st->UnpicklingError, + "odd number of items for SETITEMS"); return -1; } @@ -5144,6 +5857,59 @@ load_setitems(UnpicklerObject *self) } static int +load_additems(UnpicklerObject *self) +{ + PyObject *set; + Py_ssize_t mark, len, i; + + mark = marker(self); + len = Py_SIZE(self->stack); + if (mark > len || mark <= 0) + return stack_underflow(); + if (len == mark) /* nothing to do */ + return 0; + + set = self->stack->data[mark - 1]; + + if (PySet_Check(set)) { + PyObject *items; + int status; + + items = Pdata_poptuple(self->stack, mark); + if (items == NULL) + return -1; + + status = _PySet_Update(set, items); + Py_DECREF(items); + return status; + } + else { + PyObject *add_func; + _Py_IDENTIFIER(add); + + add_func = _PyObject_GetAttrId(set, &PyId_add); + if (add_func == NULL) + return -1; + for (i = mark; i < len; i++) { + PyObject *result; + PyObject *item; + + item = self->stack->data[i]; + result = _Pickle_FastCall(add_func, item); + if (result == NULL) { + Pdata_clear(self->stack, i + 1); + Py_SIZE(self->stack) = mark; + return -1; + } + Py_DECREF(result); + } + Py_SIZE(self->stack) = mark; + } + + return 0; +} + +static int load_build(UnpicklerObject *self) { PyObject *state, *inst, *slotstate; @@ -5176,9 +5942,7 @@ load_build(UnpicklerObject *self) PyObject *result; /* The explicit __setstate__ is responsible for everything. */ - /* Ugh... this does not leak since _Unpickler_FastCall() steals the - reference to state first. */ - result = _Unpickler_FastCall(self, setstate, state); + result = _Pickle_FastCall(setstate, state); Py_DECREF(setstate); if (result == NULL) return -1; @@ -5209,7 +5973,8 @@ load_build(UnpicklerObject *self) _Py_IDENTIFIER(__dict__); if (!PyDict_Check(state)) { - PyErr_SetString(UnpicklingError, "state is not a dictionary"); + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "state is not a dictionary"); goto error; } dict = _PyObject_GetAttrId(inst, &PyId___dict__); @@ -5238,7 +6003,8 @@ load_build(UnpicklerObject *self) Py_ssize_t i; if (!PyDict_Check(slotstate)) { - PyErr_SetString(UnpicklingError, + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "slot state is not a dictionary"); goto error; } @@ -5344,14 +6110,39 @@ load_proto(UnpicklerObject *self) return -1; } +static int +load_frame(UnpicklerObject *self) +{ + char *s; + Py_ssize_t frame_len; + + if (_Unpickler_Read(self, &s, 8) < 0) + return -1; + + frame_len = calc_binsize(s, 8); + if (frame_len < 0) { + PyErr_Format(PyExc_OverflowError, + "FRAME length exceeds system's maximum of %zd bytes", + PY_SSIZE_T_MAX); + return -1; + } + + if (_Unpickler_Read(self, &s, frame_len) < 0) + return -1; + + /* Rewind to start of frame */ + self->next_read_idx -= frame_len; + return 0; +} + static PyObject * load(UnpicklerObject *self) { - PyObject *err; PyObject *value = NULL; char *s; self->num_marks = 0; + self->proto = 0; if (Py_SIZE(self->stack)) Pdata_clear(self->stack, 0); @@ -5377,13 +6168,16 @@ load(UnpicklerObject *self) OP_ARG(LONG4, load_counted_long, 4) OP(FLOAT, load_float) OP(BINFLOAT, load_binfloat) - OP(BINBYTES, load_binbytes) - OP(SHORT_BINBYTES, load_short_binbytes) - OP(BINSTRING, load_binstring) - OP(SHORT_BINSTRING, load_short_binstring) + OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1) + OP_ARG(BINBYTES, load_counted_binbytes, 4) + OP_ARG(BINBYTES8, load_counted_binbytes, 8) + OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1) + OP_ARG(BINSTRING, load_counted_binstring, 4) OP(STRING, load_string) OP(UNICODE, load_unicode) - OP(BINUNICODE, load_binunicode) + OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1) + OP_ARG(BINUNICODE, load_counted_binunicode, 4) + OP_ARG(BINUNICODE8, load_counted_binunicode, 8) OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0) OP_ARG(TUPLE1, load_counted_tuple, 1) OP_ARG(TUPLE2, load_counted_tuple, 2) @@ -5393,10 +6187,15 @@ load(UnpicklerObject *self) OP(LIST, load_list) OP(EMPTY_DICT, load_empty_dict) OP(DICT, load_dict) + OP(EMPTY_SET, load_empty_set) + OP(ADDITEMS, load_additems) + OP(FROZENSET, load_frozenset) OP(OBJ, load_obj) OP(INST, load_inst) OP(NEWOBJ, load_newobj) + OP(NEWOBJ_EX, load_newobj_ex) OP(GLOBAL, load_global) + OP(STACK_GLOBAL, load_stack_global) OP(APPEND, load_append) OP(APPENDS, load_appends) OP(BUILD, load_build) @@ -5408,6 +6207,7 @@ load(UnpicklerObject *self) OP(BINPUT, load_binput) OP(LONG_BINPUT, load_long_binput) OP(PUT, load_put) + OP(MEMOIZE, load_memoize) OP(POP, load_pop) OP(POP_MARK, load_pop_mark) OP(SETITEM, load_setitem) @@ -5416,6 +6216,7 @@ load(UnpicklerObject *self) OP(BINPERSID, load_binpersid) OP(REDUCE, load_reduce) OP(PROTO, load_proto) + OP(FRAME, load_frame) OP_ARG(EXT1, load_extension, 1) OP_ARG(EXT2, load_extension, 2) OP_ARG(EXT4, load_extension, 4) @@ -5426,81 +6227,137 @@ load(UnpicklerObject *self) break; default: - if (s[0] == '\0') + if (s[0] == '\0') { PyErr_SetNone(PyExc_EOFError); - else - PyErr_Format(UnpicklingError, + } + else { + PickleState *st = _Pickle_GetGlobalState(); + PyErr_Format(st->UnpicklingError, "invalid load key, '%c'.", s[0]); + } return NULL; } break; /* and we are done! */ } - if (_Unpickler_SkipConsumed(self) < 0) + if (PyErr_Occurred()) { return NULL; + } - /* XXX: It is not clear what this is actually for. */ - if ((err = PyErr_Occurred())) { - if (err == PyExc_EOFError) { - PyErr_SetNone(PyExc_EOFError); - } + if (_Unpickler_SkipConsumed(self) < 0) return NULL; - } PDATA_POP(self->stack, value); return value; } -PyDoc_STRVAR(Unpickler_load_doc, -"load() -> object. Load a pickle." +/*[clinic] + +_pickle.Unpickler.load + +Load a pickle. + +Read a pickled object representation from the open file object given in +the constructor, and return the reconstituted object hierarchy specified +therein. +[clinic]*/ + +PyDoc_STRVAR(_pickle_Unpickler_load__doc__, +"load()\n" +"Load a pickle.\n" "\n" "Read a pickled object representation from the open file object given in\n" "the constructor, and return the reconstituted object hierarchy specified\n" -"therein.\n"); +"therein."); + +#define _PICKLE_UNPICKLER_LOAD_METHODDEF \ + {"load", (PyCFunction)_pickle_Unpickler_load, METH_NOARGS, _pickle_Unpickler_load__doc__}, static PyObject * -Unpickler_load(UnpicklerObject *self) +_pickle_Unpickler_load(PyObject *self) +/*[clinic checksum: 9a30ba4e4d9221d4dcd705e1471ab11b2c9e3ac6]*/ { + UnpicklerObject *unpickler = (UnpicklerObject*)self; + /* Check whether the Unpickler was initialized correctly. This prevents segfaulting if a subclass overridden __init__ with a function that does not call Unpickler.__init__(). Here, we simply ensure that self->read is not NULL. */ - if (self->read == NULL) { - PyErr_Format(UnpicklingError, + if (unpickler->read == NULL) { + PickleState *st = _Pickle_GetGlobalState(); + PyErr_Format(st->UnpicklingError, "Unpickler.__init__() was not called by %s.__init__()", - Py_TYPE(self)->tp_name); + Py_TYPE(unpickler)->tp_name); return NULL; } - return load(self); + return load(unpickler); } /* The name of find_class() is misleading. In newer pickle protocols, this function is used for loading any global (i.e., functions), not just classes. The name is kept only for backward compatibility. */ -PyDoc_STRVAR(Unpickler_find_class_doc, -"find_class(module_name, global_name) -> object.\n" +/*[clinic] + +_pickle.Unpickler.find_class + + self: UnpicklerObject + module_name: object + global_name: object + / + +Return an object from a specified module. + +If necessary, the module will be imported. Subclasses may override this +method (e.g. to restrict unpickling of arbitrary classes and functions). + +This method is called whenever a class or a function object is +needed. Both arguments passed are str objects. +[clinic]*/ + +PyDoc_STRVAR(_pickle_Unpickler_find_class__doc__, +"find_class(module_name, global_name)\n" +"Return an object from a specified module.\n" "\n" -"Return an object from a specified module, importing the module if\n" -"necessary. Subclasses may override this method (e.g. to restrict\n" -"unpickling of arbitrary classes and functions).\n" +"If necessary, the module will be imported. Subclasses may override this\n" +"method (e.g. to restrict unpickling of arbitrary classes and functions).\n" "\n" "This method is called whenever a class or a function object is\n" -"needed. Both arguments passed are str objects.\n"); +"needed. Both arguments passed are str objects."); + +#define _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF \ + {"find_class", (PyCFunction)_pickle_Unpickler_find_class, METH_VARARGS, _pickle_Unpickler_find_class__doc__}, static PyObject * -Unpickler_find_class(UnpicklerObject *self, PyObject *args) +_pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyObject *module_name, PyObject *global_name); + +static PyObject * +_pickle_Unpickler_find_class(PyObject *self, PyObject *args) +{ + PyObject *return_value = NULL; + PyObject *module_name; + PyObject *global_name; + + if (!PyArg_ParseTuple(args, + "OO:find_class", + &module_name, &global_name)) + goto exit; + return_value = _pickle_Unpickler_find_class_impl((UnpicklerObject *)self, module_name, global_name); + +exit: + return return_value; +} + +static PyObject * +_pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyObject *module_name, PyObject *global_name) +/*[clinic checksum: b7d05d4dd8adc698e5780c1ac2be0f5062d33915]*/ { PyObject *global; PyObject *modules_dict; PyObject *module; - PyObject *module_name, *global_name; - - if (!PyArg_UnpackTuple(args, "find_class", 2, 2, - &module_name, &global_name)) - return NULL; + _Py_IDENTIFIER(modules); /* Try to map the old names used in Python 2.x to the new ones used in Python 3.x. We do this only with old pickle protocols and when the @@ -5508,13 +6365,14 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args) if (self->proto < 3 && self->fix_imports) { PyObject *key; PyObject *item; + PickleState *st = _Pickle_GetGlobalState(); /* Check if the global (i.e., a function or a class) was renamed or moved to another module. */ key = PyTuple_Pack(2, module_name, global_name); if (key == NULL) return NULL; - item = PyDict_GetItemWithError(name_mapping_2to3, key); + item = PyDict_GetItemWithError(st->name_mapping_2to3, key); Py_DECREF(key); if (item) { if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { @@ -5540,7 +6398,7 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args) } /* Check if the module was renamed. */ - item = PyDict_GetItemWithError(import_mapping_2to3, module_name); + item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name); if (item) { if (!PyUnicode_Check(item)) { PyErr_Format(PyExc_RuntimeError, @@ -5555,9 +6413,11 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args) } } - modules_dict = PySys_GetObject("modules"); - if (modules_dict == NULL) + modules_dict = _PySys_GetObjectId(&PyId_modules); + if (modules_dict == NULL) { + PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); return NULL; + } module = PyDict_GetItemWithError(modules_dict, module_name); if (module == NULL) { @@ -5566,20 +6426,18 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args) module = PyImport_Import(module_name); if (module == NULL) return NULL; - global = PyObject_GetAttr(module, global_name); + global = getattribute(module, global_name, self->proto >= 4); Py_DECREF(module); } else { - global = PyObject_GetAttr(module, global_name); + global = getattribute(module, global_name, self->proto >= 4); } return global; } static struct PyMethodDef Unpickler_methods[] = { - {"load", (PyCFunction)Unpickler_load, METH_NOARGS, - Unpickler_load_doc}, - {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS, - Unpickler_find_class_doc}, + _PICKLE_UNPICKLER_LOAD_METHODDEF + _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -5592,7 +6450,6 @@ Unpickler_dealloc(UnpicklerObject *self) Py_XDECREF(self->peek); Py_XDECREF(self->stack); Py_XDECREF(self->pers_func); - Py_XDECREF(self->arg); if (self->buffer.buf != NULL) { PyBuffer_Release(&self->buffer); self->buffer.buf = NULL; @@ -5601,8 +6458,8 @@ Unpickler_dealloc(UnpicklerObject *self) _Unpickler_MemoCleanup(self); PyMem_Free(self->marks); PyMem_Free(self->input_line); - free(self->encoding); - free(self->errors); + PyMem_Free(self->encoding); + PyMem_Free(self->errors); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -5615,7 +6472,6 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg) Py_VISIT(self->peek); Py_VISIT(self->stack); Py_VISIT(self->pers_func); - Py_VISIT(self->arg); return 0; } @@ -5627,7 +6483,6 @@ Unpickler_clear(UnpicklerObject *self) Py_CLEAR(self->peek); Py_CLEAR(self->stack); Py_CLEAR(self->pers_func); - Py_CLEAR(self->arg); if (self->buffer.buf != NULL) { PyBuffer_Release(&self->buffer); self->buffer.buf = NULL; @@ -5638,17 +6493,49 @@ Unpickler_clear(UnpicklerObject *self) self->marks = NULL; PyMem_Free(self->input_line); self->input_line = NULL; - free(self->encoding); + PyMem_Free(self->encoding); self->encoding = NULL; - free(self->errors); + PyMem_Free(self->errors); self->errors = NULL; return 0; } -PyDoc_STRVAR(Unpickler_doc, -"Unpickler(file, *, encoding='ASCII', errors='strict')" -"\n" +/*[clinic] + +_pickle.Unpickler.__init__ + + self: UnpicklerObject + file: object + * + fix_imports: bool = True + encoding: str = 'ASCII' + errors: str = 'strict' + +This takes a binary file for reading a pickle data stream. + +The protocol version of the pickle is detected automatically, so no +proto argument is needed. + +The file-like object must have two methods, a read() method +that takes an integer argument, and a readline() method that +requires no arguments. Both methods should return bytes. +Thus file-like object can be a binary file object opened for +reading, a BytesIO object, or any other custom object that +meets this interface. + +Optional keyword arguments are *fix_imports*, *encoding* and *errors*, +which are used to control compatiblity support for pickle stream +generated by Python 2.x. If *fix_imports* is True, pickle will try to +map the old Python 2.x names to the new names used in Python 3.x. The +*encoding* and *errors* tell pickle how to decode 8-bit string +instances pickled by Python 2.x; these default to 'ASCII' and +'strict', respectively. + +[clinic]*/ + +PyDoc_STRVAR(_pickle_Unpickler___init____doc__, +"__init__(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n" "This takes a binary file for reading a pickle data stream.\n" "\n" "The protocol version of the pickle is detected automatically, so no\n" @@ -5666,57 +6553,60 @@ PyDoc_STRVAR(Unpickler_doc, "generated by Python 2.x. If *fix_imports* is True, pickle will try to\n" "map the old Python 2.x names to the new names used in Python 3.x. The\n" "*encoding* and *errors* tell pickle how to decode 8-bit string\n" -"instances pickled by Python 2.x; these default to 'ASCII' and\n" -"'strict', respectively.\n"); +"instances pickled by Python 2.x; these default to \'ASCII\' and\n" +"\'strict\', respectively."); -static int -Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) +#define _PICKLE_UNPICKLER___INIT___METHODDEF \ + {"__init__", (PyCFunction)_pickle_Unpickler___init__, METH_VARARGS|METH_KEYWORDS, _pickle_Unpickler___init____doc__}, + +static PyObject * +_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file, int fix_imports, const char *encoding, const char *errors); + +static PyObject * +_pickle_Unpickler___init__(PyObject *self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; + PyObject *return_value = NULL; + static char *_keywords[] = {"file", "fix_imports", "encoding", "errors", NULL}; PyObject *file; - PyObject *fix_imports = Py_True; - char *encoding = NULL; - char *errors = NULL; - _Py_IDENTIFIER(persistent_load); + int fix_imports = 1; + const char *encoding = "ASCII"; + const char *errors = "strict"; - /* XXX: That is an horrible error message. But, I don't know how to do - better... */ - if (Py_SIZE(args) != 1) { - PyErr_Format(PyExc_TypeError, - "%s takes exactly one positional argument (%zd given)", - Py_TYPE(self)->tp_name, Py_SIZE(args)); - return -1; - } + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|$pss:__init__", _keywords, + &file, &fix_imports, &encoding, &errors)) + goto exit; + return_value = _pickle_Unpickler___init___impl((UnpicklerObject *)self, file, fix_imports, encoding, errors); - /* Arguments parsing needs to be done in the __init__() method to allow - subclasses to define their own __init__() method, which may (or may - not) support Unpickler arguments. However, this means we need to be - extra careful in the other Unpickler methods, since a subclass could - forget to call Unpickler.__init__() thus breaking our internal - invariants. */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist, - &file, &fix_imports, &encoding, &errors)) - return -1; +exit: + return return_value; +} + +static PyObject * +_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file, int fix_imports, const char *encoding, const char *errors) +/*[clinic checksum: bed0d8bbe1c647960ccc6f997b33bf33935fa56f]*/ +{ + _Py_IDENTIFIER(persistent_load); /* In case of multiple __init__() calls, clear previous content. */ if (self->read != NULL) (void)Unpickler_clear(self); if (_Unpickler_SetInputStream(self, file) < 0) - return -1; + return NULL; if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0) - return -1; + return NULL; - self->fix_imports = PyObject_IsTrue(fix_imports); + self->fix_imports = fix_imports; if (self->fix_imports == -1) - return -1; + return NULL; if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) { self->pers_func = _PyObject_GetAttrId((PyObject *)self, &PyId_persistent_load); if (self->pers_func == NULL) - return -1; + return NULL; } else { self->pers_func = NULL; @@ -5724,16 +6614,27 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) self->stack = (Pdata *)Pdata_New(); if (self->stack == NULL) - return -1; + return NULL; self->memo_size = 32; self->memo = _Unpickler_NewMemo(self->memo_size); if (self->memo == NULL) - return -1; + return NULL; - self->arg = NULL; self->proto = 0; + Py_RETURN_NONE; +} + +/* Wrap the Clinic generated signature to slot it in tp_init. */ +static int +Unpickler_init(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *result = _pickle_Unpickler___init__(self, args, kwargs); + if (result == NULL) { + return -1; + } + Py_DECREF(result); return 0; } @@ -5755,11 +6656,24 @@ typedef struct { UnpicklerObject *unpickler; } UnpicklerMemoProxyObject; -PyDoc_STRVAR(ump_clear_doc, -"memo.clear() -> None. Remove all items from memo."); +/*[clinic] +_pickle.UnpicklerMemoProxy.clear + + self: UnpicklerMemoProxyObject + +Remove all items from memo. +[clinic]*/ + +PyDoc_STRVAR(_pickle_UnpicklerMemoProxy_clear__doc__, +"clear()\n" +"Remove all items from memo."); + +#define _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF \ + {"clear", (PyCFunction)_pickle_UnpicklerMemoProxy_clear, METH_NOARGS, _pickle_UnpicklerMemoProxy_clear__doc__}, static PyObject * -ump_clear(UnpicklerMemoProxyObject *self) +_pickle_UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self) +/*[clinic checksum: 46fecf4e33c0c873124f845edf6cc3a2e9864bd5]*/ { _Unpickler_MemoCleanup(self->unpickler); self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size); @@ -5768,11 +6682,24 @@ ump_clear(UnpicklerMemoProxyObject *self) Py_RETURN_NONE; } -PyDoc_STRVAR(ump_copy_doc, -"memo.copy() -> new_memo. Copy the memo to a new object."); +/*[clinic] +_pickle.UnpicklerMemoProxy.copy + + self: UnpicklerMemoProxyObject + +Copy the memo to a new object. +[clinic]*/ + +PyDoc_STRVAR(_pickle_UnpicklerMemoProxy_copy__doc__, +"copy()\n" +"Copy the memo to a new object."); + +#define _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF \ + {"copy", (PyCFunction)_pickle_UnpicklerMemoProxy_copy, METH_NOARGS, _pickle_UnpicklerMemoProxy_copy__doc__}, static PyObject * -ump_copy(UnpicklerMemoProxyObject *self) +_pickle_UnpicklerMemoProxy_copy(UnpicklerMemoProxyObject *self) +/*[clinic checksum: f8856c4e8a33540886dfbb245f286af3008fa0ad]*/ { Py_ssize_t i; PyObject *new_memo = PyDict_New(); @@ -5802,15 +6729,28 @@ error: return NULL; } -PyDoc_STRVAR(ump_reduce_doc, -"memo.__reduce__(). Pickling support."); +/*[clinic] +_pickle.UnpicklerMemoProxy.__reduce__ + + self: UnpicklerMemoProxyObject + +Implement pickling support. +[clinic]*/ + +PyDoc_STRVAR(_pickle_UnpicklerMemoProxy___reduce____doc__, +"__reduce__()\n" +"Implement pickling support."); + +#define _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF \ + {"__reduce__", (PyCFunction)_pickle_UnpicklerMemoProxy___reduce__, METH_NOARGS, _pickle_UnpicklerMemoProxy___reduce____doc__}, static PyObject * -ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args) +_pickle_UnpicklerMemoProxy___reduce__(UnpicklerMemoProxyObject *self) +/*[clinic checksum: ab5516a77659144e1191c7dd70a0c6c7455660bc]*/ { PyObject *reduce_value; PyObject *constructor_args; - PyObject *contents = ump_copy(self); + PyObject *contents = _pickle_UnpicklerMemoProxy_copy(self); if (contents == NULL) return NULL; @@ -5833,9 +6773,9 @@ ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args) } static PyMethodDef unpicklerproxy_methods[] = { - {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc}, - {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc}, - {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc}, + _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF + _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF + _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF {NULL, NULL} /* sentinel */ }; @@ -6059,7 +6999,7 @@ static PyTypeObject Unpickler_Type = { 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, - Unpickler_doc, /*tp_doc*/ + _pickle_Unpickler___init____doc__, /*tp_doc*/ (traverseproc)Unpickler_traverse, /*tp_traverse*/ (inquiry)Unpickler_clear, /*tp_clear*/ 0, /*tp_richcompare*/ @@ -6074,21 +7014,53 @@ static PyTypeObject Unpickler_Type = { 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ - (initproc)Unpickler_init, /*tp_init*/ + Unpickler_init, /*tp_init*/ PyType_GenericAlloc, /*tp_alloc*/ PyType_GenericNew, /*tp_new*/ PyObject_GC_Del, /*tp_free*/ 0, /*tp_is_gc*/ }; -PyDoc_STRVAR(pickle_dump_doc, -"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n" +/*[clinic] + +_pickle.dump + + obj: object + file: object + protocol: object = NULL + * + fix_imports: bool = True + +Write a pickled representation of obj to the open file object file. + +This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more +efficient. + +The optional protocol argument tells the pickler to use the given protocol +supported protocols are 0, 1, 2, 3. The default protocol is 3; a +backward-incompatible protocol designed for Python 3.0. + +Specifying a negative protocol version selects the highest protocol version +supported. The higher the protocol used, the more recent the version of +Python needed to read the pickle produced. + +The file argument must have a write() method that accepts a single bytes +argument. It can thus be a file object opened for binary writing, a +io.BytesIO instance, or any other custom object that meets this interface. + +If fix_imports is True and protocol is less than 3, pickle will try to +map the new Python 3.x names to the old module names used in Python 2.x, +so that the pickle data stream is readable with Python 2.x. +[clinic]*/ + +PyDoc_STRVAR(_pickle_dump__doc__, +"dump(obj, file, protocol=None, *, fix_imports=True)\n" +"Write a pickled representation of obj to the open file object file.\n" "\n" -"Write a pickled representation of obj to the open file object file. This\n" -"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n" +"This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n" "efficient.\n" "\n" -"The optional protocol argument tells the pickler to use the given protocol;\n" +"The optional protocol argument tells the pickler to use the given protocol\n" "supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n" "backward-incompatible protocol designed for Python 3.0.\n" "\n" @@ -6102,35 +7074,44 @@ PyDoc_STRVAR(pickle_dump_doc, "\n" "If fix_imports is True and protocol is less than 3, pickle will try to\n" "map the new Python 3.x names to the old module names used in Python 2.x,\n" -"so that the pickle data stream is readable with Python 2.x.\n"); +"so that the pickle data stream is readable with Python 2.x."); + +#define _PICKLE_DUMP_METHODDEF \ + {"dump", (PyCFunction)_pickle_dump, METH_VARARGS|METH_KEYWORDS, _pickle_dump__doc__}, static PyObject * -pickle_dump(PyObject *self, PyObject *args, PyObject *kwds) +_pickle_dump_impl(PyModuleDef *module, PyObject *obj, PyObject *file, PyObject *protocol, int fix_imports); + +static PyObject * +_pickle_dump(PyModuleDef *module, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0}; + PyObject *return_value = NULL; + static char *_keywords[] = {"obj", "file", "protocol", "fix_imports", NULL}; PyObject *obj; PyObject *file; - PyObject *proto = NULL; - PyObject *fix_imports = Py_True; - PicklerObject *pickler; + PyObject *protocol = NULL; + int fix_imports = 1; - /* fix_imports is a keyword-only argument. */ - if (Py_SIZE(args) > 3) { - PyErr_Format(PyExc_TypeError, - "pickle.dump() takes at most 3 positional " - "argument (%zd given)", Py_SIZE(args)); - return NULL; - } + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "OO|O$p:dump", _keywords, + &obj, &file, &protocol, &fix_imports)) + goto exit; + return_value = _pickle_dump_impl(module, obj, file, protocol, fix_imports); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist, - &obj, &file, &proto, &fix_imports)) - return NULL; +exit: + return return_value; +} + +static PyObject * +_pickle_dump_impl(PyModuleDef *module, PyObject *obj, PyObject *file, PyObject *protocol, int fix_imports) +/*[clinic checksum: e442721b16052d921b5e3fbd146d0a62e94a459e]*/ +{ + PicklerObject *pickler = _Pickler_New(); - pickler = _Pickler_New(); if (pickler == NULL) return NULL; - if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0) + if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0) goto error; if (_Pickler_SetOutputStream(pickler, file) < 0) @@ -6150,11 +7131,33 @@ pickle_dump(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } -PyDoc_STRVAR(pickle_dumps_doc, -"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n" -"\n" -"Return the pickled representation of the object as a bytes\n" -"object, instead of writing it to a file.\n" +/*[clinic] + +_pickle.dumps + + obj: object + protocol: object = NULL + * + fix_imports: bool = True + +Return the pickled representation of the object as a bytes object. + +The optional protocol argument tells the pickler to use the given protocol; +supported protocols are 0, 1, 2, 3. The default protocol is 3; a +backward-incompatible protocol designed for Python 3.0. + +Specifying a negative protocol version selects the highest protocol version +supported. The higher the protocol used, the more recent the version of +Python needed to read the pickle produced. + +If fix_imports is True and *protocol* is less than 3, pickle will try to +map the new Python 3.x names to the old module names used in Python 2.x, +so that the pickle data stream is readable with Python 2.x. +[clinic]*/ + +PyDoc_STRVAR(_pickle_dumps__doc__, +"dumps(obj, protocol=None, *, fix_imports=True)\n" +"Return the pickled representation of the object as a bytes object.\n" "\n" "The optional protocol argument tells the pickler to use the given protocol;\n" "supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n" @@ -6166,35 +7169,44 @@ PyDoc_STRVAR(pickle_dumps_doc, "\n" "If fix_imports is True and *protocol* is less than 3, pickle will try to\n" "map the new Python 3.x names to the old module names used in Python 2.x,\n" -"so that the pickle data stream is readable with Python 2.x.\n"); +"so that the pickle data stream is readable with Python 2.x."); + +#define _PICKLE_DUMPS_METHODDEF \ + {"dumps", (PyCFunction)_pickle_dumps, METH_VARARGS|METH_KEYWORDS, _pickle_dumps__doc__}, + +static PyObject * +_pickle_dumps_impl(PyModuleDef *module, PyObject *obj, PyObject *protocol, int fix_imports); static PyObject * -pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds) +_pickle_dumps(PyModuleDef *module, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"obj", "protocol", "fix_imports", 0}; + PyObject *return_value = NULL; + static char *_keywords[] = {"obj", "protocol", "fix_imports", NULL}; PyObject *obj; - PyObject *proto = NULL; - PyObject *result; - PyObject *fix_imports = Py_True; - PicklerObject *pickler; + PyObject *protocol = NULL; + int fix_imports = 1; - /* fix_imports is a keyword-only argument. */ - if (Py_SIZE(args) > 2) { - PyErr_Format(PyExc_TypeError, - "pickle.dumps() takes at most 2 positional " - "argument (%zd given)", Py_SIZE(args)); - return NULL; - } + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|O$p:dumps", _keywords, + &obj, &protocol, &fix_imports)) + goto exit; + return_value = _pickle_dumps_impl(module, obj, protocol, fix_imports); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist, - &obj, &proto, &fix_imports)) - return NULL; +exit: + return return_value; +} + +static PyObject * +_pickle_dumps_impl(PyModuleDef *module, PyObject *obj, PyObject *protocol, int fix_imports) +/*[clinic checksum: df6262c4c487f537f47aec8a1709318204c1e174]*/ +{ + PyObject *result; + PicklerObject *pickler = _Pickler_New(); - pickler = _Pickler_New(); if (pickler == NULL) return NULL; - if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0) + if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0) goto error; if (dump(pickler, obj) < 0) @@ -6209,15 +7221,46 @@ pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } -PyDoc_STRVAR(pickle_load_doc, -"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n" +/*[clinic] + +_pickle.load + + file: object + * + fix_imports: bool = True + encoding: str = 'ASCII' + errors: str = 'strict' + +Return a reconstituted object from the pickle data stored in a file. + +This is equivalent to ``Unpickler(file).load()``, but may be more efficient. + +The protocol version of the pickle is detected automatically, so no protocol +argument is needed. Bytes past the pickled object's representation are +ignored. + +The argument file must have two methods, a read() method that takes an +integer argument, and a readline() method that requires no arguments. Both +methods should return bytes. Thus *file* can be a binary file object opened +for reading, a BytesIO object, or any other custom object that meets this +interface. + +Optional keyword arguments are fix_imports, encoding and errors, +which are used to control compatiblity support for pickle stream generated +by Python 2.x. If fix_imports is True, pickle will try to map the old +Python 2.x names to the new names used in Python 3.x. The encoding and +errors tell pickle how to decode 8-bit string instances pickled by Python +2.x; these default to 'ASCII' and 'strict', respectively. +[clinic]*/ + +PyDoc_STRVAR(_pickle_load__doc__, +"load(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n" +"Return a reconstituted object from the pickle data stored in a file.\n" "\n" -"Read a pickled object representation from the open file object file and\n" -"return the reconstituted object hierarchy specified therein. This is\n" -"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n" +"This is equivalent to ``Unpickler(file).load()``, but may be more efficient.\n" "\n" "The protocol version of the pickle is detected automatically, so no protocol\n" -"argument is needed. Bytes past the pickled object's representation are\n" +"argument is needed. Bytes past the pickled object\'s representation are\n" "ignored.\n" "\n" "The argument file must have two methods, a read() method that takes an\n" @@ -6231,32 +7274,41 @@ PyDoc_STRVAR(pickle_load_doc, "by Python 2.x. If fix_imports is True, pickle will try to map the old\n" "Python 2.x names to the new names used in Python 3.x. The encoding and\n" "errors tell pickle how to decode 8-bit string instances pickled by Python\n" -"2.x; these default to 'ASCII' and 'strict', respectively.\n"); +"2.x; these default to \'ASCII\' and \'strict\', respectively."); + +#define _PICKLE_LOAD_METHODDEF \ + {"load", (PyCFunction)_pickle_load, METH_VARARGS|METH_KEYWORDS, _pickle_load__doc__}, static PyObject * -pickle_load(PyObject *self, PyObject *args, PyObject *kwds) +_pickle_load_impl(PyModuleDef *module, PyObject *file, int fix_imports, const char *encoding, const char *errors); + +static PyObject * +_pickle_load(PyModuleDef *module, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; + PyObject *return_value = NULL; + static char *_keywords[] = {"file", "fix_imports", "encoding", "errors", NULL}; PyObject *file; - PyObject *fix_imports = Py_True; - PyObject *result; - char *encoding = NULL; - char *errors = NULL; - UnpicklerObject *unpickler; + int fix_imports = 1; + const char *encoding = "ASCII"; + const char *errors = "strict"; - /* fix_imports, encoding and errors are a keyword-only argument. */ - if (Py_SIZE(args) != 1) { - PyErr_Format(PyExc_TypeError, - "pickle.load() takes exactly one positional " - "argument (%zd given)", Py_SIZE(args)); - return NULL; - } + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|$pss:load", _keywords, + &file, &fix_imports, &encoding, &errors)) + goto exit; + return_value = _pickle_load_impl(module, file, fix_imports, encoding, errors); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist, - &file, &fix_imports, &encoding, &errors)) - return NULL; +exit: + return return_value; +} + +static PyObject * +_pickle_load_impl(PyModuleDef *module, PyObject *file, int fix_imports, const char *encoding, const char *errors) +/*[clinic checksum: e10796f6765b22ce48dca6940f11b3933853ca35]*/ +{ + PyObject *result; + UnpicklerObject *unpickler = _Unpickler_New(); - unpickler = _Unpickler_New(); if (unpickler == NULL) return NULL; @@ -6266,9 +7318,7 @@ pickle_load(PyObject *self, PyObject *args, PyObject *kwds) if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0) goto error; - unpickler->fix_imports = PyObject_IsTrue(fix_imports); - if (unpickler->fix_imports == -1) - goto error; + unpickler->fix_imports = fix_imports; result = load(unpickler); Py_DECREF(unpickler); @@ -6279,14 +7329,36 @@ pickle_load(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } -PyDoc_STRVAR(pickle_loads_doc, -"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n" -"\n" -"Read a pickled object hierarchy from a bytes object and return the\n" -"reconstituted object hierarchy specified therein\n" +/*[clinic] + +_pickle.loads + + data: object + * + fix_imports: bool = True + encoding: str = 'ASCII' + errors: str = 'strict' + +Return a reconstituted object from the given pickle data. + +The protocol version of the pickle is detected automatically, so no protocol +argument is needed. Bytes past the pickled object's representation are +ignored. + +Optional keyword arguments are fix_imports, encoding and errors, which +are used to control compatiblity support for pickle stream generated +by Python 2.x. If fix_imports is True, pickle will try to map the old +Python 2.x names to the new names used in Python 3.x. The encoding and +errors tell pickle how to decode 8-bit string instances pickled by Python +2.x; these default to 'ASCII' and 'strict', respectively. +[clinic]*/ + +PyDoc_STRVAR(_pickle_loads__doc__, +"loads(data, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n" +"Return a reconstituted object from the given pickle data.\n" "\n" "The protocol version of the pickle is detected automatically, so no protocol\n" -"argument is needed. Bytes past the pickled object's representation are\n" +"argument is needed. Bytes past the pickled object\'s representation are\n" "ignored.\n" "\n" "Optional keyword arguments are fix_imports, encoding and errors, which\n" @@ -6294,44 +7366,51 @@ PyDoc_STRVAR(pickle_loads_doc, "by Python 2.x. If fix_imports is True, pickle will try to map the old\n" "Python 2.x names to the new names used in Python 3.x. The encoding and\n" "errors tell pickle how to decode 8-bit string instances pickled by Python\n" -"2.x; these default to 'ASCII' and 'strict', respectively.\n"); +"2.x; these default to \'ASCII\' and \'strict\', respectively."); + +#define _PICKLE_LOADS_METHODDEF \ + {"loads", (PyCFunction)_pickle_loads, METH_VARARGS|METH_KEYWORDS, _pickle_loads__doc__}, + +static PyObject * +_pickle_loads_impl(PyModuleDef *module, PyObject *data, int fix_imports, const char *encoding, const char *errors); static PyObject * -pickle_loads(PyObject *self, PyObject *args, PyObject *kwds) +_pickle_loads(PyModuleDef *module, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0}; - PyObject *input; - PyObject *fix_imports = Py_True; - PyObject *result; - char *encoding = NULL; - char *errors = NULL; - UnpicklerObject *unpickler; + PyObject *return_value = NULL; + static char *_keywords[] = {"data", "fix_imports", "encoding", "errors", NULL}; + PyObject *data; + int fix_imports = 1; + const char *encoding = "ASCII"; + const char *errors = "strict"; - /* fix_imports, encoding and errors are a keyword-only argument. */ - if (Py_SIZE(args) != 1) { - PyErr_Format(PyExc_TypeError, - "pickle.loads() takes exactly one positional " - "argument (%zd given)", Py_SIZE(args)); - return NULL; - } + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|$pss:loads", _keywords, + &data, &fix_imports, &encoding, &errors)) + goto exit; + return_value = _pickle_loads_impl(module, data, fix_imports, encoding, errors); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist, - &input, &fix_imports, &encoding, &errors)) - return NULL; +exit: + return return_value; +} + +static PyObject * +_pickle_loads_impl(PyModuleDef *module, PyObject *data, int fix_imports, const char *encoding, const char *errors) +/*[clinic checksum: 29ee725efcbf51a3533c19cb8261a8e267b7080a]*/ +{ + PyObject *result; + UnpicklerObject *unpickler = _Unpickler_New(); - unpickler = _Unpickler_New(); if (unpickler == NULL) return NULL; - if (_Unpickler_SetStringInput(unpickler, input) < 0) + if (_Unpickler_SetStringInput(unpickler, data) < 0) goto error; if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0) goto error; - unpickler->fix_imports = PyObject_IsTrue(fix_imports); - if (unpickler->fix_imports == -1) - goto error; + unpickler->fix_imports = fix_imports; result = load(unpickler); Py_DECREF(unpickler); @@ -6342,139 +7421,63 @@ pickle_loads(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - static struct PyMethodDef pickle_methods[] = { - {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS, - pickle_dump_doc}, - {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS, - pickle_dumps_doc}, - {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS, - pickle_load_doc}, - {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS, - pickle_loads_doc}, + _PICKLE_DUMP_METHODDEF + _PICKLE_DUMPS_METHODDEF + _PICKLE_LOAD_METHODDEF + _PICKLE_LOADS_METHODDEF {NULL, NULL} /* sentinel */ }; static int -initmodule(void) +pickle_clear(PyObject *m) { - PyObject *copyreg = NULL; - PyObject *compat_pickle = NULL; - - /* XXX: We should ensure that the types of the dictionaries imported are - exactly PyDict objects. Otherwise, it is possible to crash the pickle - since we use the PyDict API directly to access these dictionaries. */ - - copyreg = PyImport_ImportModule("copyreg"); - if (!copyreg) - goto error; - dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); - if (!dispatch_table) - goto error; - extension_registry = \ - PyObject_GetAttrString(copyreg, "_extension_registry"); - if (!extension_registry) - goto error; - inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry"); - if (!inverted_registry) - goto error; - extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); - if (!extension_cache) - goto error; - Py_CLEAR(copyreg); - - /* Load the 2.x -> 3.x stdlib module mapping tables */ - compat_pickle = PyImport_ImportModule("_compat_pickle"); - if (!compat_pickle) - goto error; - name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); - if (!name_mapping_2to3) - goto error; - if (!PyDict_CheckExact(name_mapping_2to3)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", - Py_TYPE(name_mapping_2to3)->tp_name); - goto error; - } - import_mapping_2to3 = PyObject_GetAttrString(compat_pickle, - "IMPORT_MAPPING"); - if (!import_mapping_2to3) - goto error; - if (!PyDict_CheckExact(import_mapping_2to3)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.IMPORT_MAPPING should be a dict, " - "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name); - goto error; - } - /* ... and the 3.x -> 2.x mapping tables */ - name_mapping_3to2 = PyObject_GetAttrString(compat_pickle, - "REVERSE_NAME_MAPPING"); - if (!name_mapping_3to2) - goto error; - if (!PyDict_CheckExact(name_mapping_3to2)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, " - "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name); - goto error; - } - import_mapping_3to2 = PyObject_GetAttrString(compat_pickle, - "REVERSE_IMPORT_MAPPING"); - if (!import_mapping_3to2) - goto error; - if (!PyDict_CheckExact(import_mapping_3to2)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, " - "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name); - goto error; - } - Py_CLEAR(compat_pickle); - - empty_tuple = PyTuple_New(0); - if (empty_tuple == NULL) - goto error; - two_tuple = PyTuple_New(2); - if (two_tuple == NULL) - goto error; - /* We use this temp container with no regard to refcounts, or to - * keeping containees alive. Exempt from GC, because we don't - * want anything looking at two_tuple() by magic. - */ - PyObject_GC_UnTrack(two_tuple); - + _Pickle_ClearState(_Pickle_GetState(m)); return 0; +} - error: - Py_CLEAR(copyreg); - Py_CLEAR(dispatch_table); - Py_CLEAR(extension_registry); - Py_CLEAR(inverted_registry); - Py_CLEAR(extension_cache); - Py_CLEAR(compat_pickle); - Py_CLEAR(name_mapping_2to3); - Py_CLEAR(import_mapping_2to3); - Py_CLEAR(name_mapping_3to2); - Py_CLEAR(import_mapping_3to2); - Py_CLEAR(empty_tuple); - Py_CLEAR(two_tuple); - return -1; +static int +pickle_traverse(PyObject *m, visitproc visit, void *arg) +{ + PickleState *st = _Pickle_GetState(m); + Py_VISIT(st->PickleError); + Py_VISIT(st->PicklingError); + Py_VISIT(st->UnpicklingError); + Py_VISIT(st->dispatch_table); + Py_VISIT(st->extension_registry); + Py_VISIT(st->extension_cache); + Py_VISIT(st->inverted_registry); + Py_VISIT(st->name_mapping_2to3); + Py_VISIT(st->import_mapping_2to3); + Py_VISIT(st->name_mapping_3to2); + Py_VISIT(st->import_mapping_3to2); + Py_VISIT(st->codecs_encode); + return 0; } static struct PyModuleDef _picklemodule = { PyModuleDef_HEAD_INIT, - "_pickle", - pickle_module_doc, - -1, - pickle_methods, - NULL, - NULL, - NULL, - NULL + "_pickle", /* m_name */ + pickle_module_doc, /* m_doc */ + sizeof(PickleState), /* m_size */ + pickle_methods, /* m_methods */ + NULL, /* m_reload */ + pickle_traverse, /* m_traverse */ + pickle_clear, /* m_clear */ + NULL /* m_free */ }; PyMODINIT_FUNC PyInit__pickle(void) { PyObject *m; + PickleState *st; + + m = PyState_FindModule(&_picklemodule); + if (m) { + Py_INCREF(m); + return m; + } if (PyType_Ready(&Unpickler_Type) < 0) return NULL; @@ -6499,27 +7502,32 @@ PyInit__pickle(void) if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0) return NULL; + st = _Pickle_GetState(m); + /* Initialize the exceptions. */ - PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL); - if (PickleError == NULL) + st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL); + if (st->PickleError == NULL) return NULL; - PicklingError = \ - PyErr_NewException("_pickle.PicklingError", PickleError, NULL); - if (PicklingError == NULL) + st->PicklingError = \ + PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL); + if (st->PicklingError == NULL) return NULL; - UnpicklingError = \ - PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL); - if (UnpicklingError == NULL) + st->UnpicklingError = \ + PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL); + if (st->UnpicklingError == NULL) return NULL; - if (PyModule_AddObject(m, "PickleError", PickleError) < 0) + Py_INCREF(st->PickleError); + if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0) return NULL; - if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0) + Py_INCREF(st->PicklingError); + if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0) return NULL; - if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0) + Py_INCREF(st->UnpicklingError); + if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0) return NULL; - if (initmodule() < 0) + if (_Pickle_InitState(st) < 0) return NULL; return m; |