diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-12-06 20:01:35 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-12-06 20:01:35 (GMT) |
commit | 59fb6342a4ffdc23e1269a418734f4fc0f984873 (patch) | |
tree | 1be1e6d729c7b14769a44e80db57de2b58c2fa49 | |
parent | c68e723e6f8bd9923d23a4f14b66504b192aba74 (diff) | |
download | cpython-59fb6342a4ffdc23e1269a418734f4fc0f984873.zip cpython-59fb6342a4ffdc23e1269a418734f4fc0f984873.tar.gz cpython-59fb6342a4ffdc23e1269a418734f4fc0f984873.tar.bz2 |
Issue #25761: Improved detecting errors in broken pickle data.
-rw-r--r-- | Lib/pickle.py | 88 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 17 | ||||
-rw-r--r-- | Lib/test/test_pickle.py | 5 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Modules/_pickle.c | 111 |
5 files changed, 113 insertions, 110 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py index 53978fb..a60b1b7 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1031,7 +1031,7 @@ class _Unpickler: self._unframer = _Unframer(self._file_read, self._file_readline) self.read = self._unframer.read self.readline = self._unframer.readline - self.mark = object() # any new unique object + self.metastack = [] self.stack = [] self.append = self.stack.append self.proto = 0 @@ -1047,20 +1047,12 @@ class _Unpickler: except _Stop as stopinst: return stopinst.value - # Return largest index k such that self.stack[k] is self.mark. - # If the stack doesn't contain a mark, eventually raises IndexError. - # This could be sped by maintaining another stack, of indices at which - # the mark appears. For that matter, the latter stack would suffice, - # and we wouldn't need to push mark objects on self.stack at all. - # Doing so is probably a good thing, though, since if the pickle is - # corrupt (or hostile) we may get a clue from finding self.mark embedded - # in unpickled objects. - def marker(self): - stack = self.stack - mark = self.mark - k = len(stack)-1 - while stack[k] is not mark: k = k-1 - return k + # Return a list of items pushed in the stack after last MARK instruction. + def pop_mark(self): + items = self.stack + self.stack = self.metastack.pop() + self.append = self.stack.append + return items def persistent_load(self, pid): raise UnpicklingError("unsupported persistent id encountered") @@ -1237,8 +1229,8 @@ class _Unpickler: dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode def load_tuple(self): - k = self.marker() - self.stack[k:] = [tuple(self.stack[k+1:])] + items = self.pop_mark() + self.append(tuple(items)) dispatch[TUPLE[0]] = load_tuple def load_empty_tuple(self): @@ -1270,21 +1262,20 @@ class _Unpickler: dispatch[EMPTY_SET[0]] = load_empty_set def load_frozenset(self): - k = self.marker() - self.stack[k:] = [frozenset(self.stack[k+1:])] + items = self.pop_mark() + self.append(frozenset(items)) dispatch[FROZENSET[0]] = load_frozenset def load_list(self): - k = self.marker() - self.stack[k:] = [self.stack[k+1:]] + items = self.pop_mark() + self.append(items) dispatch[LIST[0]] = load_list def load_dict(self): - k = self.marker() - items = self.stack[k+1:] + items = self.pop_mark() d = {items[i]: items[i+1] for i in range(0, len(items), 2)} - self.stack[k:] = [d] + self.append(d) dispatch[DICT[0]] = load_dict # INST and OBJ differ only in how they get a class object. It's not @@ -1292,9 +1283,7 @@ class _Unpickler: # previously diverged and grew different bugs. # klass is the class to instantiate, and k points to the topmost mark # object, following which are the arguments for klass.__init__. - def _instantiate(self, klass, k): - args = tuple(self.stack[k+1:]) - del self.stack[k:] + def _instantiate(self, klass, args): if (args or not isinstance(klass, type) or hasattr(klass, "__getinitargs__")): try: @@ -1310,14 +1299,14 @@ class _Unpickler: module = self.readline()[:-1].decode("ascii") name = self.readline()[:-1].decode("ascii") klass = self.find_class(module, name) - self._instantiate(klass, self.marker()) + self._instantiate(klass, self.pop_mark()) dispatch[INST[0]] = load_inst def load_obj(self): # Stack is ... markobject classobject arg1 arg2 ... - k = self.marker() - klass = self.stack.pop(k+1) - self._instantiate(klass, k) + args = self.pop_mark() + cls = args.pop(0) + self._instantiate(cls, args) dispatch[OBJ[0]] = load_obj def load_newobj(self): @@ -1402,12 +1391,14 @@ class _Unpickler: dispatch[REDUCE[0]] = load_reduce def load_pop(self): - del self.stack[-1] + if self.stack: + del self.stack[-1] + else: + self.pop_mark() dispatch[POP[0]] = load_pop def load_pop_mark(self): - k = self.marker() - del self.stack[k:] + self.pop_mark() dispatch[POP_MARK[0]] = load_pop_mark def load_dup(self): @@ -1463,17 +1454,14 @@ class _Unpickler: dispatch[APPEND[0]] = load_append def load_appends(self): - stack = self.stack - mark = self.marker() - list_obj = stack[mark - 1] - items = stack[mark + 1:] + items = self.pop_mark() + list_obj = self.stack[-1] if isinstance(list_obj, list): list_obj.extend(items) else: append = list_obj.append for item in items: append(item) - del stack[mark:] dispatch[APPENDS[0]] = load_appends def load_setitem(self): @@ -1485,27 +1473,21 @@ class _Unpickler: dispatch[SETITEM[0]] = load_setitem def load_setitems(self): - stack = self.stack - mark = self.marker() - dict = stack[mark - 1] - for i in range(mark + 1, len(stack), 2): - dict[stack[i]] = stack[i + 1] - - del stack[mark:] + items = self.pop_mark() + dict = self.stack[-1] + for i in range(0, len(items), 2): + dict[items[i]] = items[i + 1] dispatch[SETITEMS[0]] = load_setitems def load_additems(self): - stack = self.stack - mark = self.marker() - set_obj = stack[mark - 1] - items = stack[mark + 1:] + items = self.pop_mark() + set_obj = self.stack[-1] if isinstance(set_obj, set): set_obj.update(items) else: add = set_obj.add for item in items: add(item) - del stack[mark:] dispatch[ADDITEMS[0]] = load_additems def load_build(self): @@ -1533,7 +1515,9 @@ class _Unpickler: dispatch[BUILD[0]] = load_build def load_mark(self): - self.append(self.mark) + self.metastack.append(self.stack) + self.stack = [] + self.append = self.stack.append dispatch[MARK[0]] = load_mark def load_stop(self): diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 608c35a..217aa3d 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1000,7 +1000,7 @@ class AbstractUnpickleTests(unittest.TestCase): b'0', # POP b'1', # POP_MARK b'2', # DUP - # b'(2', # PyUnpickler doesn't raise + b'(2', b'R', # REDUCE b')R', b'a', # APPEND @@ -1009,7 +1009,7 @@ class AbstractUnpickleTests(unittest.TestCase): b'Nb', b'd', # DICT b'e', # APPENDS - # b'(e', # PyUnpickler raises AttributeError + b'(e', b'ibuiltins\nlist\n', # INST b'l', # LIST b'o', # OBJ @@ -1022,7 +1022,7 @@ class AbstractUnpickleTests(unittest.TestCase): b'NNs', b't', # TUPLE b'u', # SETITEMS - # b'(u', # PyUnpickler doesn't raise + b'(u', b'}(Nu', b'\x81', # NEWOBJ b')\x81', @@ -1033,7 +1033,7 @@ class AbstractUnpickleTests(unittest.TestCase): b'N\x87', b'NN\x87', b'\x90', # ADDITEMS - # b'(\x90', # PyUnpickler raises AttributeError + b'(\x90', b'\x91', # FROZENSET b'\x92', # NEWOBJ_EX b')}\x92', @@ -1046,7 +1046,7 @@ class AbstractUnpickleTests(unittest.TestCase): def test_bad_mark(self): badpickles = [ - # b'N(.', # STOP + b'N(.', # STOP b'N(2', # DUP b'cbuiltins\nlist\n)(R', # REDUCE b'cbuiltins\nlist\n()R', @@ -1081,7 +1081,7 @@ class AbstractUnpickleTests(unittest.TestCase): b'N(\x94', # MEMOIZE ] for p in badpickles: - self.check_unpickling_error(self.bad_mark_errors, p) + self.check_unpickling_error(self.bad_stack_errors, p) def test_truncated_data(self): self.check_unpickling_error(EOFError, b'') @@ -2581,11 +2581,6 @@ class AbstractPickleModuleTests(unittest.TestCase): self.assertRaises(pickle.PicklingError, BadPickler().dump, 0) self.assertRaises(pickle.UnpicklingError, BadUnpickler().load) - def test_bad_input(self): - # Test issue4298 - s = bytes([0x58, 0, 0, 0, 0x54]) - self.assertRaises(EOFError, pickle.loads, s) - class AbstractPersistentPicklerTests(unittest.TestCase): diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index bd38cfb..6b97315 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -33,8 +33,6 @@ class PyUnpicklerTests(AbstractUnpickleTests): unpickler = pickle._Unpickler bad_stack_errors = (IndexError,) - bad_mark_errors = (IndexError, pickle.UnpicklingError, - TypeError, AttributeError, EOFError) truncated_errors = (pickle.UnpicklingError, EOFError, AttributeError, ValueError, struct.error, IndexError, ImportError) @@ -69,8 +67,6 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests, pickler = pickle._Pickler unpickler = pickle._Unpickler bad_stack_errors = (pickle.UnpicklingError, IndexError) - bad_mark_errors = (pickle.UnpicklingError, IndexError, - TypeError, AttributeError, EOFError) truncated_errors = (pickle.UnpicklingError, EOFError, AttributeError, ValueError, struct.error, IndexError, ImportError) @@ -132,7 +128,6 @@ if has_c_implementation: class CUnpicklerTests(PyUnpicklerTests): unpickler = _pickle.Unpickler bad_stack_errors = (pickle.UnpicklingError,) - bad_mark_errors = (EOFError,) truncated_errors = (pickle.UnpicklingError, EOFError, AttributeError, ValueError) @@ -109,6 +109,8 @@ Core and Builtins Library ------- +- Issue #25761: Improved detecting errors in broken pickle data. + - Issue #25717: Restore the previous behaviour of tolerating most fstat() errors when opening files. This was a regression in 3.5a1, and stopped anonymous temporary files from working in special cases. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 3ddf6a0..38598c5 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -370,18 +370,12 @@ _Pickle_FastCall(PyObject *func, PyObject *obj) /*************************************************************************/ -static int -stack_underflow(void) -{ - PickleState *st = _Pickle_GetGlobalState(); - PyErr_SetString(st->UnpicklingError, "unpickling stack underflow"); - return -1; -} - /* Internal data type used as the unpickling stack. */ typedef struct { PyObject_VAR_HEAD PyObject **data; + int mark_set; /* is MARK set? */ + Py_ssize_t fence; /* position of top MARK or 0 */ Py_ssize_t allocated; /* number of slots in data allocated */ } Pdata; @@ -412,6 +406,8 @@ Pdata_New(void) if (!(self = PyObject_New(Pdata, &Pdata_Type))) return NULL; Py_SIZE(self) = 0; + self->mark_set = 0; + self->fence = 0; self->allocated = 8; self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *)); if (self->data) @@ -429,8 +425,7 @@ Pdata_clear(Pdata *self, Py_ssize_t clearto) { Py_ssize_t i = Py_SIZE(self); - if (clearto < 0) - return stack_underflow(); + assert(clearto >= self->fence); if (clearto >= i) return 0; @@ -466,6 +461,17 @@ Pdata_grow(Pdata *self) return -1; } +static int +Pdata_stack_underflow(Pdata *self) +{ + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, + self->mark_set ? + "unexpected MARK found" : + "unpickling stack underflow"); + return -1; +} + /* D is a Pdata*. Pop the topmost element and store it into V, which * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError * is raised and V is set to NULL. @@ -473,9 +479,8 @@ Pdata_grow(Pdata *self) static PyObject * Pdata_pop(Pdata *self) { - if (Py_SIZE(self) == 0) { - PickleState *st = _Pickle_GetGlobalState(); - PyErr_SetString(st->UnpicklingError, "bad pickle data"); + if (Py_SIZE(self) <= self->fence) { + Pdata_stack_underflow(self); return NULL; } return self->data[--Py_SIZE(self)]; @@ -507,6 +512,10 @@ Pdata_poptuple(Pdata *self, Py_ssize_t start) PyObject *tuple; Py_ssize_t len, i, j; + if (start < self->fence) { + Pdata_stack_underflow(self); + return NULL; + } len = Py_SIZE(self) - start; tuple = PyTuple_New(len); if (tuple == NULL) @@ -4585,13 +4594,19 @@ find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name) static Py_ssize_t marker(UnpicklerObject *self) { - PickleState *st = _Pickle_GetGlobalState(); + Py_ssize_t mark; + if (self->num_marks < 1) { + PickleState *st = _Pickle_GetGlobalState(); PyErr_SetString(st->UnpicklingError, "could not find MARK"); return -1; } - return self->marks[--self->num_marks]; + mark = self->marks[--self->num_marks]; + self->stack->mark_set = self->num_marks != 0; + self->stack->fence = self->num_marks ? + self->marks[self->num_marks - 1] : 0; + return mark; } static int @@ -5052,7 +5067,7 @@ load_counted_tuple(UnpicklerObject *self, int len) PyObject *tuple; if (Py_SIZE(self->stack) < len) - return stack_underflow(); + return Pdata_stack_underflow(self->stack); tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len); if (tuple == NULL) @@ -5134,6 +5149,12 @@ load_dict(UnpicklerObject *self) if ((dict = PyDict_New()) == NULL) return -1; + if ((j - i) % 2 != 0) { + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "odd number of items for DICT"); + return -1; + } + for (k = i + 1; k < j; k += 2) { key = self->stack->data[k - 1]; value = self->stack->data[k]; @@ -5201,7 +5222,7 @@ load_obj(UnpicklerObject *self) return -1; if (Py_SIZE(self->stack) - i < 1) - return stack_underflow(); + return Pdata_stack_underflow(self->stack); args = Pdata_poptuple(self->stack, i + 1); if (args == NULL) @@ -5518,12 +5539,15 @@ load_pop(UnpicklerObject *self) */ if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) { self->num_marks--; - } else if (len > 0) { + self->stack->mark_set = self->num_marks != 0; + self->stack->fence = self->num_marks ? + self->marks[self->num_marks - 1] : 0; + } else if (len <= self->stack->fence) + return Pdata_stack_underflow(self->stack); + else { len--; Py_DECREF(self->stack->data[len]); Py_SIZE(self->stack) = len; - } else { - return stack_underflow(); } return 0; } @@ -5545,10 +5569,10 @@ static int load_dup(UnpicklerObject *self) { PyObject *last; - Py_ssize_t len; + Py_ssize_t len = Py_SIZE(self->stack); - if ((len = Py_SIZE(self->stack)) <= 0) - return stack_underflow(); + if (len <= self->stack->fence) + return Pdata_stack_underflow(self->stack); last = self->stack->data[len - 1]; PDATA_APPEND(self->stack, last, -1); return 0; @@ -5731,8 +5755,8 @@ load_put(UnpicklerObject *self) return -1; if (len < 2) return bad_readline(); - if (Py_SIZE(self->stack) <= 0) - return stack_underflow(); + if (Py_SIZE(self->stack) <= self->stack->fence) + return Pdata_stack_underflow(self->stack); value = self->stack->data[Py_SIZE(self->stack) - 1]; key = PyLong_FromString(s, NULL, 10); @@ -5760,8 +5784,8 @@ load_binput(UnpicklerObject *self) if (_Unpickler_Read(self, &s, 1) < 0) return -1; - if (Py_SIZE(self->stack) <= 0) - return stack_underflow(); + if (Py_SIZE(self->stack) <= self->stack->fence) + return Pdata_stack_underflow(self->stack); value = self->stack->data[Py_SIZE(self->stack) - 1]; idx = Py_CHARMASK(s[0]); @@ -5779,8 +5803,8 @@ load_long_binput(UnpicklerObject *self) if (_Unpickler_Read(self, &s, 4) < 0) return -1; - if (Py_SIZE(self->stack) <= 0) - return stack_underflow(); + if (Py_SIZE(self->stack) <= self->stack->fence) + return Pdata_stack_underflow(self->stack); value = self->stack->data[Py_SIZE(self->stack) - 1]; idx = calc_binsize(s, 4); @@ -5798,8 +5822,8 @@ load_memoize(UnpicklerObject *self) { PyObject *value; - if (Py_SIZE(self->stack) <= 0) - return stack_underflow(); + if (Py_SIZE(self->stack) <= self->stack->fence) + return Pdata_stack_underflow(self->stack); value = self->stack->data[Py_SIZE(self->stack) - 1]; return _Unpickler_MemoPut(self, self->memo_len, value); @@ -5813,8 +5837,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x) Py_ssize_t len, i; len = Py_SIZE(self->stack); - if (x > len || x <= 0) - return stack_underflow(); + if (x > len || x <= self->stack->fence) + return Pdata_stack_underflow(self->stack); if (len == x) /* nothing to do */ return 0; @@ -5863,8 +5887,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x) static int load_append(UnpicklerObject *self) { - if (Py_SIZE(self->stack) - 1 <= 0) - return stack_underflow(); + if (Py_SIZE(self->stack) - 1 <= self->stack->fence) + return Pdata_stack_underflow(self->stack); return do_append(self, Py_SIZE(self->stack) - 1); } @@ -5886,8 +5910,8 @@ do_setitems(UnpicklerObject *self, Py_ssize_t x) int status = 0; len = Py_SIZE(self->stack); - if (x > len || x <= 0) - return stack_underflow(); + if (x > len || x <= self->stack->fence) + return Pdata_stack_underflow(self->stack); if (len == x) /* nothing to do */ return 0; if ((len - x) % 2 != 0) { @@ -5940,8 +5964,8 @@ load_additems(UnpicklerObject *self) if (mark < 0) return -1; len = Py_SIZE(self->stack); - if (mark > len || mark <= 0) - return stack_underflow(); + if (mark > len || mark <= self->stack->fence) + return Pdata_stack_underflow(self->stack); if (len == mark) /* nothing to do */ return 0; @@ -5996,8 +6020,8 @@ load_build(UnpicklerObject *self) /* Stack is ... instance, state. We want to leave instance at * the stack top, possibly mutated via instance.__setstate__(state). */ - if (Py_SIZE(self->stack) < 2) - return stack_underflow(); + if (Py_SIZE(self->stack) - 2 < self->stack->fence) + return Pdata_stack_underflow(self->stack); PDATA_POP(self->stack, state); if (state == NULL) @@ -6133,7 +6157,8 @@ load_mark(UnpicklerObject *self) self->marks_size = (Py_ssize_t)alloc; } - self->marks[self->num_marks++] = Py_SIZE(self->stack); + self->stack->mark_set = 1; + self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack); return 0; } @@ -6216,6 +6241,8 @@ load(UnpicklerObject *self) char *s = NULL; self->num_marks = 0; + self->stack->mark_set = 0; + self->stack->fence = 0; self->proto = 0; if (Py_SIZE(self->stack)) Pdata_clear(self->stack, 0); |