summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2015-12-06 20:01:35 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2015-12-06 20:01:35 (GMT)
commit: 59fb6342a4ffdc23e1269a418734f4fc0f984873 (patch)
tree: 1be1e6d729c7b14769a44e80db57de2b58c2fa49
parent: c68e723e6f8bd9923d23a4f14b66504b192aba74 (diff)
download: cpython-59fb6342a4ffdc23e1269a418734f4fc0f984873.zip
cpython-59fb6342a4ffdc23e1269a418734f4fc0f984873.tar.gz
cpython-59fb6342a4ffdc23e1269a418734f4fc0f984873.tar.bz2
Issue #25761: Improved detecting errors in broken pickle data.
-rw-r--r-- Lib/pickle.py 88
-rw-r--r-- Lib/test/pickletester.py 17
-rw-r--r-- Lib/test/test_pickle.py 5
-rw-r--r-- Misc/NEWS 2
-rw-r--r-- Modules/_pickle.c 111
5 files changed, 113 insertions, 110 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 53978fb..a60b1b7 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -1031,7 +1031,7 @@ class _Unpickler:
self._unframer = _Unframer(self._file_read, self._file_readline)
self.read = self._unframer.read
self.readline = self._unframer.readline
- self.mark = object() # any new unique object
+ self.metastack = []
self.stack = []
self.append = self.stack.append
self.proto = 0
@@ -1047,20 +1047,12 @@ class _Unpickler:
except _Stop as stopinst:
return stopinst.value
- # Return largest index k such that self.stack[k] is self.mark.
- # If the stack doesn't contain a mark, eventually raises IndexError.
- # This could be sped by maintaining another stack, of indices at which
- # the mark appears. For that matter, the latter stack would suffice,
- # and we wouldn't need to push mark objects on self.stack at all.
- # Doing so is probably a good thing, though, since if the pickle is
- # corrupt (or hostile) we may get a clue from finding self.mark embedded
- # in unpickled objects.
- def marker(self):
- stack = self.stack
- mark = self.mark
- k = len(stack)-1
- while stack[k] is not mark: k = k-1
- return k
+ # Return a list of items pushed in the stack after last MARK instruction.
+ def pop_mark(self):
+ items = self.stack
+ self.stack = self.metastack.pop()
+ self.append = self.stack.append
+ return items
def persistent_load(self, pid):
raise UnpicklingError("unsupported persistent id encountered")
@@ -1237,8 +1229,8 @@ class _Unpickler:
dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
def load_tuple(self):
- k = self.marker()
- self.stack[k:] = [tuple(self.stack[k+1:])]
+ items = self.pop_mark()
+ self.append(tuple(items))
dispatch[TUPLE[0]] = load_tuple
def load_empty_tuple(self):
@@ -1270,21 +1262,20 @@ class _Unpickler:
dispatch[EMPTY_SET[0]] = load_empty_set
def load_frozenset(self):
- k = self.marker()
- self.stack[k:] = [frozenset(self.stack[k+1:])]
+ items = self.pop_mark()
+ self.append(frozenset(items))
dispatch[FROZENSET[0]] = load_frozenset
def load_list(self):
- k = self.marker()
- self.stack[k:] = [self.stack[k+1:]]
+ items = self.pop_mark()
+ self.append(items)
dispatch[LIST[0]] = load_list
def load_dict(self):
- k = self.marker()
- items = self.stack[k+1:]
+ items = self.pop_mark()
d = {items[i]: items[i+1]
for i in range(0, len(items), 2)}
- self.stack[k:] = [d]
+ self.append(d)
dispatch[DICT[0]] = load_dict
# INST and OBJ differ only in how they get a class object. It's not
@@ -1292,9 +1283,7 @@ class _Unpickler:
# previously diverged and grew different bugs.
# klass is the class to instantiate, and k points to the topmost mark
# object, following which are the arguments for klass.__init__.
- def _instantiate(self, klass, k):
- args = tuple(self.stack[k+1:])
- del self.stack[k:]
+ def _instantiate(self, klass, args):
if (args or not isinstance(klass, type) or
hasattr(klass, "__getinitargs__")):
try:
@@ -1310,14 +1299,14 @@ class _Unpickler:
module = self.readline()[:-1].decode("ascii")
name = self.readline()[:-1].decode("ascii")
klass = self.find_class(module, name)
- self._instantiate(klass, self.marker())
+ self._instantiate(klass, self.pop_mark())
dispatch[INST[0]] = load_inst
def load_obj(self):
# Stack is ... markobject classobject arg1 arg2 ...
- k = self.marker()
- klass = self.stack.pop(k+1)
- self._instantiate(klass, k)
+ args = self.pop_mark()
+ cls = args.pop(0)
+ self._instantiate(cls, args)
dispatch[OBJ[0]] = load_obj
def load_newobj(self):
@@ -1402,12 +1391,14 @@ class _Unpickler:
dispatch[REDUCE[0]] = load_reduce
def load_pop(self):
- del self.stack[-1]
+ if self.stack:
+ del self.stack[-1]
+ else:
+ self.pop_mark()
dispatch[POP[0]] = load_pop
def load_pop_mark(self):
- k = self.marker()
- del self.stack[k:]
+ self.pop_mark()
dispatch[POP_MARK[0]] = load_pop_mark
def load_dup(self):
@@ -1463,17 +1454,14 @@ class _Unpickler:
dispatch[APPEND[0]] = load_append
def load_appends(self):
- stack = self.stack
- mark = self.marker()
- list_obj = stack[mark - 1]
- items = stack[mark + 1:]
+ items = self.pop_mark()
+ list_obj = self.stack[-1]
if isinstance(list_obj, list):
list_obj.extend(items)
else:
append = list_obj.append
for item in items:
append(item)
- del stack[mark:]
dispatch[APPENDS[0]] = load_appends
def load_setitem(self):
@@ -1485,27 +1473,21 @@ class _Unpickler:
dispatch[SETITEM[0]] = load_setitem
def load_setitems(self):
- stack = self.stack
- mark = self.marker()
- dict = stack[mark - 1]
- for i in range(mark + 1, len(stack), 2):
- dict[stack[i]] = stack[i + 1]
-
- del stack[mark:]
+ items = self.pop_mark()
+ dict = self.stack[-1]
+ for i in range(0, len(items), 2):
+ dict[items[i]] = items[i + 1]
dispatch[SETITEMS[0]] = load_setitems
def load_additems(self):
- stack = self.stack
- mark = self.marker()
- set_obj = stack[mark - 1]
- items = stack[mark + 1:]
+ items = self.pop_mark()
+ set_obj = self.stack[-1]
if isinstance(set_obj, set):
set_obj.update(items)
else:
add = set_obj.add
for item in items:
add(item)
- del stack[mark:]
dispatch[ADDITEMS[0]] = load_additems
def load_build(self):
@@ -1533,7 +1515,9 @@ class _Unpickler:
dispatch[BUILD[0]] = load_build
def load_mark(self):
- self.append(self.mark)
+ self.metastack.append(self.stack)
+ self.stack = []
+ self.append = self.stack.append
dispatch[MARK[0]] = load_mark
def load_stop(self):
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 608c35a..217aa3d 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1000,7 +1000,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'0', # POP
b'1', # POP_MARK
b'2', # DUP
- # b'(2', # PyUnpickler doesn't raise
+ b'(2',
b'R', # REDUCE
b')R',
b'a', # APPEND
@@ -1009,7 +1009,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'Nb',
b'd', # DICT
b'e', # APPENDS
- # b'(e', # PyUnpickler raises AttributeError
+ b'(e',
b'ibuiltins\nlist\n', # INST
b'l', # LIST
b'o', # OBJ
@@ -1022,7 +1022,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'NNs',
b't', # TUPLE
b'u', # SETITEMS
- # b'(u', # PyUnpickler doesn't raise
+ b'(u',
b'}(Nu',
b'\x81', # NEWOBJ
b')\x81',
@@ -1033,7 +1033,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'N\x87',
b'NN\x87',
b'\x90', # ADDITEMS
- # b'(\x90', # PyUnpickler raises AttributeError
+ b'(\x90',
b'\x91', # FROZENSET
b'\x92', # NEWOBJ_EX
b')}\x92',
@@ -1046,7 +1046,7 @@ class AbstractUnpickleTests(unittest.TestCase):
def test_bad_mark(self):
badpickles = [
- # b'N(.', # STOP
+ b'N(.', # STOP
b'N(2', # DUP
b'cbuiltins\nlist\n)(R', # REDUCE
b'cbuiltins\nlist\n()R',
@@ -1081,7 +1081,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'N(\x94', # MEMOIZE
]
for p in badpickles:
- self.check_unpickling_error(self.bad_mark_errors, p)
+ self.check_unpickling_error(self.bad_stack_errors, p)
def test_truncated_data(self):
self.check_unpickling_error(EOFError, b'')
@@ -2581,11 +2581,6 @@ class AbstractPickleModuleTests(unittest.TestCase):
self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
- def test_bad_input(self):
- # Test issue4298
- s = bytes([0x58, 0, 0, 0, 0x54])
- self.assertRaises(EOFError, pickle.loads, s)
-
class AbstractPersistentPicklerTests(unittest.TestCase):
diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
index bd38cfb..6b97315 100644
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -33,8 +33,6 @@ class PyUnpicklerTests(AbstractUnpickleTests):
unpickler = pickle._Unpickler
bad_stack_errors = (IndexError,)
- bad_mark_errors = (IndexError, pickle.UnpicklingError,
- TypeError, AttributeError, EOFError)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
@@ -69,8 +67,6 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
pickler = pickle._Pickler
unpickler = pickle._Unpickler
bad_stack_errors = (pickle.UnpicklingError, IndexError)
- bad_mark_errors = (pickle.UnpicklingError, IndexError,
- TypeError, AttributeError, EOFError)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
@@ -132,7 +128,6 @@ if has_c_implementation:
class CUnpicklerTests(PyUnpicklerTests):
unpickler = _pickle.Unpickler
bad_stack_errors = (pickle.UnpicklingError,)
- bad_mark_errors = (EOFError,)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError)
diff --git a/Misc/NEWS b/Misc/NEWS
index 026a19a..578d2a3 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -109,6 +109,8 @@ Core and Builtins
Library
-------
+- Issue #25761: Improved detecting errors in broken pickle data.
+
- Issue #25717: Restore the previous behaviour of tolerating most fstat()
errors when opening files. This was a regression in 3.5a1, and stopped
anonymous temporary files from working in special cases.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 3ddf6a0..38598c5 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -370,18 +370,12 @@ _Pickle_FastCall(PyObject *func, PyObject *obj)
/*************************************************************************/
-static int
-stack_underflow(void)
-{
- PickleState *st = _Pickle_GetGlobalState();
- PyErr_SetString(st->UnpicklingError, "unpickling stack underflow");
- return -1;
-}
-
/* Internal data type used as the unpickling stack. */
typedef struct {
PyObject_VAR_HEAD
PyObject **data;
+ int mark_set; /* is MARK set? */
+ Py_ssize_t fence; /* position of top MARK or 0 */
Py_ssize_t allocated; /* number of slots in data allocated */
} Pdata;
@@ -412,6 +406,8 @@ Pdata_New(void)
if (!(self = PyObject_New(Pdata, &Pdata_Type)))
return NULL;
Py_SIZE(self) = 0;
+ self->mark_set = 0;
+ self->fence = 0;
self->allocated = 8;
self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
if (self->data)
@@ -429,8 +425,7 @@ Pdata_clear(Pdata *self, Py_ssize_t clearto)
{
Py_ssize_t i = Py_SIZE(self);
- if (clearto < 0)
- return stack_underflow();
+ assert(clearto >= self->fence);
if (clearto >= i)
return 0;
@@ -466,6 +461,17 @@ Pdata_grow(Pdata *self)
return -1;
}
+static int
+Pdata_stack_underflow(Pdata *self)
+{
+ PickleState *st = _Pickle_GetGlobalState();
+ PyErr_SetString(st->UnpicklingError,
+ self->mark_set ?
+ "unexpected MARK found" :
+ "unpickling stack underflow");
+ return -1;
+}
+
/* D is a Pdata*. Pop the topmost element and store it into V, which
* must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
* is raised and V is set to NULL.
@@ -473,9 +479,8 @@ Pdata_grow(Pdata *self)
static PyObject *
Pdata_pop(Pdata *self)
{
- if (Py_SIZE(self) == 0) {
- PickleState *st = _Pickle_GetGlobalState();
- PyErr_SetString(st->UnpicklingError, "bad pickle data");
+ if (Py_SIZE(self) <= self->fence) {
+ Pdata_stack_underflow(self);
return NULL;
}
return self->data[--Py_SIZE(self)];
@@ -507,6 +512,10 @@ Pdata_poptuple(Pdata *self, Py_ssize_t start)
PyObject *tuple;
Py_ssize_t len, i, j;
+ if (start < self->fence) {
+ Pdata_stack_underflow(self);
+ return NULL;
+ }
len = Py_SIZE(self) - start;
tuple = PyTuple_New(len);
if (tuple == NULL)
@@ -4585,13 +4594,19 @@ find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
static Py_ssize_t
marker(UnpicklerObject *self)
{
- PickleState *st = _Pickle_GetGlobalState();
+ Py_ssize_t mark;
+
if (self->num_marks < 1) {
+ PickleState *st = _Pickle_GetGlobalState();
PyErr_SetString(st->UnpicklingError, "could not find MARK");
return -1;
}
- return self->marks[--self->num_marks];
+ mark = self->marks[--self->num_marks];
+ self->stack->mark_set = self->num_marks != 0;
+ self->stack->fence = self->num_marks ?
+ self->marks[self->num_marks - 1] : 0;
+ return mark;
}
static int
@@ -5052,7 +5067,7 @@ load_counted_tuple(UnpicklerObject *self, int len)
PyObject *tuple;
if (Py_SIZE(self->stack) < len)
- return stack_underflow();
+ return Pdata_stack_underflow(self->stack);
tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
if (tuple == NULL)
@@ -5134,6 +5149,12 @@ load_dict(UnpicklerObject *self)
if ((dict = PyDict_New()) == NULL)
return -1;
+ if ((j - i) % 2 != 0) {
+ PickleState *st = _Pickle_GetGlobalState();
+ PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
+ return -1;
+ }
+
for (k = i + 1; k < j; k += 2) {
key = self->stack->data[k - 1];
value = self->stack->data[k];
@@ -5201,7 +5222,7 @@ load_obj(UnpicklerObject *self)
return -1;
if (Py_SIZE(self->stack) - i < 1)
- return stack_underflow();
+ return Pdata_stack_underflow(self->stack);
args = Pdata_poptuple(self->stack, i + 1);
if (args == NULL)
@@ -5518,12 +5539,15 @@ load_pop(UnpicklerObject *self)
*/
if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
self->num_marks--;
- } else if (len > 0) {
+ self->stack->mark_set = self->num_marks != 0;
+ self->stack->fence = self->num_marks ?
+ self->marks[self->num_marks - 1] : 0;
+ } else if (len <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
+ else {
len--;
Py_DECREF(self->stack->data[len]);
Py_SIZE(self->stack) = len;
- } else {
- return stack_underflow();
}
return 0;
}
@@ -5545,10 +5569,10 @@ static int
load_dup(UnpicklerObject *self)
{
PyObject *last;
- Py_ssize_t len;
+ Py_ssize_t len = Py_SIZE(self->stack);
- if ((len = Py_SIZE(self->stack)) <= 0)
- return stack_underflow();
+ if (len <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
last = self->stack->data[len - 1];
PDATA_APPEND(self->stack, last, -1);
return 0;
@@ -5731,8 +5755,8 @@ load_put(UnpicklerObject *self)
return -1;
if (len < 2)
return bad_readline();
- if (Py_SIZE(self->stack) <= 0)
- return stack_underflow();
+ if (Py_SIZE(self->stack) <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
key = PyLong_FromString(s, NULL, 10);
@@ -5760,8 +5784,8 @@ load_binput(UnpicklerObject *self)
if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
- if (Py_SIZE(self->stack) <= 0)
- return stack_underflow();
+ if (Py_SIZE(self->stack) <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
idx = Py_CHARMASK(s[0]);
@@ -5779,8 +5803,8 @@ load_long_binput(UnpicklerObject *self)
if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
- if (Py_SIZE(self->stack) <= 0)
- return stack_underflow();
+ if (Py_SIZE(self->stack) <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
idx = calc_binsize(s, 4);
@@ -5798,8 +5822,8 @@ load_memoize(UnpicklerObject *self)
{
PyObject *value;
- if (Py_SIZE(self->stack) <= 0)
- return stack_underflow();
+ if (Py_SIZE(self->stack) <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
return _Unpickler_MemoPut(self, self->memo_len, value);
@@ -5813,8 +5837,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x)
Py_ssize_t len, i;
len = Py_SIZE(self->stack);
- if (x > len || x <= 0)
- return stack_underflow();
+ if (x > len || x <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
if (len == x) /* nothing to do */
return 0;
@@ -5863,8 +5887,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x)
static int
load_append(UnpicklerObject *self)
{
- if (Py_SIZE(self->stack) - 1 <= 0)
- return stack_underflow();
+ if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
return do_append(self, Py_SIZE(self->stack) - 1);
}
@@ -5886,8 +5910,8 @@ do_setitems(UnpicklerObject *self, Py_ssize_t x)
int status = 0;
len = Py_SIZE(self->stack);
- if (x > len || x <= 0)
- return stack_underflow();
+ if (x > len || x <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
if (len == x) /* nothing to do */
return 0;
if ((len - x) % 2 != 0) {
@@ -5940,8 +5964,8 @@ load_additems(UnpicklerObject *self)
if (mark < 0)
return -1;
len = Py_SIZE(self->stack);
- if (mark > len || mark <= 0)
- return stack_underflow();
+ if (mark > len || mark <= self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
if (len == mark) /* nothing to do */
return 0;
@@ -5996,8 +6020,8 @@ load_build(UnpicklerObject *self)
/* Stack is ... instance, state. We want to leave instance at
* the stack top, possibly mutated via instance.__setstate__(state).
*/
- if (Py_SIZE(self->stack) < 2)
- return stack_underflow();
+ if (Py_SIZE(self->stack) - 2 < self->stack->fence)
+ return Pdata_stack_underflow(self->stack);
PDATA_POP(self->stack, state);
if (state == NULL)
@@ -6133,7 +6157,8 @@ load_mark(UnpicklerObject *self)
self->marks_size = (Py_ssize_t)alloc;
}
- self->marks[self->num_marks++] = Py_SIZE(self->stack);
+ self->stack->mark_set = 1;
+ self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
return 0;
}
@@ -6216,6 +6241,8 @@ load(UnpicklerObject *self)
char *s = NULL;
self->num_marks = 0;
+ self->stack->mark_set = 0;
+ self->stack->fence = 0;
self->proto = 0;
if (Py_SIZE(self->stack))
Pdata_clear(self->stack, 0);