From 5bdae3bb7c7ab9e85453698972fa5fa926f012f3 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Sat, 2 Jul 2011 16:42:47 +0100 Subject: Closes #12291: Fixed bug which was found when doing multiple loads from one stream. --- Lib/importlib/test/source/test_file_loader.py | 2 +- Lib/test/test_marshal.py | 24 +++ Misc/NEWS | 3 + Python/marshal.c | 215 +++++++++++++++++++------- 4 files changed, 183 insertions(+), 61 deletions(-) diff --git a/Lib/importlib/test/source/test_file_loader.py b/Lib/importlib/test/source/test_file_loader.py index 0ffe78d..2028092 100644 --- a/Lib/importlib/test/source/test_file_loader.py +++ b/Lib/importlib/test/source/test_file_loader.py @@ -214,7 +214,7 @@ class BadBytecodeTest(unittest.TestCase): lambda bc: bc[:8] + b'', del_source=del_source) file_path = mapping['_temp'] if not del_source else bytecode_path - with self.assertRaises(ValueError): + with self.assertRaises(EOFError): self.import_(file_path, '_temp') def _test_bad_magic(self, test, *, del_source=False): diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 81cf598..cd100f9 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -211,6 +211,30 @@ class BugsTestCase(unittest.TestCase): invalid_string = b'l\x02\x00\x00\x00\x00\x00\x00\x00' self.assertRaises(ValueError, marshal.loads, invalid_string) + def test_multiple_dumps_and_loads(self): + # Issue 12291: marshal.load() should be callable multiple times + # with interleaved data written by non-marshal code + # Adapted from a patch by Engelbert Gruber. + data = (1, 'abc', b'def', 1.0, (2, 'a', ['b', b'c'])) + for interleaved in (b'', b'0123'): + ilen = len(interleaved) + positions = [] + try: + with open(support.TESTFN, 'wb') as f: + for d in data: + marshal.dump(d, f) + if ilen: + f.write(interleaved) + positions.append(f.tell()) + with open(support.TESTFN, 'rb') as f: + for i, d in enumerate(data): + self.assertEqual(d, marshal.load(f)) + if ilen: + f.read(ilen) + self.assertEqual(positions[i], f.tell()) + finally: + support.unlink(support.TESTFN) + def test_main(): support.run_unittest(IntTestCase, diff --git a/Misc/NEWS b/Misc/NEWS index 423d7ec..2013559 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.2.1 release candidate 2? Core and Builtins ----------------- +- Issue #12291: You can now load multiple marshalled objects from a stream, + with other data interleaved between marshalled objects. + - Issue #12084: os.stat on Windows now works properly with relative symbolic links when called from any directory. diff --git a/Python/marshal.c b/Python/marshal.c index 73d4f37..396e05c 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -57,6 +57,7 @@ typedef struct { int error; /* see WFERR_* values */ int depth; /* If fp == NULL, the following are valid: */ + PyObject * readable; /* Stream-like object being read from */ PyObject *str; char *ptr; char *end; @@ -466,27 +467,75 @@ typedef WFILE RFILE; /* Same struct with different invariants */ #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF) -#define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p)) - static int r_string(char *s, int n, RFILE *p) { - if (p->fp != NULL) - /* The result fits into int because it must be <=n. */ - return (int)fread(s, 1, n, p->fp); - if (p->end - p->ptr < n) - n = (int)(p->end - p->ptr); - memcpy(s, p->ptr, n); - p->ptr += n; - return n; + char * ptr; + int read, left; + + if (!p->readable) { + if (p->fp != NULL) + /* The result fits into int because it must be <=n. */ + read = (int) fread(s, 1, n, p->fp); + else { + left = (int)(p->end - p->ptr); + read = (left < n) ? left : n; + memcpy(s, p->ptr, read); + p->ptr += read; + } + } + else { + PyObject *data = PyObject_CallMethod(p->readable, "read", "i", n); + read = 0; + if (data != NULL) { + if (!PyBytes_Check(data)) { + PyErr_Format(PyExc_TypeError, + "f.read() returned not bytes but %.100s", + data->ob_type->tp_name); + } + else { + read = PyBytes_GET_SIZE(data); + if (read > 0) { + ptr = PyBytes_AS_STRING(data); + memcpy(s, ptr, read); + } + } + Py_DECREF(data); + } + } + if (!PyErr_Occurred() && (read < n)) { + PyErr_SetString(PyExc_EOFError, "EOF read where not expected"); + } + return read; +} + + +static int +r_byte(RFILE *p) +{ + int c = EOF; + unsigned char ch; + int n; + + if (!p->readable) + c = p->fp ? getc(p->fp) : rs_byte(p); + else { + n = r_string((char *) &ch, 1, p); + if (n > 0) + c = ch; + } + return c; } static int r_short(RFILE *p) { register short x; - x = r_byte(p); - x |= r_byte(p) << 8; + unsigned char buffer[2]; + + r_string((char *) buffer, 2, p); + x = buffer[0]; + x |= buffer[1] << 8; /* Sign-extension, in case short greater than 16 bits */ x |= -(x & 0x8000); return x; @@ -496,19 +545,13 @@ static long r_long(RFILE *p) { register long x; - register FILE *fp = p->fp; - if (fp) { - x = getc(fp); - x |= (long)getc(fp) << 8; - x |= (long)getc(fp) << 16; - x |= (long)getc(fp) << 24; - } - else { - x = rs_byte(p); - x |= (long)rs_byte(p) << 8; - x |= (long)rs_byte(p) << 16; - x |= (long)rs_byte(p) << 24; - } + unsigned char buffer[4]; + + r_string((char *) buffer, 4, p); + x = buffer[0]; + x |= (long)buffer[1] << 8; + x |= (long)buffer[2] << 16; + x |= (long)buffer[3] << 24; #if SIZEOF_LONG > 4 /* Sign extension for 64-bit machines */ x |= -(x & 0x80000000L); @@ -526,25 +569,30 @@ r_long(RFILE *p) static PyObject * r_long64(RFILE *p) { + PyObject * result = NULL; long lo4 = r_long(p); long hi4 = r_long(p); + + if (!PyErr_Occurred()) { #if SIZEOF_LONG > 4 - long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); - return PyLong_FromLong(x); + long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); + result = PyLong_FromLong(x); #else - unsigned char buf[8]; - int one = 1; - int is_little_endian = (int)*(char*)&one; - if (is_little_endian) { - memcpy(buf, &lo4, 4); - memcpy(buf+4, &hi4, 4); - } - else { - memcpy(buf, &hi4, 4); - memcpy(buf+4, &lo4, 4); - } - return _PyLong_FromByteArray(buf, 8, is_little_endian, 1); + unsigned char buf[8]; + int one = 1; + int is_little_endian = (int)*(char*)&one; + if (is_little_endian) { + memcpy(buf, &lo4, 4); + memcpy(buf+4, &hi4, 4); + } + else { + memcpy(buf, &hi4, 4); + memcpy(buf+4, &lo4, 4); + } + result = _PyLong_FromByteArray(buf, 8, is_little_endian, 1); #endif + } + return result; } static PyObject * @@ -556,6 +604,8 @@ r_PyLong(RFILE *p) digit d; n = r_long(p); + if (PyErr_Occurred()) + return NULL; if (n == 0) return (PyObject *)_PyLong_New(0); if (n < -INT_MAX || n > INT_MAX) { @@ -575,6 +625,8 @@ r_PyLong(RFILE *p) d = 0; for (j=0; j < PyLong_MARSHAL_RATIO; j++) { md = r_short(p); + if (PyErr_Occurred()) + break; if (md < 0 || md > PyLong_MARSHAL_BASE) goto bad_digit; d += (digit)md << j*PyLong_MARSHAL_SHIFT; @@ -584,6 +636,8 @@ r_PyLong(RFILE *p) d = 0; for (j=0; j < shorts_in_top_digit; j++) { md = r_short(p); + if (PyErr_Occurred()) + break; if (md < 0 || md > PyLong_MARSHAL_BASE) goto bad_digit; /* topmost marshal digit should be nonzero */ @@ -595,6 +649,10 @@ r_PyLong(RFILE *p) } d += (digit)md << j*PyLong_MARSHAL_SHIFT; } + if (PyErr_Occurred()) { + Py_DECREF(ob); + return NULL; + } /* top digit should be nonzero, else the resulting PyLong won't be normalized */ ob->ob_digit[size-1] = d; @@ -663,7 +721,8 @@ r_object(RFILE *p) break; case TYPE_INT: - retval = PyLong_FromLong(r_long(p)); + n = r_long(p); + retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n); break; case TYPE_INT64: @@ -773,6 +832,10 @@ r_object(RFILE *p) case TYPE_STRING: n = r_long(p); + if (PyErr_Occurred()) { + retval = NULL; + break; + } if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); retval = NULL; @@ -798,6 +861,10 @@ r_object(RFILE *p) char *buffer; n = r_long(p); + if (PyErr_Occurred()) { + retval = NULL; + break; + } if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); retval = NULL; @@ -823,6 +890,10 @@ r_object(RFILE *p) case TYPE_TUPLE: n = r_long(p); + if (PyErr_Occurred()) { + retval = NULL; + break; + } if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); retval = NULL; @@ -850,6 +921,10 @@ r_object(RFILE *p) case TYPE_LIST: n = r_long(p); + if (PyErr_Occurred()) { + retval = NULL; + break; + } if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); retval = NULL; @@ -902,6 +977,10 @@ r_object(RFILE *p) case TYPE_SET: case TYPE_FROZENSET: n = r_long(p); + if (PyErr_Occurred()) { + retval = NULL; + break; + } if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); retval = NULL; @@ -955,10 +1034,20 @@ r_object(RFILE *p) /* XXX ignore long->int overflows for now */ argcount = (int)r_long(p); + if (PyErr_Occurred()) + goto code_error; kwonlyargcount = (int)r_long(p); + if (PyErr_Occurred()) + goto code_error; nlocals = (int)r_long(p); + if (PyErr_Occurred()) + goto code_error; stacksize = (int)r_long(p); + if (PyErr_Occurred()) + goto code_error; flags = (int)r_long(p); + if (PyErr_Occurred()) + goto code_error; code = r_object(p); if (code == NULL) goto code_error; @@ -1040,6 +1129,7 @@ PyMarshal_ReadShortFromFile(FILE *fp) { RFILE rf; assert(fp); + rf.readable = NULL; rf.fp = fp; rf.strings = NULL; rf.end = rf.ptr = NULL; @@ -1051,6 +1141,7 @@ PyMarshal_ReadLongFromFile(FILE *fp) { RFILE rf; rf.fp = fp; + rf.readable = NULL; rf.strings = NULL; rf.ptr = rf.end = NULL; return r_long(&rf); @@ -1112,6 +1203,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp) RFILE rf; PyObject *result; rf.fp = fp; + rf.readable = NULL; rf.strings = PyList_New(0); rf.depth = 0; rf.ptr = rf.end = NULL; @@ -1126,6 +1218,7 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) RFILE rf; PyObject *result; rf.fp = NULL; + rf.readable = NULL; rf.ptr = str; rf.end = str + len; rf.strings = PyList_New(0); @@ -1142,6 +1235,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) PyObject *res = NULL; wf.fp = NULL; + wf.readable = NULL; wf.str = PyBytes_FromStringAndSize((char *)NULL, 50); if (wf.str == NULL) return NULL; @@ -1219,33 +1313,33 @@ The version argument indicates the data format that dump should use."); static PyObject * marshal_load(PyObject *self, PyObject *f) { - /* XXX Quick hack -- need to do this differently */ PyObject *data, *result; RFILE rf; - data = PyObject_CallMethod(f, "read", ""); + char *p; + int n; + + /* + * Make a call to the read method, but read zero bytes. + * This is to ensure that the object passed in at least + * has a read method which returns bytes. + */ + data = PyObject_CallMethod(f, "read", "i", 0); if (data == NULL) return NULL; - rf.fp = NULL; - if (PyBytes_Check(data)) { - rf.ptr = PyBytes_AS_STRING(data); - rf.end = rf.ptr + PyBytes_GET_SIZE(data); - } - else if (PyBytes_Check(data)) { - rf.ptr = PyBytes_AS_STRING(data); - rf.end = rf.ptr + PyBytes_GET_SIZE(data); - } - else { + if (!PyBytes_Check(data)) { PyErr_Format(PyExc_TypeError, - "f.read() returned neither string " - "nor bytes but %.100s", + "f.read() returned not bytes but %.100s", data->ob_type->tp_name); - Py_DECREF(data); - return NULL; + result = NULL; + } + else { + rf.strings = PyList_New(0); + rf.depth = 0; + rf.fp = NULL; + rf.readable = f; + result = read_object(&rf); + Py_DECREF(rf.strings); } - rf.strings = PyList_New(0); - rf.depth = 0; - result = read_object(&rf); - Py_DECREF(rf.strings); Py_DECREF(data); return result; } @@ -1296,6 +1390,7 @@ marshal_loads(PyObject *self, PyObject *args) s = p.buf; n = p.len; rf.fp = NULL; + rf.readable = NULL; rf.ptr = s; rf.end = s + n; rf.strings = PyList_New(0); -- cgit v0.12