From 5bdae3bb7c7ab9e85453698972fa5fa926f012f3 Mon Sep 17 00:00:00 2001
From: Vinay Sajip <vinay_sajip@yahoo.co.uk>
Date: Sat, 2 Jul 2011 16:42:47 +0100
Subject: Closes #12291: Fixed bug which was found when doing multiple loads
 from one stream.

---
 Lib/importlib/test/source/test_file_loader.py |   2 +-
 Lib/test/test_marshal.py                      |  24 +++
 Misc/NEWS                                     |   3 +
 Python/marshal.c                              | 215 +++++++++++++++++++-------
 4 files changed, 183 insertions(+), 61 deletions(-)
diff --git a/Lib/importlib/test/source/test_file_loader.py b/Lib/importlib/test/source/test_file_loader.py
index 0ffe78d..2028092 100644
--- a/Lib/importlib/test/source/test_file_loader.py
+++ b/Lib/importlib/test/source/test_file_loader.py
@@ -214,7 +214,7 @@ class BadBytecodeTest(unittest.TestCase):
                                                 lambda bc: bc[:8] + b'<test>',
                                                 del_source=del_source)
             file_path = mapping['_temp'] if not del_source else bytecode_path
-            with self.assertRaises(ValueError):
+            with self.assertRaises(EOFError):
                 self.import_(file_path, '_temp')
 
     def _test_bad_magic(self, test, *, del_source=False):
diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py
index 81cf598..cd100f9 100644
--- a/Lib/test/test_marshal.py
+++ b/Lib/test/test_marshal.py
@@ -211,6 +211,30 @@ class BugsTestCase(unittest.TestCase):
         invalid_string = b'l\x02\x00\x00\x00\x00\x00\x00\x00'
         self.assertRaises(ValueError, marshal.loads, invalid_string)
 
+    def test_multiple_dumps_and_loads(self):
+        # Issue 12291: marshal.load() should be callable multiple times
+        # with interleaved data written by non-marshal code
+        # Adapted from a patch by Engelbert Gruber.
+        data = (1, 'abc', b'def', 1.0, (2, 'a', ['b', b'c']))
+        for interleaved in (b'', b'0123'):
+            ilen = len(interleaved)
+            positions = []
+            try:
+                with open(support.TESTFN, 'wb') as f:
+                    for d in data:
+                        marshal.dump(d, f)
+                        if ilen:
+                            f.write(interleaved)
+                        positions.append(f.tell())
+                with open(support.TESTFN, 'rb') as f:
+                    for i, d in enumerate(data):
+                        self.assertEqual(d, marshal.load(f))
+                        if ilen:
+                            f.read(ilen)
+                        self.assertEqual(positions[i], f.tell())
+            finally:
+                support.unlink(support.TESTFN)
+
 
 def test_main():
     support.run_unittest(IntTestCase,
diff --git a/Misc/NEWS b/Misc/NEWS
index 423d7ec..2013559 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.1 release candidate 2?
 Core and Builtins
 -----------------
 
+- Issue #12291: You can now load multiple marshalled objects from a stream,
+  with other data interleaved between marshalled objects.
+
 - Issue #12084: os.stat on Windows now works properly with relative symbolic
   links when called from any directory.
 
diff --git a/Python/marshal.c b/Python/marshal.c
index 73d4f37..396e05c 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -57,6 +57,7 @@ typedef struct {
     int error;  /* see WFERR_* values */
     int depth;
     /* If fp == NULL, the following are valid: */
+    PyObject * readable;    /* Stream-like object being read from */
     PyObject *str;
     char *ptr;
     char *end;
@@ -466,27 +467,75 @@ typedef WFILE RFILE; /* Same struct with different invariants */
 
 #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
 
-#define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
-
 static int
 r_string(char *s, int n, RFILE *p)
 {
-    if (p->fp != NULL)
-        /* The result fits into int because it must be <=n. */
-        return (int)fread(s, 1, n, p->fp);
-    if (p->end - p->ptr < n)
-        n = (int)(p->end - p->ptr);
-    memcpy(s, p->ptr, n);
-    p->ptr += n;
-    return n;
+    char * ptr;
+    int read, left;
+
+    if (!p->readable) {
+        if (p->fp != NULL)
+            /* The result fits into int because it must be <=n. */
+            read = (int) fread(s, 1, n, p->fp);
+        else {
+            left = (int)(p->end - p->ptr);
+            read = (left < n) ? left : n;
+            memcpy(s, p->ptr, read);
+            p->ptr += read;
+        }
+    }
+    else {
+        PyObject *data = PyObject_CallMethod(p->readable, "read", "i", n);
+        read = 0;
+        if (data != NULL) {
+            if (!PyBytes_Check(data)) {
+                PyErr_Format(PyExc_TypeError,
+                             "f.read() returned not bytes but %.100s",
+                             data->ob_type->tp_name);
+            }
+            else {
+                read = PyBytes_GET_SIZE(data);
+                if (read > 0) {
+                    ptr = PyBytes_AS_STRING(data);
+                    memcpy(s, ptr, read);
+                }
+            }
+            Py_DECREF(data);
+        }
+    }
+    if (!PyErr_Occurred() && (read < n)) {
+        PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
+    }
+    return read;
+}
+
+
+static int
+r_byte(RFILE *p)
+{
+    int c = EOF;
+    unsigned char ch;
+    int n;
+
+    if (!p->readable)
+        c = p->fp ? getc(p->fp) : rs_byte(p);
+    else {
+        n = r_string((char *) &ch, 1, p);
+        if (n > 0)
+            c = ch;
+    }
+    return c;
 }
 
 static int
 r_short(RFILE *p)
 {
     register short x;
-    x = r_byte(p);
-    x |= r_byte(p) << 8;
+    unsigned char buffer[2];
+
+    r_string((char *) buffer, 2, p);
+    x = buffer[0];
+    x |= buffer[1] << 8;
     /* Sign-extension, in case short greater than 16 bits */
     x |= -(x & 0x8000);
     return x;
@@ -496,19 +545,13 @@ static long
 r_long(RFILE *p)
 {
     register long x;
-    register FILE *fp = p->fp;
-    if (fp) {
-        x = getc(fp);
-        x |= (long)getc(fp) << 8;
-        x |= (long)getc(fp) << 16;
-        x |= (long)getc(fp) << 24;
-    }
-    else {
-        x = rs_byte(p);
-        x |= (long)rs_byte(p) << 8;
-        x |= (long)rs_byte(p) << 16;
-        x |= (long)rs_byte(p) << 24;
-    }
+    unsigned char buffer[4];
+
+    r_string((char *) buffer, 4, p);
+    x = buffer[0];
+    x |= (long)buffer[1] << 8;
+    x |= (long)buffer[2] << 16;
+    x |= (long)buffer[3] << 24;
 #if SIZEOF_LONG > 4
     /* Sign extension for 64-bit machines */
     x |= -(x & 0x80000000L);
@@ -526,25 +569,30 @@ r_long(RFILE *p)
 static PyObject *
 r_long64(RFILE *p)
 {
+    PyObject * result = NULL;
     long lo4 = r_long(p);
     long hi4 = r_long(p);
+
+    if (!PyErr_Occurred()) {
 #if SIZEOF_LONG > 4
-    long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
-    return PyLong_FromLong(x);
+        long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
+        result = PyLong_FromLong(x);
 #else
-    unsigned char buf[8];
-    int one = 1;
-    int is_little_endian = (int)*(char*)&one;
-    if (is_little_endian) {
-        memcpy(buf, &lo4, 4);
-        memcpy(buf+4, &hi4, 4);
-    }
-    else {
-        memcpy(buf, &hi4, 4);
-        memcpy(buf+4, &lo4, 4);
-    }
-    return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
+        unsigned char buf[8];
+        int one = 1;
+        int is_little_endian = (int)*(char*)&one;
+        if (is_little_endian) {
+            memcpy(buf, &lo4, 4);
+            memcpy(buf+4, &hi4, 4);
+        }
+        else {
+            memcpy(buf, &hi4, 4);
+            memcpy(buf+4, &lo4, 4);
+        }
+        result = _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
 #endif
+    }
+    return result;
 }
 
 static PyObject *
@@ -556,6 +604,8 @@ r_PyLong(RFILE *p)
     digit d;
 
     n = r_long(p);
+    if (PyErr_Occurred())
+        return NULL;
     if (n == 0)
         return (PyObject *)_PyLong_New(0);
     if (n < -INT_MAX || n > INT_MAX) {
@@ -575,6 +625,8 @@ r_PyLong(RFILE *p)
         d = 0;
         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
             md = r_short(p);
+            if (PyErr_Occurred())
+                break;
             if (md < 0 || md > PyLong_MARSHAL_BASE)
                 goto bad_digit;
             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
@@ -584,6 +636,8 @@ r_PyLong(RFILE *p)
     d = 0;
     for (j=0; j < shorts_in_top_digit; j++) {
         md = r_short(p);
+        if (PyErr_Occurred())
+            break;
         if (md < 0 || md > PyLong_MARSHAL_BASE)
             goto bad_digit;
         /* topmost marshal digit should be nonzero */
@@ -595,6 +649,10 @@ r_PyLong(RFILE *p)
         }
         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
     }
+    if (PyErr_Occurred()) {
+        Py_DECREF(ob);
+        return NULL;
+    }
     /* top digit should be nonzero, else the resulting PyLong won't be
        normalized */
     ob->ob_digit[size-1] = d;
@@ -663,7 +721,8 @@ r_object(RFILE *p)
         break;
 
     case TYPE_INT:
-        retval = PyLong_FromLong(r_long(p));
+        n = r_long(p);
+        retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
         break;
 
     case TYPE_INT64:
@@ -773,6 +832,10 @@ r_object(RFILE *p)
 
     case TYPE_STRING:
         n = r_long(p);
+        if (PyErr_Occurred()) {
+            retval = NULL;
+            break;
+        }
         if (n < 0 || n > INT_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
             retval = NULL;
@@ -798,6 +861,10 @@ r_object(RFILE *p)
         char *buffer;
 
         n = r_long(p);
+        if (PyErr_Occurred()) {
+            retval = NULL;
+            break;
+        }
         if (n < 0 || n > INT_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
             retval = NULL;
@@ -823,6 +890,10 @@ r_object(RFILE *p)
 
     case TYPE_TUPLE:
         n = r_long(p);
+        if (PyErr_Occurred()) {
+            retval = NULL;
+            break;
+        }
         if (n < 0 || n > INT_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
             retval = NULL;
@@ -850,6 +921,10 @@ r_object(RFILE *p)
 
     case TYPE_LIST:
         n = r_long(p);
+        if (PyErr_Occurred()) {
+            retval = NULL;
+            break;
+        }
         if (n < 0 || n > INT_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
             retval = NULL;
@@ -902,6 +977,10 @@ r_object(RFILE *p)
     case TYPE_SET:
     case TYPE_FROZENSET:
         n = r_long(p);
+        if (PyErr_Occurred()) {
+            retval = NULL;
+            break;
+        }
         if (n < 0 || n > INT_MAX) {
             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
             retval = NULL;
@@ -955,10 +1034,20 @@ r_object(RFILE *p)
 
             /* XXX ignore long->int overflows for now */
             argcount = (int)r_long(p);
+            if (PyErr_Occurred())
+                goto code_error;
             kwonlyargcount = (int)r_long(p);
+            if (PyErr_Occurred())
+                goto code_error;
             nlocals = (int)r_long(p);
+            if (PyErr_Occurred())
+                goto code_error;
             stacksize = (int)r_long(p);
+            if (PyErr_Occurred())
+                goto code_error;
             flags = (int)r_long(p);
+            if (PyErr_Occurred())
+                goto code_error;
             code = r_object(p);
             if (code == NULL)
                 goto code_error;
@@ -1040,6 +1129,7 @@ PyMarshal_ReadShortFromFile(FILE *fp)
 {
     RFILE rf;
     assert(fp);
+    rf.readable = NULL;
     rf.fp = fp;
     rf.strings = NULL;
     rf.end = rf.ptr = NULL;
@@ -1051,6 +1141,7 @@ PyMarshal_ReadLongFromFile(FILE *fp)
 {
     RFILE rf;
     rf.fp = fp;
+    rf.readable = NULL;
     rf.strings = NULL;
     rf.ptr = rf.end = NULL;
     return r_long(&rf);
@@ -1112,6 +1203,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
     RFILE rf;
     PyObject *result;
     rf.fp = fp;
+    rf.readable = NULL;
     rf.strings = PyList_New(0);
     rf.depth = 0;
     rf.ptr = rf.end = NULL;
@@ -1126,6 +1218,7 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
     RFILE rf;
     PyObject *result;
     rf.fp = NULL;
+    rf.readable = NULL;
     rf.ptr = str;
     rf.end = str + len;
     rf.strings = PyList_New(0);
@@ -1142,6 +1235,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
     PyObject *res = NULL;
 
     wf.fp = NULL;
+    wf.readable = NULL;
     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
     if (wf.str == NULL)
         return NULL;
@@ -1219,33 +1313,33 @@ The version argument indicates the data format that dump should use.");
 static PyObject *
 marshal_load(PyObject *self, PyObject *f)
 {
-    /* XXX Quick hack -- need to do this differently */
     PyObject *data, *result;
     RFILE rf;
-    data = PyObject_CallMethod(f, "read", "");
+    char *p;
+    int n;
+
+    /*
+     * Make a call to the read method, but read zero bytes.
+     * This is to ensure that the object passed in at least
+     * has a read method which returns bytes.
+     */
+    data = PyObject_CallMethod(f, "read", "i", 0);
     if (data == NULL)
         return NULL;
-    rf.fp = NULL;
-    if (PyBytes_Check(data)) {
-        rf.ptr = PyBytes_AS_STRING(data);
-        rf.end = rf.ptr + PyBytes_GET_SIZE(data);
-    }
-    else if (PyBytes_Check(data)) {
-        rf.ptr = PyBytes_AS_STRING(data);
-        rf.end = rf.ptr + PyBytes_GET_SIZE(data);
-    }
-    else {
+    if (!PyBytes_Check(data)) {
         PyErr_Format(PyExc_TypeError,
-                     "f.read() returned neither string "
-                     "nor bytes but %.100s",
+                     "f.read() returned not bytes but %.100s",
                      data->ob_type->tp_name);
-        Py_DECREF(data);
-        return NULL;
+        result = NULL;
+    }
+    else {
+        rf.strings = PyList_New(0);
+        rf.depth = 0;
+        rf.fp = NULL;
+        rf.readable = f;
+        result = read_object(&rf);
+        Py_DECREF(rf.strings);
     }
-    rf.strings = PyList_New(0);
-    rf.depth = 0;
-    result = read_object(&rf);
-    Py_DECREF(rf.strings);
     Py_DECREF(data);
     return result;
 }
@@ -1296,6 +1390,7 @@ marshal_loads(PyObject *self, PyObject *args)
     s = p.buf;
     n = p.len;
     rf.fp = NULL;
+    rf.readable = NULL;
     rf.ptr = s;
     rf.end = s + n;
     rf.strings = PyList_New(0);
-- 
cgit v0.12