summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKristján Valur Jónsson <sweskman@gmail.com>2013-03-20 01:02:10 (GMT)
committerKristján Valur Jónsson <sweskman@gmail.com>2013-03-20 01:02:10 (GMT)
commitd7009c69136a3809282804f460902ab42e9972f6 (patch)
tree4d63e0482a090ede2c41c6a8f347418665e93072
parentc7c42efb16612f66af266073937cffa99ad49a33 (diff)
downloadcpython-d7009c69136a3809282804f460902ab42e9972f6.zip
cpython-d7009c69136a3809282804f460902ab42e9972f6.tar.gz
cpython-d7009c69136a3809282804f460902ab42e9972f6.tar.bz2
Issue #16475: Support object instancing, recursion and interned strings
in marshal
-rw-r--r--Doc/library/marshal.rst9
-rw-r--r--Include/marshal.h2
-rw-r--r--Lib/test/test_marshal.py122
-rw-r--r--Misc/NEWS3
-rw-r--r--Python/marshal.c272
5 files changed, 376 insertions, 32 deletions
diff --git a/Doc/library/marshal.rst b/Doc/library/marshal.rst
index 3b9e3d2..124eb61 100644
--- a/Doc/library/marshal.rst
+++ b/Doc/library/marshal.rst
@@ -40,10 +40,11 @@ this module. The following types are supported: booleans, integers, floating
point numbers, complex numbers, strings, bytes, bytearrays, tuples, lists, sets,
frozensets, dictionaries, and code objects, where it should be understood that
tuples, lists, sets, frozensets and dictionaries are only supported as long as
-the values contained therein are themselves supported; and recursive lists, sets
-and dictionaries should not be written (they will cause infinite loops). The
+the values contained therein are themselves supported.
singletons :const:`None`, :const:`Ellipsis` and :exc:`StopIteration` can also be
marshalled and unmarshalled.
+For format *version* lower than 3, recursive lists, sets and dictionaries cannot
+be written (see below).
There are functions that read/write files as well as functions operating on
strings.
@@ -103,7 +104,9 @@ In addition, the following constants are defined:
Indicates the format that the module uses. Version 0 is the historical
format, version 1 shares interned strings and version 2 uses a binary format
- for floating point numbers. The current version is 2.
+ for floating point numbers.
+ Version 3 adds support for object instancing and recursion.
+ The current version is 3.
.. rubric:: Footnotes
diff --git a/Include/marshal.h b/Include/marshal.h
index e96d062..0c69655 100644
--- a/Include/marshal.h
+++ b/Include/marshal.h
@@ -7,7 +7,7 @@
extern "C" {
#endif
-#define Py_MARSHAL_VERSION 2
+#define Py_MARSHAL_VERSION 3
PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int);
PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int);
diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py
index 025b53c..16e6d81 100644
--- a/Lib/test/test_marshal.py
+++ b/Lib/test/test_marshal.py
@@ -200,10 +200,14 @@ class BugsTestCase(unittest.TestCase):
except Exception:
pass
- def test_loads_recursion(self):
+ def test_loads_2x_code(self):
s = b'c' + (b'X' * 4*4) + b'{' * 2**20
self.assertRaises(ValueError, marshal.loads, s)
+ def test_loads_recursion(self):
+ s = b'c' + (b'X' * 4*5) + b'{' * 2**20
+ self.assertRaises(ValueError, marshal.loads, s)
+
def test_recursion_limit(self):
# Create a deeply nested structure.
head = last = []
@@ -323,6 +327,122 @@ class LargeValuesTestCase(unittest.TestCase):
def test_bytearray(self, size):
self.check_unmarshallable(bytearray(size))
+def CollectObjectIDs(ids, obj):
+ """Collect object ids seen in a structure"""
+ if id(obj) in ids:
+ return
+ ids.add(id(obj))
+ if isinstance(obj, (list, tuple, set, frozenset)):
+ for e in obj:
+ CollectObjectIDs(ids, e)
+ elif isinstance(obj, dict):
+ for k, v in obj.items():
+ CollectObjectIDs(ids, k)
+ CollectObjectIDs(ids, v)
+ return len(ids)
+
+class InstancingTestCase(unittest.TestCase, HelperMixin):
+ intobj = 123321
+ floatobj = 1.2345
+ strobj = "abcde"*3
+ dictobj = {"hello":floatobj, "goodbye":floatobj, floatobj:"hello"}
+
+ def helper3(self, rsample, recursive=False, simple=False):
+ #we have two instances
+ sample = (rsample, rsample)
+
+ n0 = CollectObjectIDs(set(), sample)
+
+ s3 = marshal.dumps(sample, 3)
+ n3 = CollectObjectIDs(set(), marshal.loads(s3))
+
+ #same number of instances generated
+ self.assertEqual(n3, n0)
+
+ if not recursive:
+ #can compare with version 2
+ s2 = marshal.dumps(sample, 2)
+ n2 = CollectObjectIDs(set(), marshal.loads(s2))
+ #old format generated more instances
+ self.assertGreater(n2, n0)
+
+ #if complex objects are in there, old format is larger
+ if not simple:
+ self.assertGreater(len(s2), len(s3))
+ else:
+ self.assertGreaterEqual(len(s2), len(s3))
+
+ def testInt(self):
+ self.helper(self.intobj)
+ self.helper3(self.intobj, simple=True)
+
+ def testFloat(self):
+ self.helper(self.floatobj)
+ self.helper3(self.floatobj)
+
+ def testStr(self):
+ self.helper(self.strobj)
+ self.helper3(self.strobj)
+
+ def testDict(self):
+ self.helper(self.dictobj)
+ self.helper3(self.dictobj)
+
+ def testModule(self):
+ with open(__file__, "rb") as f:
+ code = f.read()
+ if __file__.endswith(".py"):
+ code = compile(code, __file__, "exec")
+ self.helper(code)
+ self.helper3(code)
+
+ def testRecursion(self):
+ d = dict(self.dictobj)
+ d["self"] = d
+ self.helper3(d, recursive=True)
+ l = [self.dictobj]
+ l.append(l)
+ self.helper3(l, recursive=True)
+
+class CompatibilityTestCase(unittest.TestCase):
+ def _test(self, version):
+ with open(__file__, "rb") as f:
+ code = f.read()
+ if __file__.endswith(".py"):
+ code = compile(code, __file__, "exec")
+ data = marshal.dumps(code, version)
+ marshal.loads(data)
+
+ def test0To3(self):
+ self._test(0)
+
+ def test1To3(self):
+ self._test(1)
+
+ def test2To3(self):
+ self._test(2)
+
+ def test3To3(self):
+ self._test(3)
+
+class InterningTestCase(unittest.TestCase, HelperMixin):
+ strobj = "this is an interned string"
+ strobj = sys.intern(strobj)
+
+ def testIntern(self):
+ s = marshal.loads(marshal.dumps(self.strobj))
+ self.assertEqual(s, self.strobj)
+ self.assertEqual(id(s), id(self.strobj))
+ s2 = sys.intern(s)
+ self.assertEqual(id(s2), id(s))
+
+ def testNoIntern(self):
+ s = marshal.loads(marshal.dumps(self.strobj, 2))
+ self.assertEqual(s, self.strobj)
+ self.assertNotEqual(id(s), id(self.strobj))
+ s2 = sys.intern(s)
+ self.assertNotEqual(id(s2), id(s))
+
def test_main():
support.run_unittest(IntTestCase,
diff --git a/Misc/NEWS b/Misc/NEWS
index 792b62f..e7c59b1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins
-----------------
+- Issue #16475: Support object instancing, recursion and interned strings
+ in marshal
+
- Use the HTTPS PyPI url for upload, overriding any plain HTTP URL in pypirc.
- Issue #16795: On the ast.arguments object, unify vararg with varargannotation
diff --git a/Python/marshal.c b/Python/marshal.c
index 959f3f7..c3e75b1 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -1,8 +1,10 @@
/* Write Python objects to files and read them back.
- This is intended for writing and reading compiled Python code only;
- a true persistent storage facility would be much harder, since
- it would have to take circular links and sharing into account. */
+ This is primarily intended for writing and reading compiled Python code,
+ even though dicts, lists, sets and frozensets, not commonly seen in
+ code objects, are supported.
+ Version 3 of this protocol properly supports circular links
+ and sharing. */
#define PY_SSIZE_T_CLEAN
@@ -41,6 +43,8 @@
#define TYPE_BINARY_COMPLEX 'y'
#define TYPE_LONG 'l'
#define TYPE_STRING 's'
+#define TYPE_INTERNED 't'
+#define TYPE_REF 'r'
#define TYPE_TUPLE '('
#define TYPE_LIST '['
#define TYPE_DICT '{'
@@ -49,6 +53,7 @@
#define TYPE_UNKNOWN '?'
#define TYPE_SET '<'
#define TYPE_FROZENSET '>'
+#define FLAG_REF '\x80' /* with a type, add obj to index */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
@@ -65,6 +70,7 @@ typedef struct {
PyObject *current_filename;
char *ptr;
char *end;
+ PyObject *refs; /* dict on marshal, list on unmarshal */
int version;
} WFILE;
@@ -151,13 +157,17 @@ w_long(long x, WFILE *p)
#endif
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
+#define W_TYPE(t, p) do { \
+ w_byte((t) | flag, (p)); \
+} while(0)
+
static void
-w_PyLong(const PyLongObject *ob, WFILE *p)
+w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
{
Py_ssize_t i, j, n, l;
digit d;
- w_byte(TYPE_LONG, p);
+ W_TYPE(TYPE_LONG, p);
if (Py_SIZE(ob) == 0) {
w_long((long)0, p);
return;
@@ -194,10 +204,64 @@ w_PyLong(const PyLongObject *ob, WFILE *p)
} while (d != 0);
}
+static int
+w_ref(PyObject *v, char *flag, WFILE *p)
+{
+ PyObject *id;
+ PyObject *idx;
+
+ if (p->version < 3 || p->refs == NULL)
+ return 0; /* not writing object references */
+
+ /* if it has only one reference, it definitely isn't shared */
+ if (Py_REFCNT(v) == 1)
+ return 0;
+
+ id = PyLong_FromVoidPtr((void*)v);
+ if (id == NULL)
+ goto err;
+ idx = PyDict_GetItem(p->refs, id);
+ if (idx != NULL) {
+ /* write the reference index to the stream */
+ long w = PyLong_AsLong(idx);
+ Py_DECREF(id);
+ if (w == -1 && PyErr_Occurred()) {
+ goto err;
+ }
+ /* we don't store "long" indices in the dict */
+ assert(0 <= w && w <= 0x7fffffff);
+ w_byte(TYPE_REF, p);
+ w_long(w, p);
+ return 1;
+ } else {
+ int ok;
+ Py_ssize_t s = PyDict_Size(p->refs);
+ /* we don't support long indices */
+ if (s >= 0x7fffffff) {
+ PyErr_SetString(PyExc_ValueError, "too many objects");
+ goto err;
+ }
+ idx = PyLong_FromSsize_t(s);
+ ok = idx && PyDict_SetItem(p->refs, id, idx) == 0;
+ Py_DECREF(id);
+ Py_XDECREF(idx);
+ if (!ok)
+ goto err;
+ *flag |= FLAG_REF;
+ return 0;
+ }
+err:
+ p->error = WFERR_UNMARSHALLABLE;
+ return 1;
+}
+
+static void
+w_complex_object(PyObject *v, char flag, WFILE *p);
+
static void
w_object(PyObject *v, WFILE *p)
{
- Py_ssize_t i, n;
+ char flag = '\0';
p->depth++;
@@ -222,24 +286,35 @@ w_object(PyObject *v, WFILE *p)
else if (v == Py_True) {
w_byte(TYPE_TRUE, p);
}
- else if (PyLong_CheckExact(v)) {
+ else if (!w_ref(v, &flag, p))
+ w_complex_object(v, flag, p);
+
+ p->depth--;
+}
+
+static void
+w_complex_object(PyObject *v, char flag, WFILE *p)
+{
+ Py_ssize_t i, n;
+
+ if (PyLong_CheckExact(v)) {
long x = PyLong_AsLong(v);
if ((x == -1) && PyErr_Occurred()) {
PyLongObject *ob = (PyLongObject *)v;
PyErr_Clear();
- w_PyLong(ob, p);
+ w_PyLong(ob, flag, p);
}
else {
#if SIZEOF_LONG > 4
long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
if (y && y != -1) {
/* Too large for TYPE_INT */
- w_PyLong((PyLongObject*)v, p);
+ w_PyLong((PyLongObject*)v, flag, p);
}
else
#endif
{
- w_byte(TYPE_INT, p);
+ W_TYPE(TYPE_INT, p);
w_long(x, p);
}
}
@@ -252,7 +327,7 @@ w_object(PyObject *v, WFILE *p)
p->error = WFERR_UNMARSHALLABLE;
return;
}
- w_byte(TYPE_BINARY_FLOAT, p);
+ W_TYPE(TYPE_BINARY_FLOAT, p);
w_string((char*)buf, 8, p);
}
else {
@@ -263,7 +338,7 @@ w_object(PyObject *v, WFILE *p)
return;
}
n = strlen(buf);
- w_byte(TYPE_FLOAT, p);
+ W_TYPE(TYPE_FLOAT, p);
w_byte((int)n, p);
w_string(buf, n, p);
PyMem_Free(buf);
@@ -277,7 +352,7 @@ w_object(PyObject *v, WFILE *p)
p->error = WFERR_UNMARSHALLABLE;
return;
}
- w_byte(TYPE_BINARY_COMPLEX, p);
+ W_TYPE(TYPE_BINARY_COMPLEX, p);
w_string((char*)buf, 8, p);
if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
buf, 1) < 0) {
@@ -288,7 +363,7 @@ w_object(PyObject *v, WFILE *p)
}
else {
char *buf;
- w_byte(TYPE_COMPLEX, p);
+ W_TYPE(TYPE_COMPLEX, p);
buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
'g', 17, 0, NULL);
if (!buf) {
@@ -312,7 +387,7 @@ w_object(PyObject *v, WFILE *p)
}
}
else if (PyBytes_CheckExact(v)) {
- w_byte(TYPE_STRING, p);
+ W_TYPE(TYPE_STRING, p);
n = PyBytes_GET_SIZE(v);
W_SIZE(n, p);
w_string(PyBytes_AS_STRING(v), n, p);
@@ -325,14 +400,17 @@ w_object(PyObject *v, WFILE *p)
p->error = WFERR_UNMARSHALLABLE;
return;
}
- w_byte(TYPE_UNICODE, p);
+ if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v))
+ W_TYPE(TYPE_INTERNED, p);
+ else
+ W_TYPE(TYPE_UNICODE, p);
n = PyBytes_GET_SIZE(utf8);
W_SIZE(n, p);
w_string(PyBytes_AS_STRING(utf8), n, p);
Py_DECREF(utf8);
}
else if (PyTuple_CheckExact(v)) {
- w_byte(TYPE_TUPLE, p);
+ W_TYPE(TYPE_TUPLE, p);
n = PyTuple_Size(v);
W_SIZE(n, p);
for (i = 0; i < n; i++) {
@@ -340,7 +418,7 @@ w_object(PyObject *v, WFILE *p)
}
}
else if (PyList_CheckExact(v)) {
- w_byte(TYPE_LIST, p);
+ W_TYPE(TYPE_LIST, p);
n = PyList_GET_SIZE(v);
W_SIZE(n, p);
for (i = 0; i < n; i++) {
@@ -350,7 +428,7 @@ w_object(PyObject *v, WFILE *p)
else if (PyDict_CheckExact(v)) {
Py_ssize_t pos;
PyObject *key, *value;
- w_byte(TYPE_DICT, p);
+ W_TYPE(TYPE_DICT, p);
/* This one is NULL object terminated! */
pos = 0;
while (PyDict_Next(v, &pos, &key, &value)) {
@@ -363,9 +441,9 @@ w_object(PyObject *v, WFILE *p)
PyObject *value, *it;
if (PyObject_TypeCheck(v, &PySet_Type))
- w_byte(TYPE_SET, p);
+ W_TYPE(TYPE_SET, p);
else
- w_byte(TYPE_FROZENSET, p);
+ W_TYPE(TYPE_FROZENSET, p);
n = PyObject_Size(v);
if (n == -1) {
p->depth--;
@@ -392,7 +470,7 @@ w_object(PyObject *v, WFILE *p)
}
else if (PyCode_Check(v)) {
PyCodeObject *co = (PyCodeObject *)v;
- w_byte(TYPE_CODE, p);
+ W_TYPE(TYPE_CODE, p);
w_long(co->co_argcount, p);
w_long(co->co_kwonlyargcount, p);
w_long(co->co_nlocals, p);
@@ -419,7 +497,7 @@ w_object(PyObject *v, WFILE *p)
p->error = WFERR_UNMARSHALLABLE;
return;
}
- w_byte(TYPE_STRING, p);
+ W_TYPE(TYPE_STRING, p);
n = view.len;
s = view.buf;
W_SIZE(n, p);
@@ -427,10 +505,9 @@ w_object(PyObject *v, WFILE *p)
PyBuffer_Release(&view);
}
else {
- w_byte(TYPE_UNKNOWN, p);
+ W_TYPE(TYPE_UNKNOWN, p);
p->error = WFERR_UNMARSHALLABLE;
}
- p->depth--;
}
/* version currently has no effect for writing longs. */
@@ -441,6 +518,7 @@ PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
wf.fp = fp;
wf.error = WFERR_OK;
wf.depth = 0;
+ wf.refs = NULL;
wf.version = version;
w_long(x, &wf);
}
@@ -452,8 +530,14 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
wf.fp = fp;
wf.error = WFERR_OK;
wf.depth = 0;
+ if (version >= 3) {
+ if ((wf.refs = PyDict_New()) == NULL)
+ return; /* caller mush check PyErr_Occurred() */
+ } else
+ wf.refs = NULL;
wf.version = version;
w_object(x, &wf);
+ Py_XDECREF(wf.refs);
}
typedef WFILE RFILE; /* Same struct with different invariants */
@@ -489,7 +573,7 @@ r_string(char *s, Py_ssize_t n, RFILE *p)
data->ob_type->tp_name);
}
else {
- read = PyBytes_GET_SIZE(data);
+ read = (int)PyBytes_GET_SIZE(data);
if (read > 0) {
ptr = PyBytes_AS_STRING(data);
memcpy(s, ptr, read);
@@ -659,6 +743,59 @@ r_PyLong(RFILE *p)
return NULL;
}
+/* allocate the reflist index */
+static PyObject *
+r_ref_reserve(PyObject *o, Py_ssize_t *idx, int flag, RFILE *p)
+{
+ if (flag) { /* currently only FLAG_REF is defined */
+ *idx = PyList_Size(p->refs);
+ if (*idx < 0)
+ goto err;
+ if (*idx >= 0x7ffffffe) {
+ PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
+ goto err;
+ }
+ if (PyList_Append(p->refs, Py_None) < 0)
+ goto err;
+ } else
+ *idx = 0;
+ return o;
+err:
+ Py_XDECREF(o); /* release the new object */
+ *idx = -1;
+ return NULL;
+}
+
+/* insert actual object to the reflist */
+static PyObject *
+r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
+{
+ if (o && (flag & FLAG_REF)) {
+ if (PyList_SetItem(p->refs, idx, o) < 0) {
+ Py_DECREF(o); /* release the new object */
+ return NULL;
+ } else {
+ Py_INCREF(o); /* a reference for the list */
+ }
+ }
+ return o;
+}
+
+/* combination of both above, used when an object can be
+ * created whenever it is seen in the file, as opposed to
+ * after having loaded its sub-objects.
+ */
+static PyObject *
+r_ref(PyObject *o, int flag, RFILE *p)
+{
+ if (o && (flag & FLAG_REF)) {
+ if (PyList_Append(p->refs, o) < 0) {
+ Py_DECREF(o); /* release the new object */
+ return NULL;
+ }
+ }
+ return o;
+}
static PyObject *
r_object(RFILE *p)
@@ -666,8 +803,10 @@ r_object(RFILE *p)
/* NULL is a valid return value, it does not necessarily means that
an exception is set. */
PyObject *v, *v2;
+ Py_ssize_t idx;
long i, n;
int type = r_byte(p);
+ int flag;
PyObject *retval;
p->depth++;
@@ -678,6 +817,13 @@ r_object(RFILE *p)
return NULL;
}
+ flag = type & FLAG_REF;
+ type = type & ~FLAG_REF;
+
+#define R_REF(O) do{\
+ O = r_ref(O, flag, p);\
+} while (0)
+
switch (type) {
case EOF:
@@ -718,14 +864,17 @@ r_object(RFILE *p)
case TYPE_INT:
n = r_long(p);
retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
+ R_REF(retval);
break;
case TYPE_INT64:
retval = r_long64(p);
+ R_REF(retval);
break;
case TYPE_LONG:
retval = r_PyLong(p);
+ R_REF(retval);
break;
case TYPE_FLOAT:
@@ -744,6 +893,7 @@ r_object(RFILE *p)
if (dx == -1.0 && PyErr_Occurred())
break;
retval = PyFloat_FromDouble(dx);
+ R_REF(retval);
break;
}
@@ -763,6 +913,7 @@ r_object(RFILE *p)
break;
}
retval = PyFloat_FromDouble(x);
+ R_REF(retval);
break;
}
@@ -792,6 +943,7 @@ r_object(RFILE *p)
if (c.imag == -1.0 && PyErr_Occurred())
break;
retval = PyComplex_FromCComplex(c);
+ R_REF(retval);
break;
}
@@ -822,6 +974,7 @@ r_object(RFILE *p)
break;
}
retval = PyComplex_FromCComplex(c);
+ R_REF(retval);
break;
}
@@ -849,9 +1002,11 @@ r_object(RFILE *p)
break;
}
retval = v;
+ R_REF(retval);
break;
case TYPE_UNICODE:
+ case TYPE_INTERNED:
{
char *buffer;
@@ -879,7 +1034,10 @@ r_object(RFILE *p)
}
v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
PyMem_DEL(buffer);
+ if (type == TYPE_INTERNED)
+ PyUnicode_InternInPlace(&v);
retval = v;
+ R_REF(retval);
break;
}
@@ -895,6 +1053,7 @@ r_object(RFILE *p)
break;
}
v = PyTuple_New(n);
+ R_REF(v);
if (v == NULL) {
retval = NULL;
break;
@@ -926,6 +1085,7 @@ r_object(RFILE *p)
break;
}
v = PyList_New(n);
+ R_REF(v);
if (v == NULL) {
retval = NULL;
break;
@@ -947,6 +1107,7 @@ r_object(RFILE *p)
case TYPE_DICT:
v = PyDict_New();
+ R_REF(v);
if (v == NULL) {
retval = NULL;
break;
@@ -982,6 +1143,13 @@ r_object(RFILE *p)
break;
}
v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
+ /* must use delayed registration of frozensets because they must
+ * be init with a refcount of 1
+ */
+ if (type == TYPE_SET)
+ R_REF(v);
+ else
+ v = r_ref_reserve(v, &idx, flag, p);
if (v == NULL) {
retval = NULL;
break;
@@ -1004,6 +1172,8 @@ r_object(RFILE *p)
}
Py_DECREF(v2);
}
+ if (type != TYPE_SET)
+ v = r_ref_insert(v, idx, flag, p);
retval = v;
break;
@@ -1024,6 +1194,12 @@ r_object(RFILE *p)
PyObject *name = NULL;
int firstlineno;
PyObject *lnotab = NULL;
+
+ r_ref_reserve(NULL, &idx, flag, p);
+ if (idx < 0) {
+ retval = NULL;
+ break;
+ }
v = NULL;
@@ -1090,6 +1266,7 @@ r_object(RFILE *p)
code, consts, names, varnames,
freevars, cellvars, filename, name,
firstlineno, lnotab);
+ v = r_ref_insert(v, idx, flag, p);
code_error:
Py_XDECREF(code);
@@ -1105,6 +1282,23 @@ r_object(RFILE *p)
retval = v;
break;
+ case TYPE_REF:
+ n = r_long(p);
+ if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
+ PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
+ retval = NULL;
+ break;
+ }
+ v = PyList_GET_ITEM(p->refs, n);
+ if (v == Py_None) {
+ PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
+ retval = NULL;
+ break;
+ }
+ Py_INCREF(v);
+ retval = v;
+ break;
+
default:
/* Bogus data got written, which isn't ideal.
This will let you keep working and recover. */
@@ -1210,7 +1404,11 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
rf.current_filename = NULL;
rf.depth = 0;
rf.ptr = rf.end = NULL;
+ rf.refs = PyList_New(0);
+ if (rf.refs == NULL)
+ return NULL;
result = r_object(&rf);
+ Py_DECREF(rf.refs);
return result;
}
@@ -1225,7 +1423,11 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
rf.ptr = str;
rf.end = str + len;
rf.depth = 0;
+ rf.refs = PyList_New(0);
+ if (rf.refs == NULL)
+ return NULL;
result = r_object(&rf);
+ Py_DECREF(rf.refs);
return result;
}
@@ -1244,7 +1446,13 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
wf.error = WFERR_OK;
wf.depth = 0;
wf.version = version;
+ if (version >= 3) {
+ if ((wf.refs = PyDict_New()) == NULL)
+ return NULL;
+ } else
+ wf.refs = NULL;
w_object(x, &wf);
+ Py_XDECREF(wf.refs);
if (wf.str != NULL) {
char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
if (wf.ptr - base > PY_SSIZE_T_MAX) {
@@ -1316,6 +1524,8 @@ marshal_load(PyObject *self, PyObject *f)
* Make a call to the read method, but read zero bytes.
* This is to ensure that the object passed in at least
* has a read method which returns bytes.
+ * This can be removed if we guarantee good error handling
+ * for r_string()
*/
data = _PyObject_CallMethodId(f, &PyId_read, "i", 0);
if (data == NULL)
@@ -1331,7 +1541,11 @@ marshal_load(PyObject *self, PyObject *f)
rf.fp = NULL;
rf.readable = f;
rf.current_filename = NULL;
- result = read_object(&rf);
+ if ((rf.refs = PyList_New(0)) != NULL) {
+ result = read_object(&rf);
+ Py_DECREF(rf.refs);
+ } else
+ result = NULL;
}
Py_DECREF(data);
return result;
@@ -1388,8 +1602,11 @@ marshal_loads(PyObject *self, PyObject *args)
rf.ptr = s;
rf.end = s + n;
rf.depth = 0;
+ if ((rf.refs = PyList_New(0)) == NULL)
+ return NULL;
result = read_object(&rf);
PyBuffer_Release(&p);
+ Py_DECREF(rf.refs);
return result;
}
@@ -1429,6 +1646,7 @@ Variables:\n\
version -- indicates the format that the module uses. Version 0 is the\n\
historical format, version 1 shares interned strings and version 2\n\
uses a binary format for floating point numbers.\n\
+ Version 3 shares common object references (New in version 3.4).\n\
\n\
Functions:\n\
\n\