diff options
author | Kristján Valur Jónsson <sweskman@gmail.com> | 2013-03-20 01:02:10 (GMT) |
---|---|---|
committer | Kristján Valur Jónsson <sweskman@gmail.com> | 2013-03-20 01:02:10 (GMT) |
commit | d7009c69136a3809282804f460902ab42e9972f6 (patch) | |
tree | 4d63e0482a090ede2c41c6a8f347418665e93072 /Python/marshal.c | |
parent | c7c42efb16612f66af266073937cffa99ad49a33 (diff) | |
download | cpython-d7009c69136a3809282804f460902ab42e9972f6.zip cpython-d7009c69136a3809282804f460902ab42e9972f6.tar.gz cpython-d7009c69136a3809282804f460902ab42e9972f6.tar.bz2 |
Issue #16475: Support object instancing, recursion and interned strings
in marshal
Diffstat (limited to 'Python/marshal.c')
-rw-r--r-- | Python/marshal.c | 272 |
1 files changed, 245 insertions, 27 deletions
diff --git a/Python/marshal.c b/Python/marshal.c index 959f3f7..c3e75b1 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1,8 +1,10 @@ /* Write Python objects to files and read them back. - This is intended for writing and reading compiled Python code only; - a true persistent storage facility would be much harder, since - it would have to take circular links and sharing into account. */ + This is primarily intended for writing and reading compiled Python code, + even though dicts, lists, sets and frozensets, not commonly seen in + code objects, are supported. + Version 3 of this protocol properly supports circular links + and sharing. */ #define PY_SSIZE_T_CLEAN @@ -41,6 +43,8 @@ #define TYPE_BINARY_COMPLEX 'y' #define TYPE_LONG 'l' #define TYPE_STRING 's' +#define TYPE_INTERNED 't' +#define TYPE_REF 'r' #define TYPE_TUPLE '(' #define TYPE_LIST '[' #define TYPE_DICT '{' @@ -49,6 +53,7 @@ #define TYPE_UNKNOWN '?' #define TYPE_SET '<' #define TYPE_FROZENSET '>' +#define FLAG_REF '\x80' /* with a type, add obj to index */ #define WFERR_OK 0 #define WFERR_UNMARSHALLABLE 1 @@ -65,6 +70,7 @@ typedef struct { PyObject *current_filename; char *ptr; char *end; + PyObject *refs; /* dict on marshal, list on unmarshal */ int version; } WFILE; @@ -151,13 +157,17 @@ w_long(long x, WFILE *p) #endif #define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) +#define W_TYPE(t, p) do { \ + w_byte((t) | flag, (p)); \ +} while(0) + static void -w_PyLong(const PyLongObject *ob, WFILE *p) +w_PyLong(const PyLongObject *ob, char flag, WFILE *p) { Py_ssize_t i, j, n, l; digit d; - w_byte(TYPE_LONG, p); + W_TYPE(TYPE_LONG, p); if (Py_SIZE(ob) == 0) { w_long((long)0, p); return; @@ -194,10 +204,64 @@ w_PyLong(const PyLongObject *ob, WFILE *p) } while (d != 0); } +static int +w_ref(PyObject *v, char *flag, WFILE *p) +{ + PyObject *id; + PyObject *idx; + + if (p->version < 3 || p->refs == NULL) + return 0; /* not writing object references */ + + /* if it has only one reference, it definitely isn't shared */ + if (Py_REFCNT(v) == 1) + return 0; + + id = PyLong_FromVoidPtr((void*)v); + if (id == NULL) + goto err; + idx = PyDict_GetItem(p->refs, id); + if (idx != NULL) { + /* write the reference index to the stream */ + long w = PyLong_AsLong(idx); + Py_DECREF(id); + if (w == -1 && PyErr_Occurred()) { + goto err; + } + /* we don't store "long" indices in the dict */ + assert(0 <= w && w <= 0x7fffffff); + w_byte(TYPE_REF, p); + w_long(w, p); + return 1; + } else { + int ok; + Py_ssize_t s = PyDict_Size(p->refs); + /* we don't support long indices */ + if (s >= 0x7fffffff) { + PyErr_SetString(PyExc_ValueError, "too many objects"); + goto err; + } + idx = PyLong_FromSsize_t(s); + ok = idx && PyDict_SetItem(p->refs, id, idx) == 0; + Py_DECREF(id); + Py_XDECREF(idx); + if (!ok) + goto err; + *flag |= FLAG_REF; + return 0; + } +err: + p->error = WFERR_UNMARSHALLABLE; + return 1; +} + +static void +w_complex_object(PyObject *v, char flag, WFILE *p); + static void w_object(PyObject *v, WFILE *p) { - Py_ssize_t i, n; + char flag = '\0'; p->depth++; @@ -222,24 +286,35 @@ w_object(PyObject *v, WFILE *p) else if (v == Py_True) { w_byte(TYPE_TRUE, p); } - else if (PyLong_CheckExact(v)) { + else if (!w_ref(v, &flag, p)) + w_complex_object(v, flag, p); + + p->depth--; +} + +static void +w_complex_object(PyObject *v, char flag, WFILE *p) +{ + Py_ssize_t i, n; + + if (PyLong_CheckExact(v)) { long x = PyLong_AsLong(v); if ((x == -1) && PyErr_Occurred()) { PyLongObject *ob = (PyLongObject *)v; PyErr_Clear(); - w_PyLong(ob, p); + w_PyLong(ob, flag, p); } else { #if SIZEOF_LONG > 4 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31); if (y && y != -1) { /* Too large for TYPE_INT */ - w_PyLong((PyLongObject*)v, p); + w_PyLong((PyLongObject*)v, flag, p); } else #endif { - w_byte(TYPE_INT, p); + W_TYPE(TYPE_INT, p); w_long(x, p); } } @@ -252,7 +327,7 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_BINARY_FLOAT, p); + W_TYPE(TYPE_BINARY_FLOAT, p); w_string((char*)buf, 8, p); } else { @@ -263,7 +338,7 @@ w_object(PyObject *v, WFILE *p) return; } n = strlen(buf); - w_byte(TYPE_FLOAT, p); + W_TYPE(TYPE_FLOAT, p); w_byte((int)n, p); w_string(buf, n, p); PyMem_Free(buf); @@ -277,7 +352,7 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_BINARY_COMPLEX, p); + W_TYPE(TYPE_BINARY_COMPLEX, p); w_string((char*)buf, 8, p); if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v), buf, 1) < 0) { @@ -288,7 +363,7 @@ w_object(PyObject *v, WFILE *p) } else { char *buf; - w_byte(TYPE_COMPLEX, p); + W_TYPE(TYPE_COMPLEX, p); buf = PyOS_double_to_string(PyComplex_RealAsDouble(v), 'g', 17, 0, NULL); if (!buf) { @@ -312,7 +387,7 @@ w_object(PyObject *v, WFILE *p) } } else if (PyBytes_CheckExact(v)) { - w_byte(TYPE_STRING, p); + W_TYPE(TYPE_STRING, p); n = PyBytes_GET_SIZE(v); W_SIZE(n, p); w_string(PyBytes_AS_STRING(v), n, p); @@ -325,14 +400,17 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_UNICODE, p); + if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_INTERNED, p); + else + W_TYPE(TYPE_UNICODE, p); n = PyBytes_GET_SIZE(utf8); W_SIZE(n, p); w_string(PyBytes_AS_STRING(utf8), n, p); Py_DECREF(utf8); } else if (PyTuple_CheckExact(v)) { - w_byte(TYPE_TUPLE, p); + W_TYPE(TYPE_TUPLE, p); n = PyTuple_Size(v); W_SIZE(n, p); for (i = 0; i < n; i++) { @@ -340,7 +418,7 @@ w_object(PyObject *v, WFILE *p) } } else if (PyList_CheckExact(v)) { - w_byte(TYPE_LIST, p); + W_TYPE(TYPE_LIST, p); n = PyList_GET_SIZE(v); W_SIZE(n, p); for (i = 0; i < n; i++) { @@ -350,7 +428,7 @@ w_object(PyObject *v, WFILE *p) else if (PyDict_CheckExact(v)) { Py_ssize_t pos; PyObject *key, *value; - w_byte(TYPE_DICT, p); + W_TYPE(TYPE_DICT, p); /* This one is NULL object terminated! */ pos = 0; while (PyDict_Next(v, &pos, &key, &value)) { @@ -363,9 +441,9 @@ w_object(PyObject *v, WFILE *p) PyObject *value, *it; if (PyObject_TypeCheck(v, &PySet_Type)) - w_byte(TYPE_SET, p); + W_TYPE(TYPE_SET, p); else - w_byte(TYPE_FROZENSET, p); + W_TYPE(TYPE_FROZENSET, p); n = PyObject_Size(v); if (n == -1) { p->depth--; @@ -392,7 +470,7 @@ w_object(PyObject *v, WFILE *p) } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v; - w_byte(TYPE_CODE, p); + W_TYPE(TYPE_CODE, p); w_long(co->co_argcount, p); w_long(co->co_kwonlyargcount, p); w_long(co->co_nlocals, p); @@ -419,7 +497,7 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_STRING, p); + W_TYPE(TYPE_STRING, p); n = view.len; s = view.buf; W_SIZE(n, p); @@ -427,10 +505,9 @@ w_object(PyObject *v, WFILE *p) PyBuffer_Release(&view); } else { - w_byte(TYPE_UNKNOWN, p); + W_TYPE(TYPE_UNKNOWN, p); p->error = WFERR_UNMARSHALLABLE; } - p->depth--; } /* version currently has no effect for writing longs. */ @@ -441,6 +518,7 @@ PyMarshal_WriteLongToFile(long x, FILE *fp, int version) wf.fp = fp; wf.error = WFERR_OK; wf.depth = 0; + wf.refs = NULL; wf.version = version; w_long(x, &wf); } @@ -452,8 +530,14 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) wf.fp = fp; wf.error = WFERR_OK; wf.depth = 0; + if (version >= 3) { + if ((wf.refs = PyDict_New()) == NULL) + return; /* caller mush check PyErr_Occurred() */ + } else + wf.refs = NULL; wf.version = version; w_object(x, &wf); + Py_XDECREF(wf.refs); } typedef WFILE RFILE; /* Same struct with different invariants */ @@ -489,7 +573,7 @@ r_string(char *s, Py_ssize_t n, RFILE *p) data->ob_type->tp_name); } else { - read = PyBytes_GET_SIZE(data); + read = (int)PyBytes_GET_SIZE(data); if (read > 0) { ptr = PyBytes_AS_STRING(data); memcpy(s, ptr, read); @@ -659,6 +743,59 @@ r_PyLong(RFILE *p) return NULL; } +/* allocate the reflist index */ +static PyObject * +r_ref_reserve(PyObject *o, Py_ssize_t *idx, int flag, RFILE *p) +{ + if (flag) { /* currently only FLAG_REF is defined */ + *idx = PyList_Size(p->refs); + if (*idx < 0) + goto err; + if (*idx >= 0x7ffffffe) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)"); + goto err; + } + if (PyList_Append(p->refs, Py_None) < 0) + goto err; + } else + *idx = 0; + return o; +err: + Py_XDECREF(o); /* release the new object */ + *idx = -1; + return NULL; +} + +/* insert actual object to the reflist */ +static PyObject * +r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) +{ + if (o && (flag & FLAG_REF)) { + if (PyList_SetItem(p->refs, idx, o) < 0) { + Py_DECREF(o); /* release the new object */ + return NULL; + } else { + Py_INCREF(o); /* a reference for the list */ + } + } + return o; +} + +/* combination of both above, used when an object can be + * created whenever it is seen in the file, as opposed to + * after having loaded its sub-objects. + */ +static PyObject * +r_ref(PyObject *o, int flag, RFILE *p) +{ + if (o && (flag & FLAG_REF)) { + if (PyList_Append(p->refs, o) < 0) { + Py_DECREF(o); /* release the new object */ + return NULL; + } + } + return o; +} static PyObject * r_object(RFILE *p) @@ -666,8 +803,10 @@ r_object(RFILE *p) /* NULL is a valid return value, it does not necessarily means that an exception is set. */ PyObject *v, *v2; + Py_ssize_t idx; long i, n; int type = r_byte(p); + int flag; PyObject *retval; p->depth++; @@ -678,6 +817,13 @@ r_object(RFILE *p) return NULL; } + flag = type & FLAG_REF; + type = type & ~FLAG_REF; + +#define R_REF(O) do{\ + O = r_ref(O, flag, p);\ +} while (0) + switch (type) { case EOF: @@ -718,14 +864,17 @@ r_object(RFILE *p) case TYPE_INT: n = r_long(p); retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n); + R_REF(retval); break; case TYPE_INT64: retval = r_long64(p); + R_REF(retval); break; case TYPE_LONG: retval = r_PyLong(p); + R_REF(retval); break; case TYPE_FLOAT: @@ -744,6 +893,7 @@ r_object(RFILE *p) if (dx == -1.0 && PyErr_Occurred()) break; retval = PyFloat_FromDouble(dx); + R_REF(retval); break; } @@ -763,6 +913,7 @@ r_object(RFILE *p) break; } retval = PyFloat_FromDouble(x); + R_REF(retval); break; } @@ -792,6 +943,7 @@ r_object(RFILE *p) if (c.imag == -1.0 && PyErr_Occurred()) break; retval = PyComplex_FromCComplex(c); + R_REF(retval); break; } @@ -822,6 +974,7 @@ r_object(RFILE *p) break; } retval = PyComplex_FromCComplex(c); + R_REF(retval); break; } @@ -849,9 +1002,11 @@ r_object(RFILE *p) break; } retval = v; + R_REF(retval); break; case TYPE_UNICODE: + case TYPE_INTERNED: { char *buffer; @@ -879,7 +1034,10 @@ r_object(RFILE *p) } v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); PyMem_DEL(buffer); + if (type == TYPE_INTERNED) + PyUnicode_InternInPlace(&v); retval = v; + R_REF(retval); break; } @@ -895,6 +1053,7 @@ r_object(RFILE *p) break; } v = PyTuple_New(n); + R_REF(v); if (v == NULL) { retval = NULL; break; @@ -926,6 +1085,7 @@ r_object(RFILE *p) break; } v = PyList_New(n); + R_REF(v); if (v == NULL) { retval = NULL; break; @@ -947,6 +1107,7 @@ r_object(RFILE *p) case TYPE_DICT: v = PyDict_New(); + R_REF(v); if (v == NULL) { retval = NULL; break; @@ -982,6 +1143,13 @@ r_object(RFILE *p) break; } v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL); + /* must use delayed registration of frozensets because they must + * be init with a refcount of 1 + */ + if (type == TYPE_SET) + R_REF(v); + else + v = r_ref_reserve(v, &idx, flag, p); if (v == NULL) { retval = NULL; break; @@ -1004,6 +1172,8 @@ r_object(RFILE *p) } Py_DECREF(v2); } + if (type != TYPE_SET) + v = r_ref_insert(v, idx, flag, p); retval = v; break; @@ -1024,6 +1194,12 @@ r_object(RFILE *p) PyObject *name = NULL; int firstlineno; PyObject *lnotab = NULL; + + r_ref_reserve(NULL, &idx, flag, p); + if (idx < 0) { + retval = NULL; + break; + } v = NULL; @@ -1090,6 +1266,7 @@ r_object(RFILE *p) code, consts, names, varnames, freevars, cellvars, filename, name, firstlineno, lnotab); + v = r_ref_insert(v, idx, flag, p); code_error: Py_XDECREF(code); @@ -1105,6 +1282,23 @@ r_object(RFILE *p) retval = v; break; + case TYPE_REF: + n = r_long(p); + if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); + retval = NULL; + break; + } + v = PyList_GET_ITEM(p->refs, n); + if (v == Py_None) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); + retval = NULL; + break; + } + Py_INCREF(v); + retval = v; + break; + default: /* Bogus data got written, which isn't ideal. This will let you keep working and recover. */ @@ -1210,7 +1404,11 @@ PyMarshal_ReadObjectFromFile(FILE *fp) rf.current_filename = NULL; rf.depth = 0; rf.ptr = rf.end = NULL; + rf.refs = PyList_New(0); + if (rf.refs == NULL) + return NULL; result = r_object(&rf); + Py_DECREF(rf.refs); return result; } @@ -1225,7 +1423,11 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) rf.ptr = str; rf.end = str + len; rf.depth = 0; + rf.refs = PyList_New(0); + if (rf.refs == NULL) + return NULL; result = r_object(&rf); + Py_DECREF(rf.refs); return result; } @@ -1244,7 +1446,13 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) wf.error = WFERR_OK; wf.depth = 0; wf.version = version; + if (version >= 3) { + if ((wf.refs = PyDict_New()) == NULL) + return NULL; + } else + wf.refs = NULL; w_object(x, &wf); + Py_XDECREF(wf.refs); if (wf.str != NULL) { char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str); if (wf.ptr - base > PY_SSIZE_T_MAX) { @@ -1316,6 +1524,8 @@ marshal_load(PyObject *self, PyObject *f) * Make a call to the read method, but read zero bytes. * This is to ensure that the object passed in at least * has a read method which returns bytes. + * This can be removed if we guarantee good error handling + * for r_string() */ data = _PyObject_CallMethodId(f, &PyId_read, "i", 0); if (data == NULL) @@ -1331,7 +1541,11 @@ marshal_load(PyObject *self, PyObject *f) rf.fp = NULL; rf.readable = f; rf.current_filename = NULL; - result = read_object(&rf); + if ((rf.refs = PyList_New(0)) != NULL) { + result = read_object(&rf); + Py_DECREF(rf.refs); + } else + result = NULL; } Py_DECREF(data); return result; @@ -1388,8 +1602,11 @@ marshal_loads(PyObject *self, PyObject *args) rf.ptr = s; rf.end = s + n; rf.depth = 0; + if ((rf.refs = PyList_New(0)) == NULL) + return NULL; result = read_object(&rf); PyBuffer_Release(&p); + Py_DECREF(rf.refs); return result; } @@ -1429,6 +1646,7 @@ Variables:\n\ version -- indicates the format that the module uses. Version 0 is the\n\ historical format, version 1 shares interned strings and version 2\n\ uses a binary format for floating point numbers.\n\ + Version 3 shares common object references (New in version 3.4).\n\ \n\ Functions:\n\ \n\ |