diff options
Diffstat (limited to 'Python/marshal.c')
| -rw-r--r-- | Python/marshal.c | 799 | 
1 files changed, 546 insertions, 253 deletions
| diff --git a/Python/marshal.c b/Python/marshal.c index cc17329..dc5411c 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1,8 +1,10 @@  /* Write Python objects to files and read them back. -   This is intended for writing and reading compiled Python code only; -   a true persistent storage facility would be much harder, since -   it would have to take circular links and sharing into account. */ +   This is primarily intended for writing and reading compiled Python code, +   even though dicts, lists, sets and frozensets, not commonly seen in +   code objects, are supported. +   Version 3 of this protocol properly supports circular links +   and sharing. */  #define PY_SSIZE_T_CLEAN @@ -31,16 +33,14 @@  #define TYPE_STOPITER           'S'  #define TYPE_ELLIPSIS           '.'  #define TYPE_INT                'i' -/* TYPE_INT64 is deprecated. It is not -   generated anymore, and support for reading it -   will be removed in Python 3.4. */ -#define TYPE_INT64              'I'  #define TYPE_FLOAT              'f'  #define TYPE_BINARY_FLOAT       'g'  #define TYPE_COMPLEX            'x'  #define TYPE_BINARY_COMPLEX     'y'  #define TYPE_LONG               'l'  #define TYPE_STRING             's' +#define TYPE_INTERNED           't' +#define TYPE_REF                'r'  #define TYPE_TUPLE              '('  #define TYPE_LIST               '['  #define TYPE_DICT               '{' @@ -49,6 +49,13 @@  #define TYPE_UNKNOWN            '?'  #define TYPE_SET                '<'  #define TYPE_FROZENSET          '>' +#define FLAG_REF                '\x80' /* with a type, add obj to index */ + +#define TYPE_ASCII              'a' +#define TYPE_ASCII_INTERNED     'A' +#define TYPE_SMALL_TUPLE        ')' +#define TYPE_SHORT_ASCII        'z' +#define TYPE_SHORT_ASCII_INTERNED 'Z'  #define WFERR_OK 0  #define WFERR_UNMARSHALLABLE 1 @@ -65,15 +72,18 @@ typedef struct {      PyObject *current_filename;      char *ptr;      char *end; +    char *buf; +    Py_ssize_t buf_size; +    PyObject *refs; /* dict on marshal, list on unmarshal */      int version;  } WFILE;  #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \                        else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \ -                           else w_more(c, p) +                           else w_more((c), p)  static void -w_more(int c, WFILE *p) +w_more(char c, WFILE *p)  {      Py_ssize_t size, newsize;      if (p->str == NULL) @@ -90,7 +100,7 @@ w_more(int c, WFILE *p)          p->ptr = PyBytes_AS_STRING((PyBytesObject *)p->str) + size;          p->end =              PyBytes_AS_STRING((PyBytesObject *)p->str) + newsize; -        *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char); +        *p->ptr++ = c;      }  } @@ -146,6 +156,13 @@ w_pstring(const char *s, Py_ssize_t n, WFILE *p)          w_string(s, n, p);  } +static void +w_short_pstring(const char *s, Py_ssize_t n, WFILE *p) +{ +    w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p); +    w_string(s, n, p); +} +  /* We assume that Python ints are stored internally in base some power of     2**15; for the sake of portability we'll always read and write them in base     exactly 2**15. */ @@ -158,13 +175,17 @@ w_pstring(const char *s, Py_ssize_t n, WFILE *p)  #endif  #define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) +#define W_TYPE(t, p) do { \ +    w_byte((t) | flag, (p)); \ +} while(0) +  static void -w_PyLong(const PyLongObject *ob, WFILE *p) +w_PyLong(const PyLongObject *ob, char flag, WFILE *p)  {      Py_ssize_t i, j, n, l;      digit d; -    w_byte(TYPE_LONG, p); +    W_TYPE(TYPE_LONG, p);      if (Py_SIZE(ob) == 0) {          w_long((long)0, p);          return; @@ -201,10 +222,64 @@ w_PyLong(const PyLongObject *ob, WFILE *p)      } while (d != 0);  } +static int +w_ref(PyObject *v, char *flag, WFILE *p) +{ +    PyObject *id; +    PyObject *idx; + +    if (p->version < 3 || p->refs == NULL) +        return 0; /* not writing object references */ + +    /* if it has only one reference, it definitely isn't shared */ +    if (Py_REFCNT(v) == 1) +        return 0; + +    id = PyLong_FromVoidPtr((void*)v); +    if (id == NULL) +        goto err; +    idx = PyDict_GetItem(p->refs, id); +    if (idx != NULL) { +        /* write the reference index to the stream */ +        long w = PyLong_AsLong(idx); +        Py_DECREF(id); +        if (w == -1 && PyErr_Occurred()) { +            goto err; +        } +        /* we don't store "long" indices in the dict */ +        assert(0 <= w && w <= 0x7fffffff); +        w_byte(TYPE_REF, p); +        w_long(w, p); +        return 1; +    } else { +        int ok; +        Py_ssize_t s = PyDict_Size(p->refs); +        /* we don't support long indices */ +        if (s >= 0x7fffffff) { +            PyErr_SetString(PyExc_ValueError, "too many objects"); +            goto err; +        } +        idx = PyLong_FromSsize_t(s); +        ok = idx && PyDict_SetItem(p->refs, id, idx) == 0; +        Py_DECREF(id); +        Py_XDECREF(idx); +        if (!ok) +            goto err; +        *flag |= FLAG_REF; +        return 0; +    } +err: +    p->error = WFERR_UNMARSHALLABLE; +    return 1; +} + +static void +w_complex_object(PyObject *v, char flag, WFILE *p); +  static void  w_object(PyObject *v, WFILE *p)  { -    Py_ssize_t i, n; +    char flag = '\0';      p->depth++; @@ -229,24 +304,35 @@ w_object(PyObject *v, WFILE *p)      else if (v == Py_True) {          w_byte(TYPE_TRUE, p);      } -    else if (PyLong_CheckExact(v)) { +    else if (!w_ref(v, &flag, p)) +        w_complex_object(v, flag, p); + +    p->depth--; +} + +static void +w_complex_object(PyObject *v, char flag, WFILE *p) +{ +    Py_ssize_t i, n; + +    if (PyLong_CheckExact(v)) {          long x = PyLong_AsLong(v);          if ((x == -1)  && PyErr_Occurred()) {              PyLongObject *ob = (PyLongObject *)v;              PyErr_Clear(); -            w_PyLong(ob, p); +            w_PyLong(ob, flag, p);          }          else {  #if SIZEOF_LONG > 4              long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);              if (y && y != -1) {                  /* Too large for TYPE_INT */ -                w_PyLong((PyLongObject*)v, p); +                w_PyLong((PyLongObject*)v, flag, p);              }              else  #endif              { -                w_byte(TYPE_INT, p); +                W_TYPE(TYPE_INT, p);                  w_long(x, p);              }          } @@ -259,7 +345,7 @@ w_object(PyObject *v, WFILE *p)                  p->error = WFERR_UNMARSHALLABLE;                  return;              } -            w_byte(TYPE_BINARY_FLOAT, p); +            W_TYPE(TYPE_BINARY_FLOAT, p);              w_string((char*)buf, 8, p);          }          else { @@ -270,7 +356,7 @@ w_object(PyObject *v, WFILE *p)                  return;              }              n = strlen(buf); -            w_byte(TYPE_FLOAT, p); +            W_TYPE(TYPE_FLOAT, p);              w_byte((int)n, p);              w_string(buf, n, p);              PyMem_Free(buf); @@ -284,7 +370,7 @@ w_object(PyObject *v, WFILE *p)                  p->error = WFERR_UNMARSHALLABLE;                  return;              } -            w_byte(TYPE_BINARY_COMPLEX, p); +            W_TYPE(TYPE_BINARY_COMPLEX, p);              w_string((char*)buf, 8, p);              if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),                                 buf, 1) < 0) { @@ -295,7 +381,7 @@ w_object(PyObject *v, WFILE *p)          }          else {              char *buf; -            w_byte(TYPE_COMPLEX, p); +            W_TYPE(TYPE_COMPLEX, p);              buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),                                          'g', 17, 0, NULL);              if (!buf) { @@ -319,31 +405,61 @@ w_object(PyObject *v, WFILE *p)          }      }      else if (PyBytes_CheckExact(v)) { -        w_byte(TYPE_STRING, p); +        W_TYPE(TYPE_STRING, p);          w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);      }      else if (PyUnicode_CheckExact(v)) { -        PyObject *utf8; -        utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); -        if (utf8 == NULL) { -            p->depth--; -            p->error = WFERR_UNMARSHALLABLE; -            return; +        if (p->version >= 4 && PyUnicode_IS_ASCII(v)) { +            int is_short = PyUnicode_GET_LENGTH(v) < 256; +            if (is_short) { +                if (PyUnicode_CHECK_INTERNED(v)) +                    W_TYPE(TYPE_SHORT_ASCII_INTERNED, p); +                else +                    W_TYPE(TYPE_SHORT_ASCII, p); +                w_short_pstring((char *) PyUnicode_1BYTE_DATA(v), +                                PyUnicode_GET_LENGTH(v), p); +            } +            else { +                if (PyUnicode_CHECK_INTERNED(v)) +                    W_TYPE(TYPE_ASCII_INTERNED, p); +                else +                    W_TYPE(TYPE_ASCII, p); +                w_pstring((char *) PyUnicode_1BYTE_DATA(v), +                          PyUnicode_GET_LENGTH(v), p); +            } +        } +        else { +            PyObject *utf8; +            utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); +            if (utf8 == NULL) { +                p->depth--; +                p->error = WFERR_UNMARSHALLABLE; +                return; +            } +            if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v)) +                W_TYPE(TYPE_INTERNED, p); +            else +                W_TYPE(TYPE_UNICODE, p); +            w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p); +            Py_DECREF(utf8);          } -        w_byte(TYPE_UNICODE, p); -        w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p); -        Py_DECREF(utf8);      }      else if (PyTuple_CheckExact(v)) { -        w_byte(TYPE_TUPLE, p);          n = PyTuple_Size(v); -        W_SIZE(n, p); +        if (p->version >= 4 && n < 256) { +            W_TYPE(TYPE_SMALL_TUPLE, p); +            w_byte((unsigned char)n, p); +        } +        else { +            W_TYPE(TYPE_TUPLE, p); +            W_SIZE(n, p); +        }          for (i = 0; i < n; i++) {              w_object(PyTuple_GET_ITEM(v, i), p);          }      }      else if (PyList_CheckExact(v)) { -        w_byte(TYPE_LIST, p); +        W_TYPE(TYPE_LIST, p);          n = PyList_GET_SIZE(v);          W_SIZE(n, p);          for (i = 0; i < n; i++) { @@ -353,7 +469,7 @@ w_object(PyObject *v, WFILE *p)      else if (PyDict_CheckExact(v)) {          Py_ssize_t pos;          PyObject *key, *value; -        w_byte(TYPE_DICT, p); +        W_TYPE(TYPE_DICT, p);          /* This one is NULL object terminated! */          pos = 0;          while (PyDict_Next(v, &pos, &key, &value)) { @@ -366,9 +482,9 @@ w_object(PyObject *v, WFILE *p)          PyObject *value, *it;          if (PyObject_TypeCheck(v, &PySet_Type)) -            w_byte(TYPE_SET, p); +            W_TYPE(TYPE_SET, p);          else -            w_byte(TYPE_FROZENSET, p); +            W_TYPE(TYPE_FROZENSET, p);          n = PyObject_Size(v);          if (n == -1) {              p->depth--; @@ -395,7 +511,7 @@ w_object(PyObject *v, WFILE *p)      }      else if (PyCode_Check(v)) {          PyCodeObject *co = (PyCodeObject *)v; -        w_byte(TYPE_CODE, p); +        W_TYPE(TYPE_CODE, p);          w_long(co->co_argcount, p);          w_long(co->co_kwonlyargcount, p);          w_long(co->co_nlocals, p); @@ -421,15 +537,14 @@ w_object(PyObject *v, WFILE *p)              p->error = WFERR_UNMARSHALLABLE;              return;          } -        w_byte(TYPE_STRING, p); +        W_TYPE(TYPE_STRING, p);          w_pstring(view.buf, view.len, p);          PyBuffer_Release(&view);      }      else { -        w_byte(TYPE_UNKNOWN, p); +        W_TYPE(TYPE_UNKNOWN, p);          p->error = WFERR_UNMARSHALLABLE;      } -    p->depth--;  }  /* version currently has no effect for writing ints. */ @@ -440,6 +555,7 @@ PyMarshal_WriteLongToFile(long x, FILE *fp, int version)      wf.fp = fp;      wf.error = WFERR_OK;      wf.depth = 0; +    wf.refs = NULL;      wf.version = version;      w_long(x, &wf);  } @@ -451,81 +567,107 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)      wf.fp = fp;      wf.error = WFERR_OK;      wf.depth = 0; +    if (version >= 3) { +        if ((wf.refs = PyDict_New()) == NULL) +            return; /* caller mush check PyErr_Occurred() */ +    } else +        wf.refs = NULL;      wf.version = version;      w_object(x, &wf); +    Py_XDECREF(wf.refs);  }  typedef WFILE RFILE; /* Same struct with different invariants */ -#define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF) - -static Py_ssize_t -r_string(char *s, Py_ssize_t n, RFILE *p) +static char * +r_string(Py_ssize_t n, RFILE *p)  { -    char *ptr; -    Py_ssize_t read, left; +    Py_ssize_t read = -1; + +    if (p->ptr != NULL) { +        /* Fast path for loads() */ +        char *res = p->ptr; +        Py_ssize_t left = p->end - p->ptr; +        if (left < n) { +            PyErr_SetString(PyExc_EOFError, +                            "marshal data too short"); +            return NULL; +        } +        p->ptr += n; +        return res; +    } +    if (p->buf == NULL) { +        p->buf = PyMem_MALLOC(n); +        if (p->buf == NULL) { +            PyErr_NoMemory(); +            return NULL; +        } +        p->buf_size = n; +    } +    else if (p->buf_size < n) { +        p->buf = PyMem_REALLOC(p->buf, n); +        if (p->buf == NULL) { +            PyErr_NoMemory(); +            return NULL; +        } +        p->buf_size = n; +    }      if (!p->readable) { -        if (p->fp != NULL) -            /* The result fits into int because it must be <=n. */ -            read = fread(s, 1, n, p->fp); -        else { -            left = p->end - p->ptr; -            read = (left < n) ? left : n; -            memcpy(s, p->ptr, read); -            p->ptr += read; -        } +        assert(p->fp != NULL); +        read = fread(p->buf, 1, n, p->fp);      }      else { -        _Py_IDENTIFIER(read); - -        PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "n", n); -        read = 0; -        if (data != NULL) { -            if (!PyBytes_Check(data)) { -                PyErr_Format(PyExc_TypeError, -                             "f.read() returned not bytes but %.100s", -                             data->ob_type->tp_name); -            } -            else { -                read = PyBytes_GET_SIZE(data); -                if (read > 0) { -                    if (read > n) { -                        PyErr_Format(PyExc_ValueError, -                                    "read() returned too much data: " -                                    "%zd bytes requested, %zd returned", -                                    n, read); -                        read = -1; -                    } -                    else { -                        ptr = PyBytes_AS_STRING(data); -                        memcpy(s, ptr, read); -                    } -                } -            } -            Py_DECREF(data); +        _Py_IDENTIFIER(readinto); +        PyObject *res, *mview; +        Py_buffer buf; + +        if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1) +            return NULL; +        mview = PyMemoryView_FromBuffer(&buf); +        if (mview == NULL) +            return NULL; + +        res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview); +        if (res != NULL) { +            read = PyNumber_AsSsize_t(res, PyExc_ValueError); +            Py_DECREF(res);          }      } -    if (!PyErr_Occurred() && (read < n)) { -        PyErr_SetString(PyExc_EOFError, "EOF read where not expected"); +    if (read != n) { +        if (!PyErr_Occurred()) { +            if (read > n) +                PyErr_Format(PyExc_ValueError, +                             "read() returned too much data: " +                             "%zd bytes requested, %zd returned", +                             n, read); +            else +                PyErr_SetString(PyExc_EOFError, +                                "EOF read where not expected"); +        } +        return NULL;      } -    return read; +    return p->buf;  } -  static int  r_byte(RFILE *p)  {      int c = EOF; -    unsigned char ch; -    Py_ssize_t n; -    if (!p->readable) -        c = p->fp ? getc(p->fp) : rs_byte(p); +    if (p->ptr != NULL) { +        if (p->ptr < p->end) +            c = (unsigned char) *p->ptr++; +        return c; +    } +    if (!p->readable) { +        assert(p->fp); +        c = getc(p->fp); +    }      else { -        n = r_string((char *) &ch, 1, p); -        if (n > 0) -            c = ch; +        char *ptr = r_string(1, p); +        if (ptr != NULL) +            c = *(unsigned char *) ptr;      }      return c;  } @@ -533,69 +675,37 @@ r_byte(RFILE *p)  static int  r_short(RFILE *p)  { -    register short x; -    unsigned char buffer[2]; - -    r_string((char *) buffer, 2, p); -    x = buffer[0]; -    x |= buffer[1] << 8; -    /* Sign-extension, in case short greater than 16 bits */ -    x |= -(x & 0x8000); +    short x = -1; +    unsigned char *buffer; + +    buffer = (unsigned char *) r_string(2, p); +    if (buffer != NULL) { +        x = buffer[0]; +        x |= buffer[1] << 8; +        /* Sign-extension, in case short greater than 16 bits */ +        x |= -(x & 0x8000); +    }      return x;  }  static long  r_long(RFILE *p)  { -    register long x; -    unsigned char buffer[4]; - -    r_string((char *) buffer, 4, p); -    x = buffer[0]; -    x |= (long)buffer[1] << 8; -    x |= (long)buffer[2] << 16; -    x |= (long)buffer[3] << 24; +    long x = -1; +    unsigned char *buffer; + +    buffer = (unsigned char *) r_string(4, p); +    if (buffer != NULL) { +        x = buffer[0]; +        x |= (long)buffer[1] << 8; +        x |= (long)buffer[2] << 16; +        x |= (long)buffer[3] << 24;  #if SIZEOF_LONG > 4 -    /* Sign extension for 64-bit machines */ -    x |= -(x & 0x80000000L); -#endif -    return x; -} - -/* r_long64 deals with the TYPE_INT64 code.  On a machine with -   sizeof(long) > 4, it returns a Python int object, else a Python long -   object.  Note that w_long64 writes out TYPE_INT if 32 bits is enough, -   so there's no inefficiency here in returning a PyLong on 32-bit boxes -   for everything written via TYPE_INT64 (i.e., if an int is written via -   TYPE_INT64, it *needs* more than 32 bits). -*/ -static PyObject * -r_long64(RFILE *p) -{ -    PyObject *result = NULL; -    long lo4 = r_long(p); -    long hi4 = r_long(p); - -    if (!PyErr_Occurred()) { -#if SIZEOF_LONG > 4 -        long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); -        result = PyLong_FromLong(x); -#else -        unsigned char buf[8]; -        int one = 1; -        int is_little_endian = (int)*(char*)&one; -        if (is_little_endian) { -            memcpy(buf, &lo4, 4); -            memcpy(buf+4, &hi4, 4); -        } -        else { -            memcpy(buf, &hi4, 4); -            memcpy(buf+4, &lo4, 4); -        } -        result = _PyLong_FromByteArray(buf, 8, is_little_endian, 1); +        /* Sign extension for 64-bit machines */ +        x |= -(x & 0x80000000L);  #endif      } -    return result; +    return x;  }  static PyObject * @@ -622,25 +732,31 @@ r_PyLong(RFILE *p)      ob = _PyLong_New(size);      if (ob == NULL)          return NULL; +      Py_SIZE(ob) = n > 0 ? size : -size;      for (i = 0; i < size-1; i++) {          d = 0;          for (j=0; j < PyLong_MARSHAL_RATIO; j++) {              md = r_short(p); -            if (PyErr_Occurred()) -                break; +            if (PyErr_Occurred()) { +                Py_DECREF(ob); +                return NULL; +            }              if (md < 0 || md > PyLong_MARSHAL_BASE)                  goto bad_digit;              d += (digit)md << j*PyLong_MARSHAL_SHIFT;          }          ob->ob_digit[i] = d;      } +      d = 0;      for (j=0; j < shorts_in_top_digit; j++) {          md = r_short(p); -        if (PyErr_Occurred()) -            break; +        if (PyErr_Occurred()) { +            Py_DECREF(ob); +            return NULL; +        }          if (md < 0 || md > PyLong_MARSHAL_BASE)              goto bad_digit;          /* topmost marshal digit should be nonzero */ @@ -667,6 +783,59 @@ r_PyLong(RFILE *p)      return NULL;  } +/* allocate the reflist index for a new object. Return -1 on failure */ +static Py_ssize_t +r_ref_reserve(int flag, RFILE *p) +{ +    if (flag) { /* currently only FLAG_REF is defined */ +        Py_ssize_t idx = PyList_GET_SIZE(p->refs); +        if (idx >= 0x7ffffffe) { +            PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)"); +            return -1; +        } +        if (PyList_Append(p->refs, Py_None) < 0) +            return -1; +        return idx; +    } else +        return 0; +} + +/* insert the new object 'o' to the reflist at previously + * allocated index 'idx'. + * 'o' can be NULL, in which case nothing is done. + * if 'o' was non-NULL, and the function succeeds, 'o' is returned. + * if 'o' was non-NULL, and the function fails, 'o' is released and + * NULL returned. This simplifies error checking at the call site since + * a single test for NULL for the function result is enough. + */ +static PyObject * +r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) +{ +    if (o != NULL && flag) { /* currently only FLAG_REF is defined */ +        PyObject *tmp = PyList_GET_ITEM(p->refs, idx); +        Py_INCREF(o); +        PyList_SET_ITEM(p->refs, idx, o); +        Py_DECREF(tmp); +    } +    return o; +} + +/* combination of both above, used when an object can be + * created whenever it is seen in the file, as opposed to + * after having loaded its sub-objects. + */ +static PyObject * +r_ref(PyObject *o, int flag, RFILE *p) +{ +    assert(flag & FLAG_REF); +    if (o == NULL) +        return NULL; +    if (PyList_Append(p->refs, o) < 0) { +        Py_DECREF(o); /* release the new object */ +        return NULL; +    } +    return o; +}  static PyObject *  r_object(RFILE *p) @@ -674,9 +843,17 @@ r_object(RFILE *p)      /* NULL is a valid return value, it does not necessarily means that         an exception is set. */      PyObject *v, *v2; +    Py_ssize_t idx = 0;      long i, n; -    int type = r_byte(p); -    PyObject *retval; +    int type, code = r_byte(p); +    int flag, is_interned = 0; +    PyObject *retval = NULL; + +    if (code == EOF) { +        PyErr_SetString(PyExc_EOFError, +                        "EOF read where object expected"); +        return NULL; +    }      p->depth++; @@ -686,16 +863,17 @@ r_object(RFILE *p)          return NULL;      } -    switch (type) { +    flag = code & FLAG_REF; +    type = code & ~FLAG_REF; -    case EOF: -        PyErr_SetString(PyExc_EOFError, -                        "EOF read where object expected"); -        retval = NULL; -        break; +#define R_REF(O) do{\ +    if (flag) \ +        O = r_ref(O, flag, p);\ +} while (0) + +    switch (type) {      case TYPE_NULL: -        retval = NULL;          break;      case TYPE_NONE: @@ -726,67 +904,66 @@ r_object(RFILE *p)      case TYPE_INT:          n = r_long(p);          retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n); -        break; - -    case TYPE_INT64: -        retval = r_long64(p); +        R_REF(retval);          break;      case TYPE_LONG:          retval = r_PyLong(p); +        R_REF(retval);          break;      case TYPE_FLOAT:          { -            char buf[256]; +            char buf[256], *ptr;              double dx; -            retval = NULL;              n = r_byte(p);              if (n == EOF) {                  PyErr_SetString(PyExc_EOFError,                      "EOF read where object expected");                  break;              } -            if (r_string(buf, n, p) != n) +            ptr = r_string(n, p); +            if (ptr == NULL)                  break; +            memcpy(buf, ptr, n);              buf[n] = '\0';              dx = PyOS_string_to_double(buf, NULL, NULL);              if (dx == -1.0 && PyErr_Occurred())                  break;              retval = PyFloat_FromDouble(dx); +            R_REF(retval);              break;          }      case TYPE_BINARY_FLOAT:          { -            unsigned char buf[8]; +            unsigned char *buf;              double x; -            if (r_string((char*)buf, 8, p) != 8) { -                retval = NULL; +            buf = (unsigned char *) r_string(8, p); +            if (buf == NULL)                  break; -            }              x = _PyFloat_Unpack8(buf, 1); -            if (x == -1.0 && PyErr_Occurred()) { -                retval = NULL; +            if (x == -1.0 && PyErr_Occurred())                  break; -            }              retval = PyFloat_FromDouble(x); +            R_REF(retval);              break;          }      case TYPE_COMPLEX:          { -            char buf[256]; +            char buf[256], *ptr;              Py_complex c; -            retval = NULL;              n = r_byte(p);              if (n == EOF) {                  PyErr_SetString(PyExc_EOFError,                      "EOF read where object expected");                  break;              } -            if (r_string(buf, n, p) != n) +            ptr = r_string(n, p); +            if (ptr == NULL)                  break; +            memcpy(buf, ptr, n);              buf[n] = '\0';              c.real = PyOS_string_to_double(buf, NULL, NULL);              if (c.real == -1.0 && PyErr_Occurred()) @@ -797,112 +974,151 @@ r_object(RFILE *p)                      "EOF read where object expected");                  break;              } -            if (r_string(buf, n, p) != n) +            ptr = r_string(n, p); +            if (ptr == NULL)                  break; +            memcpy(buf, ptr, n);              buf[n] = '\0';              c.imag = PyOS_string_to_double(buf, NULL, NULL);              if (c.imag == -1.0 && PyErr_Occurred())                  break;              retval = PyComplex_FromCComplex(c); +            R_REF(retval);              break;          }      case TYPE_BINARY_COMPLEX:          { -            unsigned char buf[8]; +            unsigned char *buf;              Py_complex c; -            if (r_string((char*)buf, 8, p) != 8) { -                retval = NULL; +            buf = (unsigned char *) r_string(8, p); +            if (buf == NULL)                  break; -            }              c.real = _PyFloat_Unpack8(buf, 1); -            if (c.real == -1.0 && PyErr_Occurred()) { -                retval = NULL; +            if (c.real == -1.0 && PyErr_Occurred())                  break; -            } -            if (r_string((char*)buf, 8, p) != 8) { -                retval = NULL; +            buf = (unsigned char *) r_string(8, p); +            if (buf == NULL)                  break; -            }              c.imag = _PyFloat_Unpack8(buf, 1); -            if (c.imag == -1.0 && PyErr_Occurred()) { -                retval = NULL; +            if (c.imag == -1.0 && PyErr_Occurred())                  break; -            }              retval = PyComplex_FromCComplex(c); +            R_REF(retval);              break;          }      case TYPE_STRING: -        n = r_long(p); -        if (PyErr_Occurred()) { -            retval = NULL; +        { +            char *ptr; +            n = r_long(p); +            if (PyErr_Occurred()) +                break; +            if (n < 0 || n > SIZE32_MAX) { +                PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); +                break; +            } +            v = PyBytes_FromStringAndSize((char *)NULL, n); +            if (v == NULL) +                break; +            ptr = r_string(n, p); +            if (ptr == NULL) { +                Py_DECREF(v); +                break; +            } +            memcpy(PyBytes_AS_STRING(v), ptr, n); +            retval = v; +            R_REF(retval);              break;          } + +    case TYPE_ASCII_INTERNED: +        is_interned = 1; +    case TYPE_ASCII: +        n = r_long(p); +        if (PyErr_Occurred()) +            break;          if (n < 0 || n > SIZE32_MAX) { -            PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); -            retval = NULL; +            PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");              break;          } -        v = PyBytes_FromStringAndSize((char *)NULL, n); -        if (v == NULL) { -            retval = NULL; +        goto _read_ascii; + +    case TYPE_SHORT_ASCII_INTERNED: +        is_interned = 1; +    case TYPE_SHORT_ASCII: +        n = r_byte(p); +        if (n == EOF) { +            PyErr_SetString(PyExc_EOFError, +                "EOF read where object expected");              break;          } -        if (r_string(PyBytes_AS_STRING(v), n, p) != n) { -            Py_DECREF(v); -            retval = NULL; +    _read_ascii: +        { +            char *ptr; +            ptr = r_string(n, p); +            if (ptr == NULL) +                break; +            v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n); +            if (v == NULL) +                break; +            if (is_interned) +                PyUnicode_InternInPlace(&v); +            retval = v; +            R_REF(retval);              break;          } -        retval = v; -        break; +    case TYPE_INTERNED: +        is_interned = 1;      case TYPE_UNICODE:          {          char *buffer;          n = r_long(p); -        if (PyErr_Occurred()) { -            retval = NULL; +        if (PyErr_Occurred())              break; -        }          if (n < 0 || n > SIZE32_MAX) {              PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); -            retval = NULL;              break;          } -        buffer = PyMem_NEW(char, n); -        if (buffer == NULL) { -            retval = PyErr_NoMemory(); -            break; +        if (n != 0) { +            buffer = r_string(n, p); +            if (buffer == NULL) +                break; +            v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");          } -        if (r_string(buffer, n, p) != n) { -            PyMem_DEL(buffer); -            retval = NULL; -            break; +        else { +            v = PyUnicode_New(0, 0);          } -        v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); -        PyMem_DEL(buffer); +        if (v == NULL) +            break; +        if (is_interned) +            PyUnicode_InternInPlace(&v);          retval = v; +        R_REF(retval);          break;          } +    case TYPE_SMALL_TUPLE: +        n = (unsigned char) r_byte(p); +        if (PyErr_Occurred()) +            break; +        goto _read_tuple;      case TYPE_TUPLE:          n = r_long(p); -        if (PyErr_Occurred()) { -            retval = NULL; +        if (PyErr_Occurred())              break; -        }          if (n < 0 || n > SIZE32_MAX) {              PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); -            retval = NULL;              break;          } +    _read_tuple:          v = PyTuple_New(n); -        if (v == NULL) { -            retval = NULL; +        R_REF(v); +        if (v == NULL)              break; -        } +          for (i = 0; i < n; i++) {              v2 = r_object(p);              if ( v2 == NULL ) { @@ -920,20 +1136,16 @@ r_object(RFILE *p)      case TYPE_LIST:          n = r_long(p); -        if (PyErr_Occurred()) { -            retval = NULL; +        if (PyErr_Occurred())              break; -        }          if (n < 0 || n > SIZE32_MAX) {              PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); -            retval = NULL;              break;          }          v = PyList_New(n); -        if (v == NULL) { -            retval = NULL; +        R_REF(v); +        if (v == NULL)              break; -        }          for (i = 0; i < n; i++) {              v2 = r_object(p);              if ( v2 == NULL ) { @@ -951,20 +1163,26 @@ r_object(RFILE *p)      case TYPE_DICT:          v = PyDict_New(); -        if (v == NULL) { -            retval = NULL; +        R_REF(v); +        if (v == NULL)              break; -        }          for (;;) {              PyObject *key, *val;              key = r_object(p);              if (key == NULL)                  break;              val = r_object(p); -            if (val != NULL) -                PyDict_SetItem(v, key, val); +            if (val == NULL) { +                Py_DECREF(key); +                break; +            } +            if (PyDict_SetItem(v, key, val) < 0) { +                Py_DECREF(key); +                Py_DECREF(val); +                break; +            }              Py_DECREF(key); -            Py_XDECREF(val); +            Py_DECREF(val);          }          if (PyErr_Occurred()) {              Py_DECREF(v); @@ -976,20 +1194,26 @@ r_object(RFILE *p)      case TYPE_SET:      case TYPE_FROZENSET:          n = r_long(p); -        if (PyErr_Occurred()) { -            retval = NULL; +        if (PyErr_Occurred())              break; -        }          if (n < 0 || n > SIZE32_MAX) {              PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); -            retval = NULL;              break;          }          v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL); -        if (v == NULL) { -            retval = NULL; -            break; +        if (type == TYPE_SET) { +            R_REF(v); +        } else { +            /* must use delayed registration of frozensets because they must +             * be init with a refcount of 1 +             */ +            idx = r_ref_reserve(flag, p); +            if (idx < 0) +                Py_CLEAR(v); /* signal error */          } +        if (v == NULL) +            break; +          for (i = 0; i < n; i++) {              v2 = r_object(p);              if ( v2 == NULL ) { @@ -1008,6 +1232,8 @@ r_object(RFILE *p)              }              Py_DECREF(v2);          } +        if (type != TYPE_SET) +            v = r_ref_insert(v, idx, flag, p);          retval = v;          break; @@ -1029,6 +1255,10 @@ r_object(RFILE *p)              int firstlineno;              PyObject *lnotab = NULL; +            idx = r_ref_reserve(flag, p); +            if (idx < 0) +                break; +              v = NULL;              /* XXX ignore long->int overflows for now */ @@ -1084,6 +1314,8 @@ r_object(RFILE *p)              if (name == NULL)                  goto code_error;              firstlineno = (int)r_long(p); +            if (firstlineno == -1 && PyErr_Occurred()) +                break;              lnotab = r_object(p);              if (lnotab == NULL)                  goto code_error; @@ -1094,6 +1326,7 @@ r_object(RFILE *p)                              code, consts, names, varnames,                              freevars, cellvars, filename, name,                              firstlineno, lnotab); +            v = r_ref_insert(v, idx, flag, p);            code_error:              Py_XDECREF(code); @@ -1109,11 +1342,27 @@ r_object(RFILE *p)          retval = v;          break; +    case TYPE_REF: +        n = r_long(p); +        if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { +            if (n == -1 && PyErr_Occurred()) +                break; +            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); +            break; +        } +        v = PyList_GET_ITEM(p->refs, n); +        if (v == Py_None) { +            PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); +            break; +        } +        Py_INCREF(v); +        retval = v; +        break; +      default:          /* Bogus data got written, which isn't ideal.             This will let you keep working and recover. */          PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)"); -        retval = NULL;          break;      } @@ -1139,23 +1388,33 @@ int  PyMarshal_ReadShortFromFile(FILE *fp)  {      RFILE rf; +    int res;      assert(fp);      rf.readable = NULL;      rf.fp = fp;      rf.current_filename = NULL;      rf.end = rf.ptr = NULL; -    return r_short(&rf); +    rf.buf = NULL; +    res = r_short(&rf); +    if (rf.buf != NULL) +        PyMem_FREE(rf.buf); +    return res;  }  long  PyMarshal_ReadLongFromFile(FILE *fp)  {      RFILE rf; +    long res;      rf.fp = fp;      rf.readable = NULL;      rf.current_filename = NULL;      rf.ptr = rf.end = NULL; -    return r_long(&rf); +    rf.buf = NULL; +    res = r_long(&rf); +    if (rf.buf != NULL) +        PyMem_FREE(rf.buf); +    return res;  }  #ifdef HAVE_FSTAT @@ -1214,22 +1473,36 @@ PyMarshal_ReadObjectFromFile(FILE *fp)      rf.current_filename = NULL;      rf.depth = 0;      rf.ptr = rf.end = NULL; +    rf.buf = NULL; +    rf.refs = PyList_New(0); +    if (rf.refs == NULL) +        return NULL;      result = r_object(&rf); +    Py_DECREF(rf.refs); +    if (rf.buf != NULL) +        PyMem_FREE(rf.buf);      return result;  }  PyObject * -PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) +PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)  {      RFILE rf;      PyObject *result;      rf.fp = NULL;      rf.readable = NULL;      rf.current_filename = NULL; -    rf.ptr = str; -    rf.end = str + len; +    rf.ptr = (char *)str; +    rf.end = (char *)str + len; +    rf.buf = NULL;      rf.depth = 0; +    rf.refs = PyList_New(0); +    if (rf.refs == NULL) +        return NULL;      result = r_object(&rf); +    Py_DECREF(rf.refs); +    if (rf.buf != NULL) +        PyMem_FREE(rf.buf);      return result;  } @@ -1248,7 +1521,13 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)      wf.error = WFERR_OK;      wf.depth = 0;      wf.version = version; +    if (version >= 3) { +        if ((wf.refs = PyDict_New()) == NULL) +            return NULL; +    } else +        wf.refs = NULL;      w_object(x, &wf); +    Py_XDECREF(wf.refs);      if (wf.str != NULL) {          char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);          if (wf.ptr - base > PY_SSIZE_T_MAX) { @@ -1320,6 +1599,8 @@ marshal_load(PyObject *self, PyObject *f)       * Make a call to the read method, but read zero bytes.       * This is to ensure that the object passed in at least       * has a read method which returns bytes. +     * This can be removed if we guarantee good error handling +     * for r_string()       */      data = _PyObject_CallMethodId(f, &PyId_read, "i", 0);      if (data == NULL) @@ -1335,7 +1616,15 @@ marshal_load(PyObject *self, PyObject *f)          rf.fp = NULL;          rf.readable = f;          rf.current_filename = NULL; -        result = read_object(&rf); +        rf.ptr = rf.end = NULL; +        rf.buf = NULL; +        if ((rf.refs = PyList_New(0)) != NULL) { +            result = read_object(&rf); +            Py_DECREF(rf.refs); +            if (rf.buf != NULL) +                PyMem_FREE(rf.buf); +        } else +            result = NULL;      }      Py_DECREF(data);      return result; @@ -1392,8 +1681,11 @@ marshal_loads(PyObject *self, PyObject *args)      rf.ptr = s;      rf.end = s + n;      rf.depth = 0; +    if ((rf.refs = PyList_New(0)) == NULL) +        return NULL;      result = read_object(&rf);      PyBuffer_Release(&p); +    Py_DECREF(rf.refs);      return result;  } @@ -1433,6 +1725,7 @@ Variables:\n\  version -- indicates the format that the module uses. Version 0 is the\n\      historical format, version 1 shares interned strings and version 2\n\      uses a binary format for floating point numbers.\n\ +    Version 3 shares common object references (New in version 3.4).\n\  \n\  Functions:\n\  \n\ | 
