diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2013-10-12 20:25:39 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2013-10-12 20:25:39 (GMT) |
commit | 1164dfcb86757ebaeb68276e4b8f6ee266c9968d (patch) | |
tree | 763f8772f413d230b2a56248ab5ecd28b6b0f1b1 /Python/marshal.c | |
parent | 4c6ed25b9621c58d081f06660ca7f970836ec3c6 (diff) | |
download | cpython-1164dfcb86757ebaeb68276e4b8f6ee266c9968d.zip cpython-1164dfcb86757ebaeb68276e4b8f6ee266c9968d.tar.gz cpython-1164dfcb86757ebaeb68276e4b8f6ee266c9968d.tar.bz2 |
Issue #19219: Speed up marshal.loads(), and make pyc files slightly (5% to 10%) smaller.
Diffstat (limited to 'Python/marshal.c')
-rw-r--r-- | Python/marshal.c | 392 |
1 file changed, 266 insertions, 126 deletions
diff --git a/Python/marshal.c b/Python/marshal.c index 727605a..12565f3 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -51,6 +51,12 @@ #define TYPE_FROZENSET '>' #define FLAG_REF '\x80' /* with a type, add obj to index */ +#define TYPE_ASCII 'a' +#define TYPE_ASCII_INTERNED 'A' +#define TYPE_SMALL_TUPLE ')' +#define TYPE_SHORT_ASCII 'z' +#define TYPE_SHORT_ASCII_INTERNED 'Z' + #define WFERR_OK 0 #define WFERR_UNMARSHALLABLE 1 #define WFERR_NESTEDTOODEEP 2 @@ -66,6 +72,8 @@ typedef struct { PyObject *current_filename; char *ptr; char *end; + char *buf; + Py_ssize_t buf_size; PyObject *refs; /* dict on marshal, list on unmarshal */ int version; } WFILE; @@ -148,6 +156,13 @@ w_pstring(const char *s, Py_ssize_t n, WFILE *p) w_string(s, n, p); } +static void +w_short_pstring(const char *s, Py_ssize_t n, WFILE *p) +{ + w_byte(n, p); + w_string(s, n, p); +} + /* We assume that Python ints are stored internally in base some power of 2**15; for the sake of portability we'll always read and write them in base exactly 2**15. 
*/ @@ -394,24 +409,51 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p); } else if (PyUnicode_CheckExact(v)) { - PyObject *utf8; - utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); - if (utf8 == NULL) { - p->depth--; - p->error = WFERR_UNMARSHALLABLE; - return; + if (p->version >= 4 && PyUnicode_IS_ASCII(v)) { + int is_short = PyUnicode_GET_LENGTH(v) < 256; + if (is_short) { + if (PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_SHORT_ASCII_INTERNED, p); + else + W_TYPE(TYPE_SHORT_ASCII, p); + w_short_pstring((char *) PyUnicode_1BYTE_DATA(v), + PyUnicode_GET_LENGTH(v), p); + } + else { + if (PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_ASCII_INTERNED, p); + else + W_TYPE(TYPE_ASCII, p); + w_pstring((char *) PyUnicode_1BYTE_DATA(v), + PyUnicode_GET_LENGTH(v), p); + } + } + else { + PyObject *utf8; + utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); + if (utf8 == NULL) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_INTERNED, p); + else + W_TYPE(TYPE_UNICODE, p); + w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p); + Py_DECREF(utf8); } - if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) - W_TYPE(TYPE_INTERNED, p); - else - W_TYPE(TYPE_UNICODE, p); - w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p); - Py_DECREF(utf8); } else if (PyTuple_CheckExact(v)) { - W_TYPE(TYPE_TUPLE, p); n = PyTuple_Size(v); - W_SIZE(n, p); + if (p->version >= 4 && n < 256) { + W_TYPE(TYPE_SMALL_TUPLE, p); + w_byte(n, p); + } + else { + W_TYPE(TYPE_TUPLE, p); + W_SIZE(n, p); + } for (i = 0; i < n; i++) { w_object(PyTuple_GET_ITEM(v, i), p); } @@ -537,59 +579,75 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) typedef WFILE RFILE; /* Same struct with different invariants */ -#define rs_byte(p) (((p)->ptr < (p)->end) ? 
(unsigned char)*(p)->ptr++ : EOF) - -static Py_ssize_t -r_string(char *s, Py_ssize_t n, RFILE *p) +static char * +r_string(Py_ssize_t n, RFILE *p) { - char *ptr; - Py_ssize_t read, left; - - if (!p->readable) { - if (p->fp != NULL) - /* The result fits into int because it must be <=n. */ - read = fread(s, 1, n, p->fp); - else { - left = p->end - p->ptr; - read = (left < n) ? left : n; - memcpy(s, p->ptr, read); - p->ptr += read; + Py_ssize_t read = -1; + + if (p->ptr != NULL) { + /* Fast path for loads() */ + char *res = p->ptr; + Py_ssize_t left = p->end - p->ptr; + if (left < n) { + PyErr_SetString(PyExc_EOFError, + "marshal data too short"); + return NULL; } + p->ptr += n; + return res; + } + if (p->buf == NULL) { + p->buf = PyMem_MALLOC(n); + if (p->buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + p->buf_size = n; + } + else if (p->buf_size < n) { + p->buf = PyMem_REALLOC(p->buf, n); + if (p->buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + p->buf_size = n; + } + if (!p->readable) { + assert(p->fp != NULL); + /* The result fits into int because it must be <=n. 
*/ + read = fread(p->buf, 1, n, p->fp); } else { - _Py_IDENTIFIER(read); - - PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "n", n); - read = 0; - if (data != NULL) { - if (!PyBytes_Check(data)) { - PyErr_Format(PyExc_TypeError, - "f.read() returned not bytes but %.100s", - data->ob_type->tp_name); - } - else { - read = (int)PyBytes_GET_SIZE(data); - if (read > 0) { - if (read > n) { - PyErr_Format(PyExc_ValueError, - "read() returned too much data: " - "%zd bytes requested, %zd returned", - n, read); - read = -1; - } - else { - ptr = PyBytes_AS_STRING(data); - memcpy(s, ptr, read); - } - } - } - Py_DECREF(data); + _Py_IDENTIFIER(readinto); + PyObject *res, *mview; + Py_buffer buf; + + if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1) + return NULL; + mview = PyMemoryView_FromBuffer(&buf); + if (mview == NULL) + return NULL; + + res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview); + if (res != NULL) { + read = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); } } - if (!PyErr_Occurred() && (read < n)) { - PyErr_SetString(PyExc_EOFError, "EOF read where not expected"); + if (read != n) { + if (!PyErr_Occurred()) { + if (read > n) + PyErr_Format(PyExc_ValueError, + "read() returned too much data: " + "%zd bytes requested, %zd returned", + n, read); + else + PyErr_SetString(PyExc_EOFError, + "EOF read where not expected"); + } + return NULL; } - return read; + return p->buf; } @@ -597,15 +655,20 @@ static int r_byte(RFILE *p) { int c = EOF; - unsigned char ch; - Py_ssize_t n; - if (!p->readable) - c = p->fp ? 
getc(p->fp) : rs_byte(p); + if (p->ptr != NULL) { + if (p->ptr < p->end) + c = (unsigned char) *p->ptr++; + return c; + } + if (!p->readable) { + assert(p->fp); + c = getc(p->fp); + } else { - n = r_string((char *) &ch, 1, p); - if (n > 0) - c = ch; + char *ptr = r_string(1, p); + if (ptr != NULL) + c = *(unsigned char *) ptr; } return c; } @@ -613,32 +676,36 @@ r_byte(RFILE *p) static int r_short(RFILE *p) { - short x; - unsigned char buffer[2]; - - r_string((char *) buffer, 2, p); - x = buffer[0]; - x |= buffer[1] << 8; - /* Sign-extension, in case short greater than 16 bits */ - x |= -(x & 0x8000); + short x = -1; + unsigned char *buffer; + + buffer = (unsigned char *) r_string(2, p); + if (buffer != NULL) { + x = buffer[0]; + x |= buffer[1] << 8; + /* Sign-extension, in case short greater than 16 bits */ + x |= -(x & 0x8000); + } return x; } static long r_long(RFILE *p) { - long x; - unsigned char buffer[4]; - - r_string((char *) buffer, 4, p); - x = buffer[0]; - x |= (long)buffer[1] << 8; - x |= (long)buffer[2] << 16; - x |= (long)buffer[3] << 24; + long x = -1; + unsigned char *buffer; + + buffer = (unsigned char *) r_string(4, p); + if (buffer != NULL) { + x = buffer[0]; + x |= (long)buffer[1] << 8; + x |= (long)buffer[2] << 16; + x |= (long)buffer[3] << 24; #if SIZEOF_LONG > 4 - /* Sign extension for 64-bit machines */ - x |= -(x & 0x80000000L); + /* Sign extension for 64-bit machines */ + x |= -(x & 0x80000000L); #endif + } return x; } @@ -716,9 +783,7 @@ static Py_ssize_t r_ref_reserve(int flag, RFILE *p) { if (flag) { /* currently only FLAG_REF is defined */ - Py_ssize_t idx = PyList_Size(p->refs); - if (idx < 0) - return -1; + Py_ssize_t idx = PyList_GET_SIZE(p->refs); if (idx >= 0x7ffffffe) { PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)"); return -1; @@ -742,12 +807,10 @@ static PyObject * r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) { if (o != NULL && flag) { /* currently only FLAG_REF is defined */ - if 
(PyList_SetItem(p->refs, idx, o) < 0) { - Py_DECREF(o); /* release the new object */ - return NULL; - } else { - Py_INCREF(o); /* a reference for the list */ - } + PyObject *tmp = PyList_GET_ITEM(p->refs, idx); + Py_INCREF(o); + PyList_SET_ITEM(p->refs, idx, o); + Py_DECREF(tmp); } return o; } @@ -777,7 +840,7 @@ r_object(RFILE *p) Py_ssize_t idx = 0; long i, n; int type, code = r_byte(p); - int flag; + int flag, is_interned = 0; PyObject *retval; if (code == EOF) { @@ -846,7 +909,7 @@ r_object(RFILE *p) case TYPE_FLOAT: { - char buf[256]; + char buf[256], *ptr; double dx; retval = NULL; n = r_byte(p); @@ -855,8 +918,10 @@ r_object(RFILE *p) "EOF read where object expected"); break; } - if (r_string(buf, n, p) != n) + ptr = r_string(n, p); + if (ptr == NULL) break; + memcpy(buf, ptr, n); buf[n] = '\0'; dx = PyOS_string_to_double(buf, NULL, NULL); if (dx == -1.0 && PyErr_Occurred()) @@ -868,9 +933,10 @@ r_object(RFILE *p) case TYPE_BINARY_FLOAT: { - unsigned char buf[8]; + unsigned char *buf; double x; - if (r_string((char*)buf, 8, p) != 8) { + buf = (unsigned char *) r_string(8, p); + if (buf == NULL) { retval = NULL; break; } @@ -886,7 +952,7 @@ r_object(RFILE *p) case TYPE_COMPLEX: { - char buf[256]; + char buf[256], *ptr; Py_complex c; retval = NULL; n = r_byte(p); @@ -895,8 +961,10 @@ r_object(RFILE *p) "EOF read where object expected"); break; } - if (r_string(buf, n, p) != n) + ptr = r_string(n, p); + if (ptr == NULL) break; + memcpy(buf, ptr, n); buf[n] = '\0'; c.real = PyOS_string_to_double(buf, NULL, NULL); if (c.real == -1.0 && PyErr_Occurred()) @@ -907,8 +975,10 @@ r_object(RFILE *p) "EOF read where object expected"); break; } - if (r_string(buf, n, p) != n) + ptr = r_string(n, p); + if (ptr == NULL) break; + memcpy(buf, ptr, n); buf[n] = '\0'; c.imag = PyOS_string_to_double(buf, NULL, NULL); if (c.imag == -1.0 && PyErr_Occurred()) @@ -920,9 +990,10 @@ r_object(RFILE *p) case TYPE_BINARY_COMPLEX: { - unsigned char buf[8]; + unsigned char *buf; Py_complex 
c; - if (r_string((char*)buf, 8, p) != 8) { + buf = (unsigned char *) r_string(8, p); + if (buf == NULL) { retval = NULL; break; } @@ -931,7 +1002,8 @@ r_object(RFILE *p) retval = NULL; break; } - if (r_string((char*)buf, 8, p) != 8) { + buf = (unsigned char *) r_string(8, p); + if (buf == NULL) { retval = NULL; break; } @@ -946,32 +1018,82 @@ r_object(RFILE *p) } case TYPE_STRING: + { + char *ptr; + n = r_long(p); + if (PyErr_Occurred()) { + retval = NULL; + break; + } + if (n < 0 || n > SIZE32_MAX) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + retval = NULL; + break; + } + v = PyBytes_FromStringAndSize((char *)NULL, n); + if (v == NULL) { + retval = NULL; + break; + } + ptr = r_string(n, p); + if (ptr == NULL) { + Py_DECREF(v); + retval = NULL; + break; + } + memcpy(PyBytes_AS_STRING(v), ptr, n); + retval = v; + R_REF(retval); + break; + } + + case TYPE_ASCII_INTERNED: + is_interned = 1; + case TYPE_ASCII: n = r_long(p); if (PyErr_Occurred()) { retval = NULL; break; } if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); retval = NULL; break; } - v = PyBytes_FromStringAndSize((char *)NULL, n); - if (v == NULL) { - retval = NULL; + goto _read_ascii; + + case TYPE_SHORT_ASCII_INTERNED: + is_interned = 1; + case TYPE_SHORT_ASCII: + n = r_byte(p); + if (n == EOF) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); break; } - if (r_string(PyBytes_AS_STRING(v), n, p) != n) { - Py_DECREF(v); - retval = NULL; + _read_ascii: + { + char *ptr; + ptr = r_string(n, p); + if (ptr == NULL) { + retval = NULL; + break; + } + v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n); + if (v == NULL) { + retval = NULL; + break; + } + if (is_interned) + PyUnicode_InternInPlace(&v); + retval = v; + R_REF(retval); break; } - retval = v; - R_REF(retval); - break; - case 
TYPE_UNICODE: case TYPE_INTERNED: + is_interned = 1; + case TYPE_UNICODE: { char *buffer; @@ -986,18 +1108,12 @@ r_object(RFILE *p) break; } if (n != 0) { - buffer = PyMem_NEW(char, n); + buffer = r_string(n, p); if (buffer == NULL) { - retval = PyErr_NoMemory(); - break; - } - if (r_string(buffer, n, p) != n) { - PyMem_DEL(buffer); retval = NULL; break; } v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); - PyMem_DEL(buffer); } else { v = PyUnicode_New(0, 0); @@ -1006,13 +1122,16 @@ r_object(RFILE *p) retval = NULL; break; } - if (type == TYPE_INTERNED) + if (is_interned) PyUnicode_InternInPlace(&v); retval = v; R_REF(retval); break; } + case TYPE_SMALL_TUPLE: + n = (unsigned char) r_byte(p); + goto _read_tuple; case TYPE_TUPLE: n = r_long(p); if (PyErr_Occurred()) { @@ -1024,6 +1143,7 @@ r_object(RFILE *p) retval = NULL; break; } + _read_tuple: v = PyTuple_New(n); R_REF(v); if (v == NULL) { @@ -1304,23 +1424,33 @@ int PyMarshal_ReadShortFromFile(FILE *fp) { RFILE rf; + int res; assert(fp); rf.readable = NULL; rf.fp = fp; rf.current_filename = NULL; rf.end = rf.ptr = NULL; - return r_short(&rf); + rf.buf = NULL; + res = r_short(&rf); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); + return res; } long PyMarshal_ReadLongFromFile(FILE *fp) { RFILE rf; + long res; rf.fp = fp; rf.readable = NULL; rf.current_filename = NULL; rf.ptr = rf.end = NULL; - return r_long(&rf); + rf.buf = NULL; + res = r_long(&rf); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); + return res; } #ifdef HAVE_FSTAT @@ -1379,11 +1509,14 @@ PyMarshal_ReadObjectFromFile(FILE *fp) rf.current_filename = NULL; rf.depth = 0; rf.ptr = rf.end = NULL; + rf.buf = NULL; rf.refs = PyList_New(0); if (rf.refs == NULL) return NULL; result = r_object(&rf); Py_DECREF(rf.refs); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); return result; } @@ -1397,12 +1530,15 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) rf.current_filename = NULL; rf.ptr = str; rf.end = str + len; + rf.buf = NULL; rf.depth = 0; rf.refs = 
PyList_New(0); if (rf.refs == NULL) return NULL; result = r_object(&rf); Py_DECREF(rf.refs); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); return result; } @@ -1516,9 +1652,13 @@ marshal_load(PyObject *self, PyObject *f) rf.fp = NULL; rf.readable = f; rf.current_filename = NULL; + rf.ptr = rf.end = NULL; + rf.buf = NULL; if ((rf.refs = PyList_New(0)) != NULL) { result = read_object(&rf); Py_DECREF(rf.refs); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); } else result = NULL; } |