diff options
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ast.c | 66 | ||||
-rw-r--r-- | Python/ceval.c | 17 | ||||
-rw-r--r-- | Python/codecs.c | 21 |
3 files changed, 80 insertions, 24 deletions
diff --git a/Python/ast.c b/Python/ast.c index 76daf6f..91e7d01 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -4113,8 +4113,34 @@ decode_utf8(struct compiling *c, const char **sPtr, const char *end) return PyUnicode_DecodeUTF8(t, s - t, NULL); } +static int +warn_invalid_escape_sequence(struct compiling *c, const node *n, + char first_invalid_escape_char) +{ + PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c", + first_invalid_escape_char); + if (msg == NULL) { + return -1; + } + if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, + c->c_filename, LINENO(n), + NULL, NULL) < 0 && + PyErr_ExceptionMatches(PyExc_DeprecationWarning)) + { + const char *s = PyUnicode_AsUTF8(msg); + if (s != NULL) { + ast_error(c, n, s); + } + Py_DECREF(msg); + return -1; + } + Py_DECREF(msg); + return 0; +} + static PyObject * -decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) +decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s, + size_t len) { PyObject *v, *u; char *buf; @@ -4167,11 +4193,41 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) len = p - buf; s = buf; - v = PyUnicode_DecodeUnicodeEscape(s, len, NULL); + const char *first_invalid_escape; + v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape); + + if (v != NULL && first_invalid_escape != NULL) { + if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) { + /* We have not decref u before because first_invalid_escape points + inside u. */ + Py_XDECREF(u); + Py_DECREF(v); + return NULL; + } + } Py_XDECREF(u); return v; } +static PyObject * +decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s, + size_t len) +{ + const char *first_invalid_escape; + PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL, + &first_invalid_escape); + if (result == NULL) + return NULL; + + if (first_invalid_escape != NULL) { + if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) { + Py_DECREF(result); + return NULL; + } + } + return result; +} + /* Compile this expression in to an expr_ty. Add parens around the expression, in order to allow leading spaces in the expression. */ static expr_ty @@ -4310,7 +4366,7 @@ done: literal_end-literal_start, NULL, NULL); else - *literal = decode_unicode_with_escapes(c, literal_start, + *literal = decode_unicode_with_escapes(c, n, literal_start, literal_end-literal_start); if (!*literal) return -1; @@ -5048,12 +5104,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode, if (*rawmode) *result = PyBytes_FromStringAndSize(s, len); else - *result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL); + *result = decode_bytes_with_escapes(c, n, s, len); } else { if (*rawmode) *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); else - *result = decode_unicode_with_escapes(c, s, len); + *result = decode_unicode_with_escapes(c, n, s, len); } return *result == NULL ? -1 : 0; } diff --git a/Python/ceval.c b/Python/ceval.c index 82cc9a2..2df94cf 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1424,6 +1424,12 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) PyObject *right = POP(); PyObject *left = TOP(); PyObject *sum; + /* NOTE(haypo): Please don't try to micro-optimize int+int on + CPython using bytecode, it is simply worthless. + See http://bugs.python.org/issue21955 and + http://bugs.python.org/issue10044 for the discussion. In short, + no patch shown any impact on a realistic benchmark, only a minor + speedup on microbenchmarks. */ if (PyUnicode_CheckExact(left) && PyUnicode_CheckExact(right)) { sum = unicode_concatenate(left, right, f, next_instr); @@ -1538,7 +1544,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) TARGET(SET_ADD) { PyObject *v = POP(); - PyObject *set = stack_pointer[-oparg]; + PyObject *set = PEEK(oparg); int err; err = PySet_Add(set, v); Py_DECREF(v); @@ -2796,7 +2802,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) PyObject *map; int err; STACKADJ(-2); - map = stack_pointer[-oparg]; /* dict */ + map = PEEK(oparg); /* dict */ assert(PyDict_CheckExact(map)); err = PyDict_SetItem(map, key, value); /* map[key] = value */ Py_DECREF(value); @@ -4266,6 +4272,9 @@ do_raise(PyObject *exc, PyObject *cause) goto raise_error; } + assert(type != NULL); + assert(value != NULL); + if (cause) { PyObject *fixed_cause; if (PyExceptionClass_Check(cause)) { @@ -4292,8 +4301,8 @@ do_raise(PyObject *exc, PyObject *cause) PyErr_SetObject(type, value); /* PyErr_SetObject incref's its arguments */ - Py_XDECREF(value); - Py_XDECREF(type); + Py_DECREF(value); + Py_DECREF(type); return 0; raise_error: diff --git a/Python/codecs.c b/Python/codecs.c index fe57d0d..96b3611 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -867,17 +867,14 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) Py_UCS4 c; if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - unsigned char *p; + const unsigned char *p; if (PyUnicodeDecodeError_GetStart(exc, &start)) return NULL; if (PyUnicodeDecodeError_GetEnd(exc, &end)) return NULL; if (!(object = PyUnicodeDecodeError_GetObject(exc))) return NULL; - if (!(p = (unsigned char*)PyBytes_AsString(object))) { - Py_DECREF(object); - return NULL; - } + p = (const unsigned char*)PyBytes_AS_STRING(object); res = PyUnicode_New(4 * (end - start), 127); if (res == NULL) { Py_DECREF(object); @@ -1220,7 +1217,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc) return restuple; } else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - unsigned char *p; + const unsigned char *p; Py_UCS4 ch = 0; if (PyUnicodeDecodeError_GetStart(exc, &start)) return NULL; @@ -1228,10 +1225,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc) return NULL; if (!(object = PyUnicodeDecodeError_GetObject(exc))) return NULL; - if (!(p = (unsigned char*)PyBytes_AsString(object))) { - Py_DECREF(object); - return NULL; - } + p = (const unsigned char*)PyBytes_AS_STRING(object); if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) { Py_DECREF(object); return NULL; @@ -1338,7 +1332,7 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc) } else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { PyObject *str; - unsigned char *p; + const unsigned char *p; Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ int consumed = 0; if (PyUnicodeDecodeError_GetStart(exc, &start)) @@ -1347,10 +1341,7 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc) return NULL; if (!(object = PyUnicodeDecodeError_GetObject(exc))) return NULL; - if (!(p = (unsigned char*)PyBytes_AsString(object))) { - Py_DECREF(object); - return NULL; - } + p = (const unsigned char*)PyBytes_AS_STRING(object); while (consumed < 4 && consumed < end-start) { /* Refuse to escape ASCII bytes. */ if (p[start+consumed] < 128) |