diff options
author | Guido van Rossum <guido@python.org> | 2007-11-12 01:13:56 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-11-12 01:13:56 (GMT) |
commit | 29fd7120e420014d5c32f5864a1d3c19ffa73c7f (patch) | |
tree | f6701c188935b679e3406f6bc6f6450bb0baa523 /Python | |
parent | aec75c33b7090c8c6eb714eb8c4b133bef9880d8 (diff) | |
download | cpython-29fd7120e420014d5c32f5864a1d3c19ffa73c7f.zip cpython-29fd7120e420014d5c32f5864a1d3c19ffa73c7f.tar.gz cpython-29fd7120e420014d5c32f5864a1d3c19ffa73c7f.tar.bz2 |
Minimal change that disables (AFAICT) the interpolation of \u and \U inside
raw string literals. I added a whole bunch of tests but am still not sure
I am testing all paths through the code. I really think the code could be
simplified quite a bit.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ast.c | 20 |
1 file changed, 12 insertions, 8 deletions
diff --git a/Python/ast.c b/Python/ast.c index 0afb408..2a5d8b8 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -1292,7 +1292,7 @@ ast_for_atom(struct compiling *c, const node *n) case STRING: { PyObject *str = parsestrplus(c, n, &bytesmode); if (!str) { - if (PyErr_ExceptionMatches(PyExc_UnicodeError)){ + if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { PyObject *type, *value, *tback, *errstr; PyErr_Fetch(&type, &value, &tback); errstr = ((PyUnicodeErrorObject *)value)->reason; @@ -3117,6 +3117,7 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding) char *buf; char *p; const char *end; + if (encoding == NULL) { buf = (char *)s; u = NULL; @@ -3218,7 +3219,7 @@ parsestr(const node *n, const char *encoding, int *bytesmode) return NULL; } } - if (!*bytesmode) { + if (!*bytesmode && !rawmode) { return decode_unicode(s, len, rawmode, encoding); } if (*bytesmode) { @@ -3238,13 +3239,17 @@ parsestr(const node *n, const char *encoding, int *bytesmode) if (rawmode || strchr(s, '\\') == NULL) { if (need_encoding) { PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL); - if (u == NULL) - return NULL; + if (u == NULL || !*bytesmode) + return u; v = PyUnicode_AsEncodedString(u, encoding, NULL); Py_DECREF(u); return v; - } else { + } else if (*bytesmode) { return PyString_FromStringAndSize(s, len); + } else if (strcmp(encoding, "utf-8") == 0) { + return PyUnicode_FromStringAndSize(s, len); + } else { + return PyUnicode_DecodeLatin1(s, len, NULL); } } @@ -3252,7 +3257,7 @@ parsestr(const node *n, const char *encoding, int *bytesmode) need_encoding ? encoding : NULL); } -/* Build a Python string object out of a STRING atom. This takes care of +/* Build a Python string object out of a STRING+ atom. This takes care of * compile-time literal catenation, calling parsestr() on each piece, and * pasting the intermediate results together. 
*/ @@ -3272,8 +3277,7 @@ parsestrplus(struct compiling *c, const node *n, int *bytesmode) if (s == NULL) goto onError; if (*bytesmode != subbm) { - ast_error(n, "cannot mix bytes and nonbytes" - "literals"); + ast_error(n, "cannot mix bytes and nonbytes literals"); goto onError; } if (PyString_Check(v) && PyString_Check(s)) { |