diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-08-04 17:29:52 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-08-04 17:29:52 (GMT) |
commit | 00f1e3f5a54adb0a7159a446edeca2e36da4092e (patch) | |
tree | 86d731c4fd7c0141686044f5eefbb7f475da9647 /Python | |
parent | a729daf2e43f3ffa2d1b3b6cd31491c840091e66 (diff) | |
download | cpython-00f1e3f5a54adb0a7159a446edeca2e36da4092e.zip cpython-00f1e3f5a54adb0a7159a446edeca2e36da4092e.tar.gz cpython-00f1e3f5a54adb0a7159a446edeca2e36da4092e.tar.bz2 |
Patch #534304: Implement phase 1 of PEP 263.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/compile.c | 123 | ||||
-rw-r--r-- | Python/graminit.c | 21 | ||||
-rw-r--r-- | Python/pythonrun.c | 13 |
3 files changed, 143 insertions, 14 deletions
diff --git a/Python/compile.c b/Python/compile.c index 3a0948e..512b5a3 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -485,6 +485,7 @@ struct compiling { int c_closure; /* Is nested w/freevars? */ struct symtable *c_symtable; /* pointer to module symbol table */ PyFutureFeatures *c_future; /* pointer to module's __future__ */ + char *c_encoding; /* source encoding (a borrowed reference) */ }; static int @@ -1182,6 +1183,23 @@ parsenumber(struct compiling *co, char *s) } static PyObject * +decode_utf8(char **sPtr, char *end, char* encoding) +{ + PyObject *u, *v; + char *s, *t; + t = s = *sPtr; + /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */ + while (s < end && (*s & 0x80)) s++; + *sPtr = s; + u = PyUnicode_DecodeUTF8(t, s - t, NULL); + if (u == NULL) + return NULL; + v = PyUnicode_AsEncodedString(u, encoding, NULL); + Py_DECREF(u); + return v; +} + +static PyObject * parsestr(struct compiling *com, char *s) { PyObject *v; @@ -1193,6 +1211,8 @@ parsestr(struct compiling *com, char *s) int first = *s; int quote = first; int rawmode = 0; + char* encoding = ((com == NULL) ? NULL : com->c_encoding); + int need_encoding; int unicode = 0; if (isalpha(quote) || quote == '_') { @@ -1230,28 +1250,101 @@ parsestr(struct compiling *com, char *s) } #ifdef Py_USING_UNICODE if (unicode || Py_UnicodeFlag) { + PyObject *u, *w; + if (encoding == NULL) { + buf = s; + u = NULL; + } else if (strcmp(encoding, "iso-8859-1") == 0) { + buf = s; + u = NULL; + } else { + /* "\XX" may become "\u005c\uHHLL" (12 bytes) */ + u = PyString_FromStringAndSize((char *)NULL, len * 4); + if (u == NULL) + return NULL; + p = buf = PyString_AsString(u); + end = s + len; + while (s < end) { + if (*s == '\\') { + *p++ = *s++; + if (*s & 0x80) { + strcpy(p, "u005c"); + p += 5; + } + } + if (*s & 0x80) { /* XXX inefficient */ + char *r; + int rn, i; + w = decode_utf8(&s, end, "utf-16-be"); + if (w == NULL) { + Py_DECREF(u); + return NULL; + } + r = PyString_AsString(w); + rn = PyString_Size(w); + assert(rn % 2 == 0); + for (i = 0; i < rn; i += 2) { + sprintf(p, "\\u%02x%02x", + r[i + 0] & 0xFF, + r[i + 1] & 0xFF); + p += 6; + } + Py_DECREF(w); + } else { + *p++ = *s++; + } + } + len = p - buf; + } if (rawmode) - v = PyUnicode_DecodeRawUnicodeEscape( - s, len, NULL); + v = PyUnicode_DecodeRawUnicodeEscape(buf, len, NULL); else - v = PyUnicode_DecodeUnicodeEscape( - s, len, NULL); + v = PyUnicode_DecodeUnicodeEscape(buf, len, NULL); + Py_XDECREF(u); if (v == NULL) PyErr_SyntaxLocation(com->c_filename, com->c_lineno); return v; } #endif - if (rawmode || strchr(s, '\\') == NULL) - return PyString_FromStringAndSize(s, len); - v = PyString_FromStringAndSize((char *)NULL, len); + need_encoding = (encoding != NULL && + strcmp(encoding, "utf-8") != 0 && + strcmp(encoding, "iso-8859-1") != 0); + if (rawmode || strchr(s, '\\') == NULL) { + if (need_encoding) { + PyObject* u = PyUnicode_DecodeUTF8(s, len, NULL); + if (u == NULL) + return NULL; + v = PyUnicode_AsEncodedString(u, encoding, NULL); + Py_DECREF(u); + return v; + } else { + return PyString_FromStringAndSize(s, len); + } + } + v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */ + need_encoding ? len * 4 : len); if (v == NULL) return NULL; p = buf = PyString_AsString(v); end = s + len; while (s < end) { if (*s != '\\') { - *p++ = *s++; + ORDINAL: + if (need_encoding && (*s & 0x80)) { + char *r; + int rn; + PyObject* w = decode_utf8(&s, end, encoding); + if (w == NULL) + return NULL; + r = PyString_AsString(w); + rn = PyString_Size(w); + memcpy(p, r, rn); + p += rn; + Py_DECREF(w); + } else { + *p++ = *s++; + } continue; } s++; @@ -1320,8 +1413,8 @@ parsestr(struct compiling *com, char *s) #endif default: *p++ = '\\'; - *p++ = s[-1]; - break; + s--; + goto ORDINAL; } } _PyString_Resize(&v, (int)(p - buf)); @@ -4149,6 +4242,12 @@ jcompile(node *n, char *filename, struct compiling *base, PyCodeObject *co; if (!com_init(&sc, filename)) return NULL; + if (TYPE(n) == encoding_decl) { + sc.c_encoding = STR(n); + n = CHILD(n, 0); + } else { + sc.c_encoding = NULL; + } if (base) { sc.c_private = base->c_private; sc.c_symtable = base->c_symtable; @@ -4157,6 +4256,10 @@ jcompile(node *n, char *filename, struct compiling *base, || (sc.c_symtable->st_cur->ste_type == TYPE_FUNCTION)) sc.c_nested = 1; sc.c_flags |= base->c_flags & PyCF_MASK; + if (base->c_encoding != NULL) { + assert(sc.c_encoding == NULL); + sc.c_encoding = base->c_encoding; + } } else { sc.c_private = NULL; sc.c_future = PyNode_Future(n, filename); diff --git a/Python/graminit.c b/Python/graminit.c index ef7d467..98bad94 100644 --- a/Python/graminit.c +++ b/Python/graminit.c @@ -1463,7 +1463,17 @@ static state states_66[2] = { {1, arcs_66_0}, {2, arcs_66_1}, }; -static dfa dfas[67] = { +static arc arcs_67_0[1] = { + {12, 1}, +}; +static arc arcs_67_1[1] = { + {0, 1}, +}; +static state states_67[2] = { + {1, arcs_67_0}, + {1, arcs_67_1}, +}; +static dfa dfas[68] = { {256, "single_input", 0, 3, states_0, "\004\030\001\000\000\000\124\360\213\011\162\000\002\000\140\210\244\005\001"}, {257, "file_input", 0, 2, states_1, @@ -1598,8 +1608,10 @@ static dfa dfas[67] = { "\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"}, {322, "testlist1", 0, 2, states_66, "\000\020\001\000\000\000\000\000\000\000\000\000\002\000\140\210\244\005\000"}, + {323, "encoding_decl", 0, 2, states_67, + "\000\020\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, }; -static label labels[148] = { +static label labels[149] = { {0, "EMPTY"}, {256, 0}, {4, 0}, @@ -1748,10 +1760,11 @@ static label labels[148] = { {318, 0}, {319, 0}, {321, 0}, + {323, 0}, }; grammar _PyParser_Grammar = { - 67, + 68, dfas, - {148, labels}, + {149, labels}, 256 }; diff --git a/Python/pythonrun.c b/Python/pythonrun.c index b1fde29..006ff08 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1221,6 +1221,7 @@ static void err_input(perrdetail *err) { PyObject *v, *w, *errtype; + PyObject* u = NULL; char *msg = NULL; errtype = PyExc_SyntaxError; v = Py_BuildValue("(ziiz)", err->filename, @@ -1272,12 +1273,24 @@ err_input(perrdetail *err) errtype = PyExc_IndentationError; msg = "too many levels of indentation"; break; + case E_DECODE: { /* XXX */ + PyThreadState* tstate = PyThreadState_Get(); + PyObject* value = tstate->curexc_value; + if (value != NULL) { + u = PyObject_Repr(value); + if (u != NULL) { + msg = PyString_AsString(u); + break; + } + } + } default: fprintf(stderr, "error=%d\n", err->error); msg = "unknown parsing error"; break; } w = Py_BuildValue("(sO)", msg, v); + Py_XDECREF(u); Py_XDECREF(v); PyErr_SetObject(errtype, w); Py_XDECREF(w); |