From 15244f7b126b2eab94a0755a82c431a4933330f4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 19 Oct 2010 01:22:07 +0000 Subject: Recorded merge of revisions 85569-85570 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r85569 | victor.stinner | 2010-10-16 15:14:10 +0200 (sam., 16 oct. 2010) | 4 lines Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect filenames encoded to the filesystem encoding with surrogateescape error handler (to support undecodable bytes), instead of UTF-8 in strict mode. ........ r85570 | victor.stinner | 2010-10-16 15:42:53 +0200 (sam., 16 oct. 2010) | 4 lines Fix ast_error_finish() and err_input(): filename can be NULL Fix my previous commit (r85569). ........ --- Misc/NEWS | 4 ++++ Parser/tokenizer.h | 2 +- Python/ast.c | 12 +++++++++++- Python/bltinmodule.c | 33 ++++++++++++++++++++++----------- Python/compile.c | 2 +- Python/pythonrun.c | 17 ++++++++++++++--- Python/traceback.c | 38 ++++++++++++++++++++++++++++---------- 7 files changed, 81 insertions(+), 27 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index dc852ee..ac86186 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.1.3? Core and Builtins ----------------- +- Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect + filenames encoded to the filesystem encoding with surrogateescape error + handler (to support undecodable bytes), instead of UTF-8 in strict mode. + - Issue #10006: type.__abstractmethods__ now raises an AttributeError. As a result metaclasses can now be ABCs (see #9533). 
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 5a6d060..c153cbf 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -40,7 +40,7 @@ struct tok_state { int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ /* Stuff for checking on different tab sizes */ - const char *filename; /* For error messages */ + const char *filename; /* encoded to the filesystem encoding */ int altwarning; /* Issue warning if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */ int alttabsize; /* Alternate tab spacing */ diff --git a/Python/ast.c b/Python/ast.c index 5c17133..590bc90 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -102,6 +102,7 @@ static void ast_error_finish(const char *filename) { PyObject *type, *value, *tback, *errstr, *loc, *tmp; + PyObject *filename_obj; long lineno; assert(PyErr_Occurred()); @@ -125,7 +126,16 @@ ast_error_finish(const char *filename) Py_INCREF(Py_None); loc = Py_None; } - tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc); + if (filename != NULL) + filename_obj = PyUnicode_DecodeFSDefault(filename); + else { + Py_INCREF(Py_None); + filename_obj = Py_None; + } + if (filename_obj != NULL) + tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, Py_None, loc); + else + tmp = NULL; Py_DECREF(loc); if (!tmp) { Py_DECREF(errstr); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index cd85156..3e44aa0 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -536,6 +536,7 @@ static PyObject * builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { char *str; + PyObject *filename_obj; char *filename; char *startstr; int mode = -1; @@ -547,12 +548,16 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"source", "filename", "mode", "flags", "dont_inherit", NULL}; int start[] = {Py_file_input, Py_eval_input, Py_single_input}; + PyObject *result; - if (!PyArg_ParseTupleAndKeywords(args, 
kwds, "Oss|ii:compile", - kwlist, &cmd, &filename, &startstr, - &supplied_flags, &dont_inherit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|ii:compile", kwlist, + &cmd, + PyUnicode_FSConverter, &filename_obj, + &startstr, &supplied_flags, + &dont_inherit)) return NULL; + filename = PyBytes_AS_STRING(filename_obj); cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8; if (supplied_flags & @@ -560,7 +565,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { PyErr_SetString(PyExc_ValueError, "compile(): unrecognised flags"); - return NULL; + goto error; } /* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */ @@ -577,14 +582,13 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) else { PyErr_SetString(PyExc_ValueError, "compile() arg 3 must be 'exec', 'eval' or 'single'"); - return NULL; + goto error; } is_ast = PyAST_Check(cmd); if (is_ast == -1) - return NULL; + goto error; if (is_ast) { - PyObject *result; if (supplied_flags & PyCF_ONLY_AST) { Py_INCREF(cmd); result = cmd; @@ -597,20 +601,27 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) mod = PyAST_obj2mod(cmd, arena, mode); if (mod == NULL) { PyArena_Free(arena); - return NULL; + goto error; } result = (PyObject*)PyAST_Compile(mod, filename, &cf, arena); PyArena_Free(arena); } - return result; + goto finally; } str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf); if (str == NULL) - return NULL; + goto error; - return Py_CompileStringFlags(str, filename, start[mode], &cf); + result = Py_CompileStringFlags(str, filename, start[mode], &cf); + goto finally; + +error: + result = NULL; +finally: + Py_DECREF(filename_obj); + return result; } PyDoc_STRVAR(compile_doc, diff --git a/Python/compile.c b/Python/compile.c index a7ac5a0..19b2add 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -4046,7 +4046,7 @@ makecode(struct compiler *c, struct assembler *a) freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars)); 
if (!freevars) goto error; - filename = PyUnicode_FromString(c->c_filename); + filename = PyUnicode_DecodeFSDefault(c->c_filename); if (!filename) goto error; diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 84d72f0..d08d0ed 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1160,7 +1160,7 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, d = PyModule_GetDict(m); if (PyDict_GetItemString(d, "__file__") == NULL) { PyObject *f; - f = PyUnicode_FromString(filename); + f = PyUnicode_DecodeFSDefault(filename); if (f == NULL) return -1; if (PyDict_SetItemString(d, "__file__", f) < 0) { @@ -1911,7 +1911,9 @@ err_input(perrdetail *err) { PyObject *v, *w, *errtype, *errtext; PyObject* u = NULL; + PyObject *filename; char *msg = NULL; + errtype = PyExc_SyntaxError; switch (err->error) { case E_ERROR: @@ -2000,8 +2002,17 @@ err_input(perrdetail *err) errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), "replace"); } - v = Py_BuildValue("(ziiN)", err->filename, - err->lineno, err->offset, errtext); + if (err->filename != NULL) + filename = PyUnicode_DecodeFSDefault(err->filename); + else { + Py_INCREF(Py_None); + filename = Py_None; + } + if (filename != NULL) + v = Py_BuildValue("(NiiN)", filename, + err->lineno, err->offset, errtext); + else + v = NULL; w = NULL; if (v != NULL) w = Py_BuildValue("(sO)", msg, v); diff --git a/Python/traceback.c b/Python/traceback.c index e74d442..2f1c213 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -143,16 +143,20 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * Py_ssize_t npath; size_t taillen; PyObject *syspath; - const char* path; + PyObject *path; const char* tail; + PyObject *filebytes; const char* filepath; Py_ssize_t len; + PyObject* result; - filepath = _PyUnicode_AsString(filename); - if (filepath == NULL) { + filebytes = PyUnicode_AsEncodedObject(filename, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (filebytes == NULL) { 
PyErr_Clear(); return NULL; } + filepath = PyBytes_AS_STRING(filebytes); /* Search tail of filename in sys.path before giving up */ tail = strrchr(filepath, SEP); @@ -164,7 +168,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * syspath = PySys_GetObject("path"); if (syspath == NULL || !PyList_Check(syspath)) - return NULL; + goto error; npath = PyList_Size(syspath); for (i = 0; i < npath; i++) { @@ -175,14 +179,20 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * } if (!PyUnicode_Check(v)) continue; - path = _PyUnicode_AsStringAndSize(v, &len); + + path = PyUnicode_AsEncodedObject(v, Py_FileSystemDefaultEncoding, + "surrogateescape"); if (path == NULL) { PyErr_Clear(); continue; } - if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) + len = PyBytes_GET_SIZE(path); + if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) { + Py_DECREF(path); continue; /* Too long */ - strcpy(namebuf, path); + } + strcpy(namebuf, PyBytes_AS_STRING(path)); + Py_DECREF(path); if (strlen(namebuf) != len) continue; /* v contains '\0' */ if (len > 0 && namebuf[len-1] != SEP) @@ -190,11 +200,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * strcpy(namebuf+len, tail); binary = PyObject_CallMethod(io, "open", "ss", namebuf, "rb"); - if (binary != NULL) - return binary; + if (binary != NULL) { + result = binary; + goto finally; + } PyErr_Clear(); } - return NULL; + goto error; + +error: + result = NULL; +finally: + Py_DECREF(filebytes); + return result; } int -- cgit v0.12