summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Misc/NEWS 4
-rw-r--r-- Parser/tokenizer.h 2
-rw-r--r-- Python/ast.c 12
-rw-r--r-- Python/bltinmodule.c 33
-rw-r--r-- Python/compile.c 2
-rw-r--r-- Python/pythonrun.c 17
-rw-r--r-- Python/traceback.c 38
7 files changed, 81 insertions, 27 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index dc852ee..ac86186 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ What's New in Python 3.1.3?
Core and Builtins
-----------------
+- Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect
+ filenames encoded to the filesystem encoding with the surrogateescape error
+ handler (to support undecodable bytes), instead of UTF-8 in strict mode.
+
- Issue #10006: type.__abstractmethods__ now raises an AttributeError. As a
result metaclasses can now be ABCs (see #9533).
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 5a6d060..c153cbf 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -40,7 +40,7 @@ struct tok_state {
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
- const char *filename; /* For error messages */
+ const char *filename; /* encoded to the filesystem encoding */
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
diff --git a/Python/ast.c b/Python/ast.c
index 5c17133..590bc90 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -102,6 +102,7 @@ static void
ast_error_finish(const char *filename)
{
PyObject *type, *value, *tback, *errstr, *loc, *tmp;
+ PyObject *filename_obj;
long lineno;
assert(PyErr_Occurred());
@@ -125,7 +126,16 @@ ast_error_finish(const char *filename)
Py_INCREF(Py_None);
loc = Py_None;
}
- tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
+ if (filename != NULL)
+ filename_obj = PyUnicode_DecodeFSDefault(filename);
+ else {
+ Py_INCREF(Py_None);
+ filename_obj = Py_None;
+ }
+ if (filename_obj != NULL)
+ tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, Py_None, loc);
+ else
+ tmp = NULL;
Py_DECREF(loc);
if (!tmp) {
Py_DECREF(errstr);
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index cd85156..3e44aa0 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -536,6 +536,7 @@ static PyObject *
builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{
char *str;
+ PyObject *filename_obj;
char *filename;
char *startstr;
int mode = -1;
@@ -547,12 +548,16 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"source", "filename", "mode", "flags",
"dont_inherit", NULL};
int start[] = {Py_file_input, Py_eval_input, Py_single_input};
+ PyObject *result;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oss|ii:compile",
- kwlist, &cmd, &filename, &startstr,
- &supplied_flags, &dont_inherit))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|ii:compile", kwlist,
+ &cmd,
+ PyUnicode_FSConverter, &filename_obj,
+ &startstr, &supplied_flags,
+ &dont_inherit))
return NULL;
+ filename = PyBytes_AS_STRING(filename_obj);
cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8;
if (supplied_flags &
@@ -560,7 +565,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{
PyErr_SetString(PyExc_ValueError,
"compile(): unrecognised flags");
- return NULL;
+ goto error;
}
/* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */
@@ -577,14 +582,13 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
else {
PyErr_SetString(PyExc_ValueError,
"compile() arg 3 must be 'exec', 'eval' or 'single'");
- return NULL;
+ goto error;
}
is_ast = PyAST_Check(cmd);
if (is_ast == -1)
- return NULL;
+ goto error;
if (is_ast) {
- PyObject *result;
if (supplied_flags & PyCF_ONLY_AST) {
Py_INCREF(cmd);
result = cmd;
@@ -597,20 +601,27 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
mod = PyAST_obj2mod(cmd, arena, mode);
if (mod == NULL) {
PyArena_Free(arena);
- return NULL;
+ goto error;
}
result = (PyObject*)PyAST_Compile(mod, filename,
&cf, arena);
PyArena_Free(arena);
}
- return result;
+ goto finally;
}
str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf);
if (str == NULL)
- return NULL;
+ goto error;
- return Py_CompileStringFlags(str, filename, start[mode], &cf);
+ result = Py_CompileStringFlags(str, filename, start[mode], &cf);
+ goto finally;
+
+error:
+ result = NULL;
+finally:
+ Py_DECREF(filename_obj);
+ return result;
}
PyDoc_STRVAR(compile_doc,
diff --git a/Python/compile.c b/Python/compile.c
index a7ac5a0..19b2add 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -4046,7 +4046,7 @@ makecode(struct compiler *c, struct assembler *a)
freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
if (!freevars)
goto error;
- filename = PyUnicode_FromString(c->c_filename);
+ filename = PyUnicode_DecodeFSDefault(c->c_filename);
if (!filename)
goto error;
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 84d72f0..d08d0ed 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1160,7 +1160,7 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit,
d = PyModule_GetDict(m);
if (PyDict_GetItemString(d, "__file__") == NULL) {
PyObject *f;
- f = PyUnicode_FromString(filename);
+ f = PyUnicode_DecodeFSDefault(filename);
if (f == NULL)
return -1;
if (PyDict_SetItemString(d, "__file__", f) < 0) {
@@ -1911,7 +1911,9 @@ err_input(perrdetail *err)
{
PyObject *v, *w, *errtype, *errtext;
PyObject* u = NULL;
+ PyObject *filename;
char *msg = NULL;
+
errtype = PyExc_SyntaxError;
switch (err->error) {
case E_ERROR:
@@ -2000,8 +2002,17 @@ err_input(perrdetail *err)
errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
"replace");
}
- v = Py_BuildValue("(ziiN)", err->filename,
- err->lineno, err->offset, errtext);
+ if (err->filename != NULL)
+ filename = PyUnicode_DecodeFSDefault(err->filename);
+ else {
+ Py_INCREF(Py_None);
+ filename = Py_None;
+ }
+ if (filename != NULL)
+ v = Py_BuildValue("(NiiN)", filename,
+ err->lineno, err->offset, errtext);
+ else
+ v = NULL;
w = NULL;
if (v != NULL)
w = Py_BuildValue("(sO)", msg, v);
diff --git a/Python/traceback.c b/Python/traceback.c
index e74d442..2f1c213 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -143,16 +143,20 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
Py_ssize_t npath;
size_t taillen;
PyObject *syspath;
- const char* path;
+ PyObject *path;
const char* tail;
+ PyObject *filebytes;
const char* filepath;
Py_ssize_t len;
+ PyObject* result;
- filepath = _PyUnicode_AsString(filename);
- if (filepath == NULL) {
+ filebytes = PyUnicode_AsEncodedObject(filename,
+ Py_FileSystemDefaultEncoding, "surrogateescape");
+ if (filebytes == NULL) {
PyErr_Clear();
return NULL;
}
+ filepath = PyBytes_AS_STRING(filebytes);
/* Search tail of filename in sys.path before giving up */
tail = strrchr(filepath, SEP);
@@ -164,7 +168,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
syspath = PySys_GetObject("path");
if (syspath == NULL || !PyList_Check(syspath))
- return NULL;
+ goto error;
npath = PyList_Size(syspath);
for (i = 0; i < npath; i++) {
@@ -175,14 +179,20 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
}
if (!PyUnicode_Check(v))
continue;
- path = _PyUnicode_AsStringAndSize(v, &len);
+
+ path = PyUnicode_AsEncodedObject(v, Py_FileSystemDefaultEncoding,
+ "surrogateescape");
if (path == NULL) {
PyErr_Clear();
continue;
}
- if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1)
+ len = PyBytes_GET_SIZE(path);
+ if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) {
+ Py_DECREF(path);
continue; /* Too long */
- strcpy(namebuf, path);
+ }
+ strcpy(namebuf, PyBytes_AS_STRING(path));
+ Py_DECREF(path);
if (strlen(namebuf) != len)
continue; /* v contains '\0' */
if (len > 0 && namebuf[len-1] != SEP)
@@ -190,11 +200,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
strcpy(namebuf+len, tail);
binary = PyObject_CallMethod(io, "open", "ss", namebuf, "rb");
- if (binary != NULL)
- return binary;
+ if (binary != NULL) {
+ result = binary;
+ goto finally;
+ }
PyErr_Clear();
}
- return NULL;
+ goto error;
+
+error:
+ result = NULL;
+finally:
+ Py_DECREF(filebytes);
+ return result;
}
int