summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2010-10-16 13:14:10 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2010-10-16 13:14:10 (GMT)
commit: 4c7c8c30235e42c47500b91549c2b6154b61f883 (patch)
tree: 649a94a99ea257c19a3e5ba17fc05a8044459243
parent: 5a7913eb3bf390a2f3fd28116fc789bf2c7e4b64 (diff)
download: cpython-4c7c8c30235e42c47500b91549c2b6154b61f883.zip
cpython-4c7c8c30235e42c47500b91549c2b6154b61f883.tar.gz
cpython-4c7c8c30235e42c47500b91549c2b6154b61f883.tar.bz2
Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect
filenames encoded to the filesystem encoding with the surrogateescape error handler (to support undecodable bytes), instead of UTF-8 in strict mode.
-rw-r--r--  Misc/NEWS             6
-rw-r--r--  Parser/tokenizer.h    2
-rw-r--r--  Python/ast.c          7
-rw-r--r--  Python/bltinmodule.c  33
-rw-r--r--  Python/compile.c      2
-rw-r--r--  Python/pythonrun.c    12
-rw-r--r--  Python/traceback.c    35
7 files changed, 69 insertions, 28 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 01a2442..b32f485 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ What's New in Python 3.2 Beta 1?
Core and Builtins
-----------------
+- Issue #9713, #10114: Parser functions (eg. PyParser_ASTFromFile) expects
+ filenames encoded to the filesystem encoding with surrogateescape error
+ handler (to support undecodable bytes), instead of UTF-8 in strict mode.
+
- Issue #9997: Don't let the name "top" have special significance in scope
resolution.
@@ -39,7 +43,7 @@ Library
XML namespace attribute is encountered.
- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
- characters by default. Deprecate ``cgi.escape()``.
+ characters by default. Deprecate ``cgi.escape()``.
- Issue #9409: Fix the regex to match all kind of filenames, for interactive
debugging in doctests.
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index d9866f6..424567d 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -40,7 +40,7 @@ struct tok_state {
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
- const char *filename; /* For error messages */
+ const char *filename; /* encoded to the filesystem encoding */
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
diff --git a/Python/ast.c b/Python/ast.c
index 38643f6..b9beef8 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -102,6 +102,7 @@ static void
ast_error_finish(const char *filename)
{
PyObject *type, *value, *tback, *errstr, *offset, *loc, *tmp;
+ PyObject *filename_obj;
long lineno;
assert(PyErr_Occurred());
@@ -130,7 +131,11 @@ ast_error_finish(const char *filename)
Py_INCREF(Py_None);
loc = Py_None;
}
- tmp = Py_BuildValue("(zlOO)", filename, lineno, offset, loc);
+ filename_obj = PyUnicode_DecodeFSDefault(filename);
+ if (filename_obj != NULL)
+ tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, offset, loc);
+ else
+ tmp = NULL;
Py_DECREF(loc);
if (!tmp) {
Py_DECREF(errstr);
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 2e8d6e2..ece2a37 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -524,6 +524,7 @@ static PyObject *
builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{
char *str;
+ PyObject *filename_obj;
char *filename;
char *startstr;
int mode = -1;
@@ -535,12 +536,16 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"source", "filename", "mode", "flags",
"dont_inherit", NULL};
int start[] = {Py_file_input, Py_eval_input, Py_single_input};
+ PyObject *result;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oss|ii:compile",
- kwlist, &cmd, &filename, &startstr,
- &supplied_flags, &dont_inherit))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|ii:compile", kwlist,
+ &cmd,
+ PyUnicode_FSConverter, &filename_obj,
+ &startstr, &supplied_flags,
+ &dont_inherit))
return NULL;
+ filename = PyBytes_AS_STRING(filename_obj);
cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8;
if (supplied_flags &
@@ -548,7 +553,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{
PyErr_SetString(PyExc_ValueError,
"compile(): unrecognised flags");
- return NULL;
+ goto error;
}
/* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */
@@ -565,14 +570,13 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
else {
PyErr_SetString(PyExc_ValueError,
"compile() arg 3 must be 'exec', 'eval' or 'single'");
- return NULL;
+ goto error;
}
is_ast = PyAST_Check(cmd);
if (is_ast == -1)
- return NULL;
+ goto error;
if (is_ast) {
- PyObject *result;
if (supplied_flags & PyCF_ONLY_AST) {
Py_INCREF(cmd);
result = cmd;
@@ -585,20 +589,27 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
mod = PyAST_obj2mod(cmd, arena, mode);
if (mod == NULL) {
PyArena_Free(arena);
- return NULL;
+ goto error;
}
result = (PyObject*)PyAST_Compile(mod, filename,
&cf, arena);
PyArena_Free(arena);
}
- return result;
+ goto finally;
}
str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf);
if (str == NULL)
- return NULL;
+ goto error;
- return Py_CompileStringFlags(str, filename, start[mode], &cf);
+ result = Py_CompileStringFlags(str, filename, start[mode], &cf);
+ goto finally;
+
+error:
+ result = NULL;
+finally:
+ Py_DECREF(filename_obj);
+ return result;
}
PyDoc_STRVAR(compile_doc,
diff --git a/Python/compile.c b/Python/compile.c
index d29e48c..1ff0859 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -3942,7 +3942,7 @@ makecode(struct compiler *c, struct assembler *a)
freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
if (!freevars)
goto error;
- filename = PyUnicode_FromString(c->c_filename);
+ filename = PyUnicode_DecodeFSDefault(c->c_filename);
if (!filename)
goto error;
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 73fef75..8c535fd 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1213,7 +1213,7 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit,
d = PyModule_GetDict(m);
if (PyDict_GetItemString(d, "__file__") == NULL) {
PyObject *f;
- f = PyUnicode_FromString(filename);
+ f = PyUnicode_DecodeFSDefault(filename);
if (f == NULL)
return -1;
if (PyDict_SetItemString(d, "__file__", f) < 0) {
@@ -1968,7 +1968,9 @@ err_input(perrdetail *err)
{
PyObject *v, *w, *errtype, *errtext;
PyObject *msg_obj = NULL;
+ PyObject *filename;
char *msg = NULL;
+
errtype = PyExc_SyntaxError;
switch (err->error) {
case E_ERROR:
@@ -2052,8 +2054,12 @@ err_input(perrdetail *err)
errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
"replace");
}
- v = Py_BuildValue("(ziiN)", err->filename,
- err->lineno, err->offset, errtext);
+ filename = PyUnicode_DecodeFSDefault(err->filename);
+ if (filename != NULL)
+ v = Py_BuildValue("(NiiN)", filename,
+ err->lineno, err->offset, errtext);
+ else
+ v = NULL;
if (v != NULL) {
if (msg_obj)
w = Py_BuildValue("(OO)", msg_obj, v);
diff --git a/Python/traceback.c b/Python/traceback.c
index 558755d..ab10cfd 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -142,16 +142,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
Py_ssize_t npath;
size_t taillen;
PyObject *syspath;
- const char* path;
+ PyObject *path;
const char* tail;
+ PyObject *filebytes;
const char* filepath;
Py_ssize_t len;
+ PyObject* result;
- filepath = _PyUnicode_AsString(filename);
- if (filepath == NULL) {
+ filebytes = PyUnicode_EncodeFSDefault(filename);
+ if (filebytes == NULL) {
PyErr_Clear();
return NULL;
}
+ filepath = PyBytes_AS_STRING(filebytes);
/* Search tail of filename in sys.path before giving up */
tail = strrchr(filepath, SEP);
@@ -163,7 +166,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
syspath = PySys_GetObject("path");
if (syspath == NULL || !PyList_Check(syspath))
- return NULL;
+ goto error;
npath = PyList_Size(syspath);
for (i = 0; i < npath; i++) {
@@ -174,14 +177,18 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
}
if (!PyUnicode_Check(v))
continue;
- path = _PyUnicode_AsStringAndSize(v, &len);
+ path = PyUnicode_EncodeFSDefault(v);
if (path == NULL) {
PyErr_Clear();
continue;
}
- if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1)
+ len = PyBytes_GET_SIZE(path);
+ if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) {
+ Py_DECREF(path);
continue; /* Too long */
- strcpy(namebuf, path);
+ }
+ strcpy(namebuf, PyBytes_AS_STRING(path));
+ Py_DECREF(path);
if (strlen(namebuf) != len)
continue; /* v contains '\0' */
if (len > 0 && namebuf[len-1] != SEP)
@@ -189,11 +196,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
strcpy(namebuf+len, tail);
binary = PyObject_CallMethod(io, "open", "ss", namebuf, "rb");
- if (binary != NULL)
- return binary;
+ if (binary != NULL) {
+ result = binary;
+ goto finally;
+ }
PyErr_Clear();
}
- return NULL;
+ goto error;
+
+error:
+ result = NULL;
+finally:
+ Py_DECREF(filebytes);
+ return result;
}
int