summary | refs | log | tree | commit | diff | stats
path: root/Parser/tokenizer.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2010-12-27 20:12:13 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2010-12-27 20:12:13 (GMT)
commit: 83098a4095a66b6562db88802fec5ea108414d58 (patch)
tree: c25cfb3b50826ccb72add185ab0623f516602cc0 /Parser/tokenizer.c
parent: cb428f01625f99937b59431a53cc8f6013bd3cc2 (diff)
download: cpython-83098a4095a66b6562db88802fec5ea108414d58.zip
cpython-83098a4095a66b6562db88802fec5ea108414d58.tar.gz
cpython-83098a4095a66b6562db88802fec5ea108414d58.tar.bz2
Issue #10778: decoding_fgets() decodes the filename from the filesystem
encoding instead of UTF-8.
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- Parser/tokenizer.c 17
1 file changed, 11 insertions, 6 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 28dcea1..441d05a 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -545,6 +545,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
{
char *line = NULL;
int badchar = 0;
+ PyObject *filename;
for (;;) {
if (tok->decoding_state == STATE_NORMAL) {
/* We already have a codec associated with
@@ -585,12 +586,16 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
if (badchar) {
/* Need to add 1 to the line number, since this line
has not been counted, yet. */
- PyErr_Format(PyExc_SyntaxError,
- "Non-UTF-8 code starting with '\\x%.2x' "
- "in file %.200s on line %i, "
- "but no encoding declared; "
- "see http://python.org/dev/peps/pep-0263/ for details",
- badchar, tok->filename, tok->lineno + 1);
+ filename = PyUnicode_DecodeFSDefault(tok->filename);
+ if (filename != NULL) {
+ PyErr_Format(PyExc_SyntaxError,
+ "Non-UTF-8 code starting with '\\x%.2x' "
+ "in file %.200U on line %i, "
+ "but no encoding declared; "
+ "see http://python.org/dev/peps/pep-0263/ for details",
+ badchar, filename, tok->lineno + 1);
+ Py_DECREF(filename);
+ }
return error_ret(tok);
}
#endif