Issue #9319: Include the filename in the "Non-UTF8 code ..." syntax error message.

author: Victor Stinner <victor.stinner@haypocalc.com> 2011-04-04 23:48:03 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-04-04 23:48:03 (GMT)
commit: fe7c5b5bdf7c21551b56be563fc604f2d4d3c756 (patch)
tree: 831d9e33e02ad3e1c9bf2d0c113a9de8cdad5770 /Parser
parent: 7f2fee36401f7b987a368fe043637b3ae7116600 (diff)
download: cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.zip cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.gz cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.bz2
2 files changed, 27 insertions, 15 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5edd958..f4d7e3f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
     return result;
 }
 
-/* Get -*- encoding -*- from a Python file.
+/* Get the encoding of a Python file. Check for the coding cookie and check if
+   the file starts with a BOM.
 
-   PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
-   the first or second line of the file (in which case the encoding
-   should be assumed to be PyUnicode_GetDefaultEncoding()).
+   PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
+   encoding in the first or second line of the file (in which case the encoding
+   should be assumed to be UTF-8).
+
+   The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
+   by the caller. */
 
-   The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
-   by the caller.
-*/
 char *
-PyTokenizer_FindEncoding(int fd)
+PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
 {
     struct tok_state *tok;
     FILE *fp;
@@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd)
         return NULL;
     }
 #ifndef PGEN
-    tok->filename = PyUnicode_FromString("<string>");
-    if (tok->filename == NULL)
-        goto error;
+    if (filename != NULL) {
+        Py_INCREF(filename);
+        tok->filename = filename;
+    }
+    else {
+        tok->filename = PyUnicode_FromString("<string>");
+        if (tok->filename == NULL) {
+            fclose(fp);
+            PyTokenizer_Free(tok);
+            return encoding;
+        }
+    }
 #endif
     while (tok->lineno < 2 && tok->done == E_OK) {
         PyTokenizer_Get(tok, &p_start, &p_end);
@@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd)
         if (encoding)
         strcpy(encoding, tok->encoding);
     }
-#ifndef PGEN
-error:
-#endif
     PyTokenizer_Free(tok);
     return encoding;
 }
 
+char *
+PyTokenizer_FindEncoding(int fd)
+{
+    return PyTokenizer_FindEncodingFilename(fd, NULL);
+}
+
 #ifdef Py_DEBUG
 
 void
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 3a0d3cb..ed1f3aa 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -75,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
                                           int len, int *offset);
-extern char * PyTokenizer_FindEncoding(int);
 
 #ifdef __cplusplus
 }
author	Victor Stinner <victor.stinner@haypocalc.com>	2011-04-04 23:48:03 (GMT)
committer	Victor Stinner <victor.stinner@haypocalc.com>	2011-04-04 23:48:03 (GMT)
commit	fe7c5b5bdf7c21551b56be563fc604f2d4d3c756 (patch)
tree	831d9e33e02ad3e1c9bf2d0c113a9de8cdad5770 /Parser
parent	7f2fee36401f7b987a368fe043637b3ae7116600 (diff)
download	cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.zip cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.gz cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.bz2