summary | refs | log | tree | commit | diff | stats
path: root/Parser
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-04-04 23:48:03 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-04-04 23:48:03 (GMT)
commit: fe7c5b5bdf7c21551b56be563fc604f2d4d3c756 (patch)
tree: 831d9e33e02ad3e1c9bf2d0c113a9de8cdad5770 /Parser
parent: 7f2fee36401f7b987a368fe043637b3ae7116600 (diff)
download: cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.zip
cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.gz
cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.bz2
Issue #9319: Include the filename in "Non-UTF8 code ..." syntax error.
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/tokenizer.c 41
-rw-r--r-- Parser/tokenizer.h 1
2 files changed, 27 insertions, 15 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5edd958..f4d7e3f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
return result;
}
-/* Get -*- encoding -*- from a Python file.
+/* Get the encoding of a Python file. Check for the coding cookie and check if
+ the file starts with a BOM.
- PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
- the first or second line of the file (in which case the encoding
- should be assumed to be PyUnicode_GetDefaultEncoding()).
+ PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
+ encoding in the first or second line of the file (in which case the encoding
+ should be assumed to be UTF-8).
+
+ The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
+ by the caller. */
- The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
- by the caller.
-*/
char *
-PyTokenizer_FindEncoding(int fd)
+PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{
struct tok_state *tok;
FILE *fp;
@@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd)
return NULL;
}
#ifndef PGEN
- tok->filename = PyUnicode_FromString("<string>");
- if (tok->filename == NULL)
- goto error;
+ if (filename != NULL) {
+ Py_INCREF(filename);
+ tok->filename = filename;
+ }
+ else {
+ tok->filename = PyUnicode_FromString("<string>");
+ if (tok->filename == NULL) {
+ fclose(fp);
+ PyTokenizer_Free(tok);
+ return encoding;
+ }
+ }
#endif
while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end);
@@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd)
if (encoding)
strcpy(encoding, tok->encoding);
}
-#ifndef PGEN
-error:
-#endif
PyTokenizer_Free(tok);
return encoding;
}
+char *
+PyTokenizer_FindEncoding(int fd)
+{
+ return PyTokenizer_FindEncodingFilename(fd, NULL);
+}
+
#ifdef Py_DEBUG
void
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 3a0d3cb..ed1f3aa 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -75,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset);
-extern char * PyTokenizer_FindEncoding(int);
#ifdef __cplusplus
}