diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-04-04 23:48:03 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-04-04 23:48:03 (GMT) |
commit | fe7c5b5bdf7c21551b56be563fc604f2d4d3c756 (patch) | |
tree | 831d9e33e02ad3e1c9bf2d0c113a9de8cdad5770 /Parser | |
parent | 7f2fee36401f7b987a368fe043637b3ae7116600 (diff) | |
download | cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.zip cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.gz cpython-fe7c5b5bdf7c21551b56be563fc604f2d4d3c756.tar.bz2 |
Issue #9319: Include the filename in "Non-UTF8 code ..." syntax error.
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/tokenizer.c | 41 | ||||
-rw-r--r-- | Parser/tokenizer.h | 1 |
2 files changed, 27 insertions, 15 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5edd958..f4d7e3f 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) return result; } -/* Get -*- encoding -*- from a Python file. +/* Get the encoding of a Python file. Check for the coding cookie and check if + the file starts with a BOM. - PyTokenizer_FindEncoding returns NULL when it can't find the encoding in - the first or second line of the file (in which case the encoding - should be assumed to be PyUnicode_GetDefaultEncoding()). + PyTokenizer_FindEncodingFilename() returns NULL when it can't find the + encoding in the first or second line of the file (in which case the encoding + should be assumed to be UTF-8). + + The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed + by the caller. */ - The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed - by the caller. -*/ char * -PyTokenizer_FindEncoding(int fd) +PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { struct tok_state *tok; FILE *fp; @@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd) return NULL; } #ifndef PGEN - tok->filename = PyUnicode_FromString("<string>"); - if (tok->filename == NULL) - goto error; + if (filename != NULL) { + Py_INCREF(filename); + tok->filename = filename; + } + else { + tok->filename = PyUnicode_FromString("<string>"); + if (tok->filename == NULL) { + fclose(fp); + PyTokenizer_Free(tok); + return encoding; + } + } #endif while (tok->lineno < 2 && tok->done == E_OK) { PyTokenizer_Get(tok, &p_start, &p_end); @@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd) if (encoding) strcpy(encoding, tok->encoding); } -#ifndef PGEN -error: -#endif PyTokenizer_Free(tok); return encoding; } +char * +PyTokenizer_FindEncoding(int fd) +{ + return PyTokenizer_FindEncodingFilename(fd, NULL); +} + #ifdef Py_DEBUG void diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 3a0d3cb..ed1f3aa 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -75,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset); -extern char * PyTokenizer_FindEncoding(int); #ifdef __cplusplus } |