summaryrefslogtreecommitdiffstats
path: root/Parser/tokenizer.c
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2014-01-09 16:36:09 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2014-01-09 16:36:09 (GMT)
commit768c16ce0273a74fa846cc388753280b17b02cfc (patch)
treed2fc7f94a08fb20f882e3e0b299a59fea1251aa8 /Parser/tokenizer.c
parent21e7d4cd5eb5a1ee153baf4c7915db80e6ca59e1 (diff)
downloadcpython-768c16ce0273a74fa846cc388753280b17b02cfc.zip
cpython-768c16ce0273a74fa846cc388753280b17b02cfc.tar.gz
cpython-768c16ce0273a74fa846cc388753280b17b02cfc.tar.bz2
Issue #18960: Fix bugs with Python source code encoding in the second line.
* The first line of a Python script could be executed twice when the source encoding (not equal to 'utf-8') was specified on the second line. * Now the source encoding declaration on the second line isn't effective if the first line contains anything except a comment. * As a consequence, 'python -x' now works again with files that have the source encoding declaration specified on the second line, and can be used again to make Python batch files on Windows. * The tokenize module now ignores the source encoding declaration on the second line if the first line contains anything except a comment. * IDLE now ignores the source encoding declaration on the second line if the first line contains anything except a comment. * 2to3 and the findnocoding.py script now ignore the source encoding declaration on the second line if the first line contains anything except a comment.
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r--Parser/tokenizer.c26
1 files changed, 22 insertions, 4 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5c0bd6e..a69d788 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -283,13 +283,27 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
char *cs;
int r = 1;
- if (tok->cont_line)
+ if (tok->cont_line) {
/* It's a continuation line, so it can't be a coding spec. */
+ tok->read_coding_spec = 1;
return 1;
+ }
if (!get_coding_spec(line, &cs, size, tok))
return 0;
- if (!cs)
+ if (!cs) {
+ Py_ssize_t i;
+ for (i = 0; i < size; i++) {
+ if (line[i] == '#' || line[i] == '\n' || line[i] == '\r')
+ break;
+ if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') {
+ /* Stop checking coding spec after a line containing
+ * anything except a comment. */
+ tok->read_coding_spec = 1;
+ break;
+ }
+ }
return 1;
+ }
tok->read_coding_spec = 1;
if (tok->encoding == NULL) {
assert(tok->decoding_state == STATE_RAW);
@@ -476,13 +490,17 @@ fp_setreadl(struct tok_state *tok, const char* enc)
_Py_IDENTIFIER(open);
_Py_IDENTIFIER(readline);
int fd;
+ long pos;
io = PyImport_ImportModuleNoBlock("io");
if (io == NULL)
goto cleanup;
fd = fileno(tok->fp);
- if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+ /* Due to buffering the file offset for fd can be different from the file
+ * position of tok->fp. */
+ pos = ftell(tok->fp);
+ if (pos == -1 || lseek(fd, (off_t)pos, SEEK_SET) == (off_t)-1) {
PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);
goto cleanup;
}
@@ -751,7 +769,7 @@ decode_str(const char *input, int single, struct tok_state *tok)
if (newl[0]) {
if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
return error_ret(tok);
- if (tok->enc == NULL && newl[1]) {
+ if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) {
if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
tok, buf_setreadl))
return error_ret(tok);