diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-01-09 16:36:09 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-01-09 16:36:09 (GMT) |
commit | 768c16ce0273a74fa846cc388753280b17b02cfc (patch) | |
tree | d2fc7f94a08fb20f882e3e0b299a59fea1251aa8 /Parser | |
parent | 21e7d4cd5eb5a1ee153baf4c7915db80e6ca59e1 (diff) | |
download | cpython-768c16ce0273a74fa846cc388753280b17b02cfc.zip cpython-768c16ce0273a74fa846cc388753280b17b02cfc.tar.gz cpython-768c16ce0273a74fa846cc388753280b17b02cfc.tar.bz2 |
Issue #18960: Fix bugs with Python source code encoding in the second line.
* The first line of Python script could be executed twice when the source
encoding (not equal to 'utf-8') was specified on the second line.
* Now the source encoding declaration on the second line isn't effective if
the first line contains anything except a comment.
* As a consequence, 'python -x' works now again with files with the source
encoding declarations specified on the second file, and can be used again
to make Python batch files on Windows.
* The tokenize module now ignore the source encoding declaration on the second
line if the first line contains anything except a comment.
* IDLE now ignores the source encoding declaration on the second line if the
first line contains anything except a comment.
* 2to3 and the findnocoding.py script now ignore the source encoding
declaration on the second line if the first line contains anything except
a comment.
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/tokenizer.c | 26 |
1 files changed, 22 insertions, 4 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5c0bd6e..a69d788 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -283,13 +283,27 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, char *cs; int r = 1; - if (tok->cont_line) + if (tok->cont_line) { /* It's a continuation line, so it can't be a coding spec. */ + tok->read_coding_spec = 1; return 1; + } if (!get_coding_spec(line, &cs, size, tok)) return 0; - if (!cs) + if (!cs) { + Py_ssize_t i; + for (i = 0; i < size; i++) { + if (line[i] == '#' || line[i] == '\n' || line[i] == '\r') + break; + if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') { + /* Stop checking coding spec after a line containing + * anything except a comment. */ + tok->read_coding_spec = 1; + break; + } + } return 1; + } tok->read_coding_spec = 1; if (tok->encoding == NULL) { assert(tok->decoding_state == STATE_RAW); @@ -476,13 +490,17 @@ fp_setreadl(struct tok_state *tok, const char* enc) _Py_IDENTIFIER(open); _Py_IDENTIFIER(readline); int fd; + long pos; io = PyImport_ImportModuleNoBlock("io"); if (io == NULL) goto cleanup; fd = fileno(tok->fp); - if (lseek(fd, 0, SEEK_SET) == (off_t)-1) { + /* Due to buffering the file offset for fd can be different from the file + * position of tok->fp. */ + pos = ftell(tok->fp); + if (pos == -1 || lseek(fd, (off_t)pos, SEEK_SET) == (off_t)-1) { PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL); goto cleanup; } @@ -751,7 +769,7 @@ decode_str(const char *input, int single, struct tok_state *tok) if (newl[0]) { if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) return error_ret(tok); - if (tok->enc == NULL && newl[1]) { + if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) { if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], tok, buf_setreadl)) return error_ret(tok); |