Issue #18960: Fix bugs with Python source code encoding in the second line.

* The first line of a Python script could be executed twice when the source encoding (not equal to 'utf-8') was specified on the second line.
* Now the source encoding declaration on the second line isn't effective if the first line contains anything except a comment.
* As a consequence, 'python -x' now works again with files that have the source encoding declaration specified on the second line, and can be used again to make Python batch files on Windows.
* The tokenize module now ignores the source encoding declaration on the second line if the first line contains anything except a comment.
* IDLE now ignores the source encoding declaration on the second line if the first line contains anything except a comment.
* 2to3 and the findnocoding.py script now ignore the source encoding declaration on the second line if the first line contains anything except a comment.
author: Serhiy Storchaka <storchaka@gmail.com> 2014-01-09 16:41:59 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-01-09 16:41:59 (GMT)
commit: 7282ff6d5b56825e74c0715aea86e927d2fd339f (patch)
tree: 5bdc07d3601764c5cecdb78c276151f4ba03eef6 /Parser
parent: 766e10c4a808727ecefca7dec59819121477d27f (diff)
parent: 768c16ce0273a74fa846cc388753280b17b02cfc (diff)
download: cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.zip
cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.tar.gz
cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.tar.bz2
1 files changed, 22 insertions, 4 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5bf7e84..0c95b63 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -283,13 +283,27 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
     char *cs;
     int r = 1;
 
-    if (tok->cont_line)
+    if (tok->cont_line) {
         /* It's a continuation line, so it can't be a coding spec. */
+        tok->read_coding_spec = 1;
         return 1;
+    }
     if (!get_coding_spec(line, &cs, size, tok))
         return 0;
-    if (!cs)
+    if (!cs) {
+        Py_ssize_t i;
+        for (i = 0; i < size; i++) {
+            if (line[i] == '#' || line[i] == '\n' || line[i] == '\r')
+                break;
+            if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') {
+                /* Stop checking coding spec after a line containing
+                 * anything except a comment. */
+                tok->read_coding_spec = 1;
+                break;
+            }
+        }
         return 1;
+    }
     tok->read_coding_spec = 1;
     if (tok->encoding == NULL) {
         assert(tok->decoding_state == STATE_RAW);
@@ -476,13 +490,17 @@ fp_setreadl(struct tok_state *tok, const char* enc)
     _Py_IDENTIFIER(open);
     _Py_IDENTIFIER(readline);
     int fd;
+    long pos;
 
     io = PyImport_ImportModuleNoBlock("io");
     if (io == NULL)
         goto cleanup;
 
     fd = fileno(tok->fp);
-    if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+    /* Due to buffering the file offset for fd can be different from the file
+     * position of tok->fp. */
+    pos = ftell(tok->fp);
+    if (pos == -1 || lseek(fd, (off_t)pos, SEEK_SET) == (off_t)-1) {
         PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);
         goto cleanup;
     }
@@ -752,7 +770,7 @@ decode_str(const char *input, int single, struct tok_state *tok)
     if (newl[0]) {
         if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
             return error_ret(tok);
-        if (tok->enc == NULL && newl[1]) {
+        if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) {
             if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
                                    tok, buf_setreadl))
                 return error_ret(tok);
author	Serhiy Storchaka <storchaka@gmail.com>	2014-01-09 16:41:59 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2014-01-09 16:41:59 (GMT)
commit	7282ff6d5b56825e74c0715aea86e927d2fd339f (patch)
tree	5bdc07d3601764c5cecdb78c276151f4ba03eef6 /Parser
parent	766e10c4a808727ecefca7dec59819121477d27f (diff)
parent	768c16ce0273a74fa846cc388753280b17b02cfc (diff)
download	cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.zip cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.tar.gz cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.tar.bz2