author     Serhiy Storchaka <storchaka@gmail.com>   2016-03-20 21:36:29 (GMT)
committer  Serhiy Storchaka <storchaka@gmail.com>   2016-03-20 21:36:29 (GMT)
commit     e431d3c9aadb52dd1eea4d1e606e94f1c8471459 (patch)
tree       5154063f2e6048881fa88675efd6dfb4bd2f5c4f
parent     97eee1cfda602df25866a6a62796f675caf52323 (diff)
Issue #26581: Use the first coding cookie on a line, not the last one.
-rw-r--r--  Lib/idlelib/IOBinding.py                                 2
-rw-r--r--  Lib/lib2to3/pgen2/tokenize.py                            2
-rw-r--r--  Lib/test/test_importlib/source/test_source_encoding.py   2
-rw-r--r--  Lib/test/test_source_encoding.py                         2
-rw-r--r--  Lib/tokenize.py                                          2
-rw-r--r--  Misc/NEWS                                                3
-rw-r--r--  Parser/tokenizer.c                                       1
-rwxr-xr-x  Tools/scripts/findnocoding.py                            2
8 files changed, 10 insertions, 6 deletions
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py
index a4cc205..84f39a2 100644
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -62,7 +62,7 @@ locale_encoding = locale_encoding.lower()
encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check!
### 'encoding' is used below in encode(), check!
-coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
def coding_spec(data):
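The only functional change in these regexes is the non-greedy `.*?`. A quick standalone check (not part of the patch; the sample line mirrors the one used in test_source_encoding.py) shows how greediness decides which cookie is captured when two appear on the same line:

import re

line = '#coding:iso8859-15 coding:latin1\n'

greedy = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
lazy = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)

# Greedy .* backtracks from the end of the line, so the last cookie wins.
print(greedy.match(line).group(1))  # latin1
# Non-greedy .*? stops at the first cookie on the line.
print(lazy.match(line).group(1))    # iso8859-15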
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py
index 1ff1c61..d14db60 100644
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -236,7 +236,7 @@ class Untokenizer:
startline = False
toks_append(tokval)
-cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
def _get_normal_name(orig_enc):
diff --git a/Lib/test/test_importlib/source/test_source_encoding.py b/Lib/test/test_importlib/source/test_source_encoding.py
index b604afb..1e0771b 100644
--- a/Lib/test/test_importlib/source/test_source_encoding.py
+++ b/Lib/test/test_importlib/source/test_source_encoding.py
@@ -14,7 +14,7 @@ import unittest
import warnings
-CODING_RE = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
class EncodingTest:
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index 7979c82..3873400 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -178,7 +178,7 @@ class AbstractSourceEncodingTest:
def test_double_coding_same_line(self):
src = (b'#coding:iso8859-15 coding:latin1\n'
b'print(ascii("\xc3\xa4"))\n')
- self.check_script_output(src, br"'\xc3\xa4'")
+ self.check_script_output(src, br"'\xc3\u20ac'")
def test_first_non_utf8_coding_line(self):
src = (b'#coding:iso-8859-15 \xa4\n'
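The new expected literal follows from the codecs themselves. This throwaway check (not part of the patch) decodes the same two bytes both ways: byte 0xA4 is U+00A4 in latin-1 but U+20AC (the euro sign) in iso-8859-15, so letting the first cookie win changes the ascii() output:

raw = b'\xc3\xa4'

# Old behaviour: the last cookie (latin1) decided the decoding.
print(ascii(raw.decode('latin1')))      # '\xc3\xa4'
# New behaviour: the first cookie (iso8859-15) wins; 0xA4 maps to U+20AC.
print(ascii(raw.decode('iso8859-15')))  # '\xc3\u20ac'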
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9fd676c..b1d0c83 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -33,7 +33,7 @@ import re
import sys
from token import *
-cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
import token
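With the patched pattern, tokenize.detect_encoding() reports the first cookie on the line. A small check against the patched module (the source bytes are made up for illustration):

import io
import tokenize

src = b'#coding:iso8859-15 coding:latin1\nprint("hello")\n'
encoding, lines = tokenize.detect_encoding(io.BytesIO(src).readline)
print(encoding)  # iso8859-15 with this change; latin1 before it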
diff --git a/Misc/NEWS b/Misc/NEWS
index bdcfebf..b5672c2 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ Release date: tba
Core and Builtins
-----------------
+- Issue #26581: If a coding cookie is specified multiple times on a line in a
+  Python source code file, only the first one is taken into account.
+
- Issue #26464: Fix str.translate() when string is ASCII and first replacements
removes character, but next replacement uses a non-ASCII character or a
string longer than 1 character. Regression introduced in Python 3.5.0.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 1cdbae2..50ce2e8 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -275,6 +275,7 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t
return 0;
}
*spec = r;
+ break;
}
}
}
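The C change is the added break: get_coding_spec() used to keep scanning the comment and let a later cookie overwrite *spec, whereas now it stops at the first one. Roughly, in an illustrative Python sketch (not the C code; the helper name is made up):

import re

def first_coding_spec(comment_line):
    # Walk the line left to right and stop at the first cookie, the same
    # effect as the new break in the C loop.
    for match in re.finditer(r'coding[:=][ \t]*([-\w.]+)', comment_line):
        return match.group(1)
    return None

print(first_coding_spec('#coding:iso8859-15 coding:latin1'))  # iso8859-15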
diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py
index 5f3795e..6c16b1c 100755
--- a/Tools/scripts/findnocoding.py
+++ b/Tools/scripts/findnocoding.py
@@ -32,7 +32,7 @@ except ImportError:
"no sophisticated Python source file search will be done.", file=sys.stderr)
-decl_re = re.compile(rb'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')
def get_declaration(line):