summaryrefslogtreecommitdiffstats
path: root/Lib/tokenize.py
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de>, 2012-04-20 12:36:47 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de>, 2012-04-20 12:36:47 (GMT)
commit63674f4b52aa7c2832fec09a026e24cd521e491b (patch)
tree666adcb3196093e019e256d5ab9408df9436ea22 /Lib/tokenize.py
parent8e6e0fdb7fee3796df8b578c1311b5e46005f2d9 (diff)
downloadcpython-63674f4b52aa7c2832fec09a026e24cd521e491b.zip
cpython-63674f4b52aa7c2832fec09a026e24cd521e491b.tar.gz
cpython-63674f4b52aa7c2832fec09a026e24cd521e491b.tar.bz2
Issue #14629: Raise SyntaxError in tokenize.detect_encoding
if the first two lines contain non-UTF-8 bytes without an encoding declaration.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- Lib/tokenize.py 7
1 file changed, 5 insertions, 2 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f575e9b..f283c6d 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -292,9 +292,12 @@ def detect_encoding(readline):
def find_cookie(line):
try:
- line_string = line.decode('ascii')
+ # Decode as UTF-8. Either the line is an encoding declaration,
+ # in which case it should be pure ASCII, or it must be UTF-8
+ # per default encoding.
+ line_string = line.decode('utf-8')
except UnicodeDecodeError:
- return None
+ raise SyntaxError("invalid or missing encoding declaration")
matches = cookie_re.findall(line_string)
if not matches: