author     Martin v. Löwis <martin@v.loewis.de>    2012-04-20 12:36:47 (GMT)
committer  Martin v. Löwis <martin@v.loewis.de>    2012-04-20 12:36:47 (GMT)
commit     63674f4b52aa7c2832fec09a026e24cd521e491b
tree       666adcb3196093e019e256d5ab9408df9436ea22 /Lib/tokenize.py
parent     8e6e0fdb7fee3796df8b578c1311b5e46005f2d9
Issue #14629: Raise SyntaxError in tokenizer.detect_encoding
if the first two lines have non-UTF-8 characters without an encoding declaration.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r--  Lib/tokenize.py  7
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f575e9b..f283c6d 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -292,9 +292,12 @@ def detect_encoding(readline):
 
     def find_cookie(line):
         try:
-            line_string = line.decode('ascii')
+            # Decode as UTF-8. Either the line is an encoding declaration,
+            # in which case it should be pure ASCII, or it must be UTF-8
+            # per default encoding.
+            line_string = line.decode('utf-8')
         except UnicodeDecodeError:
-            return None
+            raise SyntaxError("invalid or missing encoding declaration")
 
         matches = cookie_re.findall(line_string)
         if not matches:
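The practical effect of the change can be seen by calling tokenize.detect_encoding directly. The sketch below is illustrative rather than part of the commit: the sample byte strings and the names bad_source/good_source are made up, while tokenize.detect_encoding and io.BytesIO are the standard-library APIs involved. A first line that contains bytes which are not valid UTF-8 and carries no coding cookie now raises SyntaxError, where the old code silently fell back to reporting 'utf-8'; the same bytes are accepted once a declaration names the encoding, because the cookie line itself is pure ASCII.

import io
import tokenize

# Latin-1 encoded source without a coding cookie: the 0xfc byte is not valid
# UTF-8 in this position, so detect_encoding() now raises SyntaxError instead
# of silently reporting the default 'utf-8'.
bad_source = b'x = "B\xfccher"\n'
try:
    tokenize.detect_encoding(io.BytesIO(bad_source).readline)
except SyntaxError as exc:
    print("rejected:", exc)

# The same bytes pass once an explicit declaration names the encoding,
# because the cookie line itself is pure ASCII.
good_source = b'# -*- coding: latin-1 -*-\nx = "B\xfccher"\n'
encoding, lines_read = tokenize.detect_encoding(io.BytesIO(good_source).readline)
print(encoding)  # 'iso-8859-1', the normalized name for latin-1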