Issue #14629: Mention the filename in SyntaxError exceptions from
tokenize.detect_encoding() (when available).
author: Brett Cannon <brett@python.org> 2012-04-20 17:23:54 (GMT)
committer: Brett Cannon <brett@python.org> 2012-04-20 17:23:54 (GMT)
commit: c33f3f2339fd3217a0c6fe3df916616abab2fab4 (patch)
tree: 45b2793ea426faee0669f4a6d34387cd236aff06 /Lib
parent: dd9a56953e561076b5573d53f6e4fdd7f42b208c (diff)
download: cpython-c33f3f2339fd3217a0c6fe3df916616abab2fab4.zip
cpython-c33f3f2339fd3217a0c6fe3df916616abab2fab4.tar.gz
cpython-c33f3f2339fd3217a0c6fe3df916616abab2fab4.tar.bz2
2 files changed, 48 insertions, 3 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 11590ea..915eda9 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -904,6 +904,35 @@ class TestDetectEncoding(TestCase):
             self.assertEqual(fp.encoding, 'utf-8-sig')
             self.assertEqual(fp.mode, 'r')
 
+    def test_filename_in_exception(self):
+        # When possible, include the file name in the exception.
+        path = 'some_file_path'
+        lines = (
+            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
+            )
+        class Bunk:
+            def __init__(self, lines, path):
+                self.name = path
+                self._lines = lines
+                self._index = 0
+
+            def readline(self):
+                if self._index == len(lines):
+                    raise StopIteration
+                line = lines[self._index]
+                self._index += 1
+                return line
+
+        with self.assertRaises(SyntaxError):
+            ins = Bunk(lines, path)
+            # Make sure lacking a name isn't an issue.
+            del ins.name
+            detect_encoding(ins.readline)
+        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
+            ins = Bunk(lines, path)
+            detect_encoding(ins.readline)
+
+
 class TestTokenize(TestCase):
 
     def test_tokenize(self):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c05f764..e4c9d3c 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -353,6 +353,10 @@ def detect_encoding(readline):
 
     If no encoding is specified, then the default of 'utf-8' will be returned.
     """
+    try:
+        filename = readline.__self__.name
+    except AttributeError:
+        filename = None
     bom_found = False
     encoding = None
     default = 'utf-8'
@@ -369,7 +373,10 @@ def detect_encoding(readline):
             # per default encoding.
             line_string = line.decode('utf-8')
         except UnicodeDecodeError:
-            raise SyntaxError("invalid or missing encoding declaration")
+            msg = "invalid or missing encoding declaration"
+            if filename is not None:
+                msg = '{} for {!r}'.format(msg, filename)
+            raise SyntaxError(msg)
 
         matches = cookie_re.findall(line_string)
         if not matches:
@@ -379,12 +386,21 @@ def detect_encoding(readline):
             codec = lookup(encoding)
         except LookupError:
             # This behaviour mimics the Python interpreter
-            raise SyntaxError("unknown encoding: " + encoding)
+            if filename is None:
+                msg = "unknown encoding: " + encoding
+            else:
+                msg = "unknown encoding for {!r}: {}".format(filename,
+                        encoding)
+            raise SyntaxError(msg)
 
         if bom_found:
             if codec.name != 'utf-8':
                 # This behaviour mimics the Python interpreter
-                raise SyntaxError('encoding problem: utf-8')
+                if filename is None:
+                    msg = 'encoding problem: utf-8'
+                else:
+                    msg = 'encoding problem for {!r}: utf-8'.format(filename)
+                raise SyntaxError(msg)
             encoding += '-sig'
         return encoding
author	Brett Cannon <brett@python.org>	2012-04-20 17:23:54 (GMT)
committer	Brett Cannon <brett@python.org>	2012-04-20 17:23:54 (GMT)
commit	c33f3f2339fd3217a0c6fe3df916616abab2fab4 (patch)
tree	45b2793ea426faee0669f4a6d34387cd236aff06 /Lib
parent	dd9a56953e561076b5573d53f6e4fdd7f42b208c (diff)
download	cpython-c33f3f2339fd3217a0c6fe3df916616abab2fab4.zip cpython-c33f3f2339fd3217a0c6fe3df916616abab2fab4.tar.gz cpython-c33f3f2339fd3217a0c6fe3df916616abab2fab4.tar.bz2