From 729ad5cf561ba644322952b79051269f07bb1ec0 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 9 Jun 2013 16:54:56 +0300
Subject: Issue #18038: SyntaxError raised during compilation of sources with
 an illegal encoding now always contains the encoding name.

---
 Lib/test/test_pep263.py | 18 ++++++++++++++++++
 Misc/NEWS               |  3 +++
 Parser/tokenizer.c      | 14 +++++++-------
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index 9286467..4b60624 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -41,6 +41,24 @@ class PEP263Test(unittest.TestCase):
         # two bytes in common with the UTF-8 BOM
         self.assertRaises(SyntaxError, eval, '\xef\xbb\x20')
 
+    def test_error_message(self):
+        compile('# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
+        compile('\xef\xbb\xbf\n', 'dummy', 'exec')
+        compile('\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')
+        with self.assertRaisesRegexp(SyntaxError, 'fake'):
+            compile('# -*- coding: fake -*-\n', 'dummy', 'exec')
+        with self.assertRaisesRegexp(SyntaxError, 'iso-8859-15'):
+            compile('\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
+                    'dummy', 'exec')
+        with self.assertRaisesRegexp(SyntaxError, 'BOM'):
+            compile('\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
+                    'dummy', 'exec')
+        with self.assertRaisesRegexp(SyntaxError, 'fake'):
+            compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
+        with self.assertRaisesRegexp(SyntaxError, 'BOM'):
+            compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
+
+
 def test_main():
     test_support.run_unittest(PEP263Test)
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 93d2c09..784011a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@ What's New in Python 2.7.6?
 Core and Builtins
 -----------------
 
+- Issue #18038: SyntaxError raised during compilation of sources with an
+  illegal encoding now always contains the encoding name.
+
 - Issue #18019: Fix crash in the repr of dictionaries containing their own
   views.
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index ee6313b..46cf9b2 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -277,8 +277,11 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
                     tok->encoding = cs;
                     tok->decoding_state = -1;
                 }
-                else
+                else {
+                    PyErr_Format(PyExc_SyntaxError,
+                                 "encoding problem: %s", cs);
                     PyMem_FREE(cs);
+                }
 #else
                 /* Without Unicode support, we cannot
                    process the coding spec. Since there
@@ -289,15 +292,12 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
             }
         } else {                /* then, compare cs with BOM */
             r = (strcmp(tok->encoding, cs) == 0);
+            if (!r)
+                PyErr_Format(PyExc_SyntaxError,
+                             "encoding problem: %s with BOM", cs);
             PyMem_FREE(cs);
         }
     }
-    if (!r) {
-        cs = tok->encoding;
-        if (!cs)
-            cs = "with BOM";
-        PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
-    }
     return r;
 }
 
-- 
cgit v0.12