From cfbbf48e3e60438036f18f0376d7deb226873a8f Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Sat, 4 Aug 2007 17:43:15 +0000
Subject: Make test_tokenize pass again: Add code to test_roundtrip() that
 figures out the encoding from the first two lines of the file.  (We need to
 refactor this again to make it available to all places that need this,
 e.g. linecache.py.)

---
 Lib/test/test_tokenize.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 022b658..788a04b 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -80,7 +80,10 @@ if (x                                        # The comments need to go in the right place
 
 """
 
+# ' Emacs hint
+
 import os, glob, random, time, sys
+import re
 from io import StringIO
 from test.test_support import (verbose, findfile, is_resource_enabled,
                                TestFailed)
@@ -96,7 +99,17 @@ _PRINT_WORKING_MSG_INTERVAL = 5 * 60
 # tokenization doesn't match the first.
 def test_roundtrip(f):
     ## print 'Testing:', f
-    fobj = open(f)
+    # Get the encoding first
+    fobj = open(f, encoding="latin-1")
+    first2lines = fobj.readline() + fobj.readline()
+    fobj.close()
+    m = re.search(r"coding:\s*(\S+)", first2lines)
+    if m:
+        encoding = m.group(1)
+        print(" coding:", encoding)
+    else:
+        encoding = "utf-8"
+    fobj = open(f, encoding=encoding)
     try:
         fulltok = list(generate_tokens(fobj.readline))
     finally:
@@ -185,8 +198,6 @@ def test_main():
 
     testdir = os.path.dirname(f) or os.curdir
     testfiles = glob.glob(testdir + os.sep + 'test*.py')
-    # Exclude test_pep263 which is encoded in KOI8-R
-    testfiles = [t for t in testfiles if not t.endswith("pep263.py")]
     if not is_resource_enabled('compiler'):
         testfiles = random.sample(testfiles, 10)
 
--
cgit v0.12
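
The encoding detection added to test_roundtrip() above follows the PEP 263
"coding cookie" convention: read the first two lines in a lossless single-byte
encoding, look for a coding declaration there, and fall back to UTF-8. As a
rough sketch of the standalone helper the commit message anticipates factoring
out (for linecache.py and others), the same logic could look like the code
below; the name detect_source_encoding is hypothetical, not from the commit.

    import re

    def detect_source_encoding(path):
        # PEP 263 requires the coding cookie, if present, to appear in the
        # first or second line, so only those two lines are inspected.
        # latin-1 can decode any byte sequence, mirroring the patch above.
        with open(path, encoding="latin-1") as fobj:
            first2lines = fobj.readline() + fobj.readline()
        m = re.search(r"coding:\s*(\S+)", first2lines)
        # Default to UTF-8 when no cookie is found, as the patch does.
        return m.group(1) if m else "utf-8"

    # Usage sketch: reopen a source file with its declared encoding.
    #     with open(fname, encoding=detect_source_encoding(fname)) as f:
    #         source = f.read()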