author     Benjamin Peterson <benjamin@python.org>   2009-03-24 22:30:15 (GMT)
committer  Benjamin Peterson <benjamin@python.org>   2009-03-24 22:30:15 (GMT)
commit     9b8d24b17de49813eb53d6f9a4d615bfac574d11 (patch)
tree       a79e1cea434b4681bacf5f88225bd712b400d2e6
parent     a8abe863316b8f0bc92c9a490573dde67c7c81e6 (diff)
download   cpython-9b8d24b17de49813eb53d6f9a4d615bfac574d11.zip / .tar.gz / .tar.bz2
reuse tokenize.detect_encoding in linecache instead of a custom solution
patch by Victor Stinner #4016
-rw-r--r--  Lib/linecache.py | 24 ++++--------------------
-rw-r--r--  Lib/tokenize.py  |  7 ++++---
2 files changed, 8 insertions(+), 23 deletions(-)
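
The change replaces linecache's hand-rolled PEP 263 cookie regex with tokenize.detect_encoding. As a rough illustration (not part of the patch; the file name is made up), detect_encoding takes a readline callable over a binary stream and returns the detected encoding plus the raw lines it consumed while sniffing:

import tokenize

# Hypothetical file name, for illustration only.
with open('some_module.py', 'rb') as fp:        # detect_encoding needs bytes
    encoding, consumed = tokenize.detect_encoding(fp.readline)

print(encoding)   # e.g. 'utf-8' (the default) or whatever the coding cookie/BOM says
print(consumed)   # the raw byte lines read while sniffing (at most two)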
diff --git a/Lib/linecache.py b/Lib/linecache.py
index 6a9535e..51404e2 100644
--- a/Lib/linecache.py
+++ b/Lib/linecache.py
@@ -7,7 +7,7 @@ that name.
import sys
import os
-import re
+import tokenize
__all__ = ["getline", "clearcache", "checkcache"]
@@ -120,27 +120,11 @@ def updatecache(filename, module_globals=None):
pass
else:
# No luck
-## print '*** Cannot stat', filename, ':', msg
return []
-## print("Refreshing cache for %s..." % fullname)
- try:
- fp = open(fullname, 'rU')
+ with open(fullname, 'rb') as fp:
+ coding, line = tokenize.detect_encoding(fp.readline)
+ with open(fullname, 'r', encoding=coding) as fp:
lines = fp.readlines()
- fp.close()
- except Exception as msg:
-## print '*** Cannot open', fullname, ':', msg
- return []
- coding = "utf-8"
- for line in lines[:2]:
- m = re.search(r"coding[:=]\s*([-\w.]+)", line)
- if m:
- coding = m.group(1)
- break
- try:
- lines = [line if isinstance(line, str) else str(line, coding)
- for line in lines]
- except:
- pass # Hope for the best
size, mtime = stat.st_size, stat.st_mtime
cache[filename] = size, mtime, lines, fullname
return lines
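
The new updatecache logic above reads the file twice: a first binary-mode open lets tokenize.detect_encoding sniff the coding cookie or BOM, and a second text-mode open with that encoding yields str lines directly, so the old bytes-to-str conversion loop is no longer needed. A minimal standalone sketch of the same pattern (read_source_lines is a hypothetical helper, not part of the patch):

import tokenize

def read_source_lines(fullname):
    # First pass: binary open, only so detect_encoding can inspect the header lines.
    with open(fullname, 'rb') as fp:
        coding, _ = tokenize.detect_encoding(fp.readline)
    # Second pass: text open with the detected encoding; readlines() returns str.
    with open(fullname, 'r', encoding=coding) as fp:
        return fp.readlines()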
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 16c4f3f..4ff859d 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -27,7 +27,6 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
import re, string, sys
from token import *
from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
import token
@@ -327,13 +326,15 @@ def tokenize(readline):
which tells you which encoding was used to decode the bytes stream.
"""
encoding, consumed = detect_encoding(readline)
- def readline_generator():
+ def readline_generator(consumed):
+ for line in consumed:
+ yield line
while True:
try:
yield readline()
except StopIteration:
return
- chained = chain(consumed, readline_generator())
+ chained = readline_generator(consumed)
return _tokenize(chained.__next__, encoding)
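
In tokenize.py, the itertools.chain call is replaced by having the generator itself replay the lines detect_encoding already consumed before pulling fresh lines from readline. A rough sketch with made-up input showing the equivalence (here readline is passed explicitly, whereas the patched code closes over it):

def readline_generator(consumed, readline):
    for line in consumed:          # first, replay the lines detect_encoding read
        yield line
    while True:
        try:
            yield readline()       # then keep yielding from the live source
        except StopIteration:
            return

# Hypothetical input: one consumed header line, two remaining source lines.
consumed = [b'# coding: utf-8\n']
rest = iter([b'x = 1\n', b'y = 2\n'])
print(list(readline_generator(consumed, rest.__next__)))
# [b'# coding: utf-8\n', b'x = 1\n', b'y = 2\n']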