diff options
author | Benjamin Peterson <benjamin@python.org> | 2009-03-24 22:30:15 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2009-03-24 22:30:15 (GMT) |
commit | 9b8d24b17de49813eb53d6f9a4d615bfac574d11 (patch) | |
tree | a79e1cea434b4681bacf5f88225bd712b400d2e6 | |
parent | a8abe863316b8f0bc92c9a490573dde67c7c81e6 (diff) | |
download | cpython-9b8d24b17de49813eb53d6f9a4d615bfac574d11.zip cpython-9b8d24b17de49813eb53d6f9a4d615bfac574d11.tar.gz cpython-9b8d24b17de49813eb53d6f9a4d615bfac574d11.tar.bz2 |
reuse tokenize.detect_encoding in linecache instead of a custom solution
patch by Victor Stinner #4016
-rw-r--r-- | Lib/linecache.py | 24 | ||||
-rw-r--r-- | Lib/tokenize.py | 7 |
2 files changed, 8 insertions, 23 deletions
```diff
diff --git a/Lib/linecache.py b/Lib/linecache.py
index 6a9535e..51404e2 100644
--- a/Lib/linecache.py
+++ b/Lib/linecache.py
@@ -7,7 +7,7 @@ that name.
 
 import sys
 import os
-import re
+import tokenize
 
 __all__ = ["getline", "clearcache", "checkcache"]
 
@@ -120,27 +120,11 @@ def updatecache(filename, module_globals=None):
                 pass
         else:
             # No luck
-##          print '*** Cannot stat', filename, ':', msg
             return []
-##  print("Refreshing cache for %s..." % fullname)
-    try:
-        fp = open(fullname, 'rU')
+    with open(fullname, 'rb') as fp:
+        coding, line = tokenize.detect_encoding(fp.readline)
+    with open(fullname, 'r', encoding=coding) as fp:
         lines = fp.readlines()
-        fp.close()
-    except Exception as msg:
-##      print '*** Cannot open', fullname, ':', msg
-        return []
-    coding = "utf-8"
-    for line in lines[:2]:
-        m = re.search(r"coding[:=]\s*([-\w.]+)", line)
-        if m:
-            coding = m.group(1)
-            break
-    try:
-        lines = [line if isinstance(line, str) else str(line, coding)
-                 for line in lines]
-    except:
-        pass  # Hope for the best
     size, mtime = stat.st_size, stat.st_mtime
     cache[filename] = size, mtime, lines, fullname
     return lines
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 16c4f3f..4ff859d 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -27,7 +27,6 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
 import re, string, sys
 from token import *
 from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -327,13 +326,15 @@ def tokenize(readline):
     which tells you which encoding was used to decode the bytes stream.
     """
     encoding, consumed = detect_encoding(readline)
-    def readline_generator():
+    def readline_generator(consumed):
+        for line in consumed:
+            yield line
         while True:
             try:
                 yield readline()
             except StopIteration:
                 return
-    chained = chain(consumed, readline_generator())
+    chained = readline_generator(consumed)
     return _tokenize(chained.__next__, encoding)
 
 
```