summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2002-08-24 06:54:19 (GMT)
committerGuido van Rossum <guido@python.org>2002-08-24 06:54:19 (GMT)
commit9d6897accc49f40414fbecafeb1c65562c6e4647 (patch)
treee8c26f80a6e2dc56004450ea807c56e7e3e813b2
parent6248f441ea3ca34ed3306eb8634e6815a42611b4 (diff)
downloadcpython-9d6897accc49f40414fbecafeb1c65562c6e4647.zip
cpython-9d6897accc49f40414fbecafeb1c65562c6e4647.tar.gz
cpython-9d6897accc49f40414fbecafeb1c65562c6e4647.tar.bz2
Speed up the most egregious "if token in (long tuple)" cases by using
a dict instead. (Alas, using a Set would be slower instead of faster.)
-rw-r--r--  Lib/tokenize.py | 29
1 file changed, 19 insertions, 10 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 22f28c4..76ea7a2 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -110,6 +110,21 @@ endprogs = {"'": re.compile(Single), '"': re.compile(Double),
"UR'''": single3prog, 'UR"""': double3prog,
'r': None, 'R': None, 'u': None, 'U': None}
+triple_quoted = {}
+for t in ("'''", '"""',
+ "r'''", 'r"""', "R'''", 'R"""',
+ "u'''", 'u"""', "U'''", 'U"""',
+ "ur'''", 'ur"""', "Ur'''", 'Ur"""',
+ "uR'''", 'uR"""', "UR'''", 'UR"""'):
+ triple_quoted[t] = t
+single_quoted = {}
+for t in ("'", '"',
+ "r'", 'r"', "R'", 'R"',
+ "u'", 'u"', "U'", 'U"',
+ "ur'", 'ur"', "Ur'", 'Ur"',
+ "uR'", 'uR"', "UR'", 'UR"' ):
+ single_quoted[t] = t
+
tabsize = 8
class TokenError(Exception): pass
@@ -232,11 +247,7 @@ def generate_tokens(readline):
token, spos, epos, line)
elif initial == '#':
yield (COMMENT, token, spos, epos, line)
- elif token in ("'''", '"""', # triple-quoted
- "r'''", 'r"""', "R'''", 'R"""',
- "u'''", 'u"""', "U'''", 'U"""',
- "ur'''", 'ur"""', "Ur'''", 'Ur"""',
- "uR'''", 'uR"""', "UR'''", 'UR"""'):
+ elif token in triple_quoted:
endprog = endprogs[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
@@ -248,11 +259,9 @@ def generate_tokens(readline):
contstr = line[start:]
contline = line
break
- elif initial in ("'", '"') or \
- token[:2] in ("r'", 'r"', "R'", 'R"',
- "u'", 'u"', "U'", 'U"') or \
- token[:3] in ("ur'", 'ur"', "Ur'", 'Ur"',
- "uR'", 'uR"', "UR'", 'UR"' ):
+ elif initial in single_quoted or \
+ token[:2] in single_quoted or \
+ token[:3] in single_quoted:
if token[-1] == '\n': # continued string
strstart = (lnum, start)
endprog = (endprogs[initial] or endprogs[token[1]] or