summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Antoine Pitrou <solipsis@pitrou.net> 2011-10-11 13:45:56 (GMT)
committer Antoine Pitrou <solipsis@pitrou.net> 2011-10-11 13:45:56 (GMT)
commit 10a99b024df0d30911b198146d0206c8f6d0d6c7 (patch)
tree 686d14ab6fbf451b81c4b952e9b2877f4b8dee32
parent 699cd9f7f175b4adb18577ae2e5faed329544713 (diff)
download cpython-10a99b024df0d30911b198146d0206c8f6d0d6c7.zip
cpython-10a99b024df0d30911b198146d0206c8f6d0d6c7.tar.gz
cpython-10a99b024df0d30911b198146d0206c8f6d0d6c7.tar.bz2
Issue #13150: The tokenize module doesn't compile large regular expressions at startup anymore.
Instead, the re module's standard caching does its work.
-rw-r--r-- Lib/tokenize.py 35
-rw-r--r-- Misc/NEWS 3
2 files changed, 19 insertions, 19 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index a0dc035..f923e17 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -114,19 +114,17 @@ PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
def _compile(expr):
return re.compile(expr, re.UNICODE)
-tokenprog, pseudoprog, single3prog, double3prog = map(
- _compile, (Token, PseudoToken, Single3, Double3))
-endprogs = {"'": _compile(Single), '"': _compile(Double),
- "'''": single3prog, '"""': double3prog,
- "r'''": single3prog, 'r"""': double3prog,
- "b'''": single3prog, 'b"""': double3prog,
- "br'''": single3prog, 'br"""': double3prog,
- "R'''": single3prog, 'R"""': double3prog,
- "B'''": single3prog, 'B"""': double3prog,
- "bR'''": single3prog, 'bR"""': double3prog,
- "Br'''": single3prog, 'Br"""': double3prog,
- "BR'''": single3prog, 'BR"""': double3prog,
- 'r': None, 'R': None, 'b': None, 'B': None}
+endpats = {"'": Single, '"': Double,
+ "'''": Single3, '"""': Double3,
+ "r'''": Single3, 'r"""': Double3,
+ "b'''": Single3, 'b"""': Double3,
+ "br'''": Single3, 'br"""': Double3,
+ "R'''": Single3, 'R"""': Double3,
+ "B'''": Single3, 'B"""': Double3,
+ "bR'''": Single3, 'bR"""': Double3,
+ "Br'''": Single3, 'Br"""': Double3,
+ "BR'''": Single3, 'BR"""': Double3,
+ 'r': None, 'R': None, 'b': None, 'B': None}
triple_quoted = {}
for t in ("'''", '"""',
@@ -143,8 +141,6 @@ for t in ("'", '"',
"bR'", 'bR"', "BR'", 'BR"' ):
single_quoted[t] = t
-del _compile
-
tabsize = 8
class TokenError(Exception): pass
@@ -466,7 +462,7 @@ def _tokenize(readline, encoding):
continued = 0
while pos < max:
- pseudomatch = pseudoprog.match(line, pos)
+ pseudomatch = _compile(PseudoToken).match(line, pos)
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
@@ -482,7 +478,7 @@ def _tokenize(readline, encoding):
assert not token.endswith("\n")
yield TokenInfo(COMMENT, token, spos, epos, line)
elif token in triple_quoted:
- endprog = endprogs[token]
+ endprog = _compile(endpats[token])
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
@@ -498,8 +494,9 @@ def _tokenize(readline, encoding):
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
strstart = (lnum, start)
- endprog = (endprogs[initial] or endprogs[token[1]] or
- endprogs[token[2]])
+ endprog = _compile(endpats[initial] or
+ endpats[token[1]] or
+ endpats[token[2]])
contstr, needcont = line[start:], 1
contline = line
break
diff --git a/Misc/NEWS b/Misc/NEWS
index b4dbd6e..9a8e46c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -303,6 +303,9 @@ Core and Builtins
Library
-------
+- Issue #13150: The tokenize module doesn't compile large regular expressions
+ at startup anymore.
+
- Issue #11171: Fix distutils.sysconfig.get_makefile_filename when Python was
configured with different prefix and exec-prefix.