diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2008-03-19 05:04:44 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2008-03-19 05:04:44 (GMT) |
commit | ef04c44e29a8276a484f58d03a75a2dec516302d (patch) | |
tree | 6231aa6bb789345a6a86c60b0f547a7bfa19927f /Lib/lib2to3/patcomp.py | |
parent | c42bcbb1f07723476cccd352eb0ae98ad2d1a809 (diff) | |
download | cpython-ef04c44e29a8276a484f58d03a75a2dec516302d.zip cpython-ef04c44e29a8276a484f58d03a75a2dec516302d.tar.gz cpython-ef04c44e29a8276a484f58d03a75a2dec516302d.tar.bz2 |
Merged revisions 61596-61597 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r61596 | martin.v.loewis | 2008-03-18 23:43:46 -0500 (Di, 18 Mär 2008) | 2 lines
Import lib2to3.
........
r61597 | martin.v.loewis | 2008-03-18 23:58:04 -0500 (Di, 18 Mär 2008) | 3 lines
Initialized merge tracking via "svnmerge" with revisions "1-61595" from
svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
........
Diffstat (limited to 'Lib/lib2to3/patcomp.py')
-rw-r--r-- | Lib/lib2to3/patcomp.py | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/Lib/lib2to3/patcomp.py b/Lib/lib2to3/patcomp.py new file mode 100644 index 0000000..353b960 --- /dev/null +++ b/Lib/lib2to3/patcomp.py @@ -0,0 +1,186 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Pattern compiler. + +The grammer is taken from PatternGrammar.txt. + +The compiler compiles a pattern to a pytree.*Pattern instance. +""" + +__author__ = "Guido van Rossum <guido@python.org>" + +# Python imports +import os + +# Fairly local imports +from .pgen2 import driver +from .pgen2 import literals +from .pgen2 import token +from .pgen2 import tokenize + +# Really local imports +from . import pytree +from . import pygram + +# The pattern grammar file +_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), + "PatternGrammar.txt") + + +def tokenize_wrapper(input): + """Tokenizes a string suppressing significant whitespace.""" + skip = (token.NEWLINE, token.INDENT, token.DEDENT) + tokens = tokenize.generate_tokens(driver.generate_lines(input).next) + for quintuple in tokens: + type, value, start, end, line_text = quintuple + if type not in skip: + yield quintuple + + +class PatternCompiler(object): + + def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE): + """Initializer. + + Takes an optional alternative filename for the pattern grammar. + """ + self.grammar = driver.load_grammar(grammar_file) + self.syms = pygram.Symbols(self.grammar) + self.pygrammar = pygram.python_grammar + self.pysyms = pygram.python_symbols + self.driver = driver.Driver(self.grammar, convert=pattern_convert) + + def compile_pattern(self, input, debug=False): + """Compiles a pattern string to a nested pytree.*Pattern object.""" + tokens = tokenize_wrapper(input) + root = self.driver.parse_tokens(tokens, debug=debug) + return self.compile_node(root) + + def compile_node(self, node): + """Compiles a node, recursively. + + This is one big switch on the node type. + """ + # XXX Optimize certain Wildcard-containing-Wildcard patterns + # that can be merged + if node.type == self.syms.Matcher: + node = node.children[0] # Avoid unneeded recursion + + if node.type == self.syms.Alternatives: + # Skip the odd children since they are just '|' tokens + alts = [self.compile_node(ch) for ch in node.children[::2]] + if len(alts) == 1: + return alts[0] + p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) + return p.optimize() + + if node.type == self.syms.Alternative: + units = [self.compile_node(ch) for ch in node.children] + if len(units) == 1: + return units[0] + p = pytree.WildcardPattern([units], min=1, max=1) + return p.optimize() + + if node.type == self.syms.NegatedUnit: + pattern = self.compile_basic(node.children[1:]) + p = pytree.NegatedPattern(pattern) + return p.optimize() + + assert node.type == self.syms.Unit + + name = None + nodes = node.children + if len(nodes) >= 3 and nodes[1].type == token.EQUAL: + name = nodes[0].value + nodes = nodes[2:] + repeat = None + if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: + repeat = nodes[-1] + nodes = nodes[:-1] + + # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] + pattern = self.compile_basic(nodes, repeat) + + if repeat is not None: + assert repeat.type == self.syms.Repeater + children = repeat.children + child = children[0] + if child.type == token.STAR: + min = 0 + max = pytree.HUGE + elif child.type == token.PLUS: + min = 1 + max = pytree.HUGE + elif child.type == token.LBRACE: + assert children[-1].type == token.RBRACE + assert len(children) in (3, 5) + min = max = self.get_int(children[1]) + if len(children) == 5: + max = self.get_int(children[3]) + else: + assert False + if min != 1 or max != 1: + pattern = pattern.optimize() + pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) + + if name is not None: + pattern.name = name + return pattern.optimize() + + def compile_basic(self, nodes, repeat=None): + # Compile STRING | NAME [Details] | (...) | [...] + assert len(nodes) >= 1 + node = nodes[0] + if node.type == token.STRING: + value = literals.evalString(node.value) + return pytree.LeafPattern(content=value) + elif node.type == token.NAME: + value = node.value + if value.isupper(): + if value not in TOKEN_MAP: + raise SyntaxError("Invalid token: %r" % value) + return pytree.LeafPattern(TOKEN_MAP[value]) + else: + if value == "any": + type = None + elif not value.startswith("_"): + type = getattr(self.pysyms, value, None) + if type is None: + raise SyntaxError("Invalid symbol: %r" % value) + if nodes[1:]: # Details present + content = [self.compile_node(nodes[1].children[1])] + else: + content = None + return pytree.NodePattern(type, content) + elif node.value == "(": + return self.compile_node(nodes[1]) + elif node.value == "[": + assert repeat is None + subpattern = self.compile_node(nodes[1]) + return pytree.WildcardPattern([[subpattern]], min=0, max=1) + assert False, node + + def get_int(self, node): + assert node.type == token.NUMBER + return int(node.value) + + +# Map named tokens to the type value for a LeafPattern +TOKEN_MAP = {"NAME": token.NAME, + "STRING": token.STRING, + "NUMBER": token.NUMBER, + "TOKEN": None} + + +def pattern_convert(grammar, raw_node_info): + """Converts raw node information to a Node or Leaf instance.""" + type, value, context, children = raw_node_info + if children or type in grammar.number2symbol: + return pytree.Node(type, children, context=context) + else: + return pytree.Leaf(type, value, context=context) + + +def compile_pattern(pattern): + return PatternCompiler().compile_pattern(pattern) |