summaryrefslogtreecommitdiffstats
path: root/Lib/lib2to3/patcomp.py
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2008-03-19 05:04:44 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2008-03-19 05:04:44 (GMT)
commitef04c44e29a8276a484f58d03a75a2dec516302d (patch)
tree6231aa6bb789345a6a86c60b0f547a7bfa19927f /Lib/lib2to3/patcomp.py
parentc42bcbb1f07723476cccd352eb0ae98ad2d1a809 (diff)
downloadcpython-ef04c44e29a8276a484f58d03a75a2dec516302d.zip
cpython-ef04c44e29a8276a484f58d03a75a2dec516302d.tar.gz
cpython-ef04c44e29a8276a484f58d03a75a2dec516302d.tar.bz2
Merged revisions 61596-61597 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r61596 | martin.v.loewis | 2008-03-18 23:43:46 -0500 (Di, 18 Mär 2008) | 2 lines Import lib2to3. ........ r61597 | martin.v.loewis | 2008-03-18 23:58:04 -0500 (Di, 18 Mär 2008) | 3 lines Initialized merge tracking via "svnmerge" with revisions "1-61595" from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........
Diffstat (limited to 'Lib/lib2to3/patcomp.py')
-rw-r--r--Lib/lib2to3/patcomp.py186
1 files changed, 186 insertions, 0 deletions
diff --git a/Lib/lib2to3/patcomp.py b/Lib/lib2to3/patcomp.py
new file mode 100644
index 0000000..353b960
--- /dev/null
+++ b/Lib/lib2to3/patcomp.py
@@ -0,0 +1,186 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Pattern compiler.
+
+The grammer is taken from PatternGrammar.txt.
+
+The compiler compiles a pattern to a pytree.*Pattern instance.
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+# Python imports
+import os
+
+# Fairly local imports
+from .pgen2 import driver
+from .pgen2 import literals
+from .pgen2 import token
+from .pgen2 import tokenize
+
+# Really local imports
+from . import pytree
+from . import pygram
+
+# The pattern grammar file
+_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
+ "PatternGrammar.txt")
+
+
+def tokenize_wrapper(input):
+ """Tokenizes a string suppressing significant whitespace."""
+ skip = (token.NEWLINE, token.INDENT, token.DEDENT)
+ tokens = tokenize.generate_tokens(driver.generate_lines(input).next)
+ for quintuple in tokens:
+ type, value, start, end, line_text = quintuple
+ if type not in skip:
+ yield quintuple
+
+
+class PatternCompiler(object):
+
+ def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
+ """Initializer.
+
+ Takes an optional alternative filename for the pattern grammar.
+ """
+ self.grammar = driver.load_grammar(grammar_file)
+ self.syms = pygram.Symbols(self.grammar)
+ self.pygrammar = pygram.python_grammar
+ self.pysyms = pygram.python_symbols
+ self.driver = driver.Driver(self.grammar, convert=pattern_convert)
+
+ def compile_pattern(self, input, debug=False):
+ """Compiles a pattern string to a nested pytree.*Pattern object."""
+ tokens = tokenize_wrapper(input)
+ root = self.driver.parse_tokens(tokens, debug=debug)
+ return self.compile_node(root)
+
+ def compile_node(self, node):
+ """Compiles a node, recursively.
+
+ This is one big switch on the node type.
+ """
+ # XXX Optimize certain Wildcard-containing-Wildcard patterns
+ # that can be merged
+ if node.type == self.syms.Matcher:
+ node = node.children[0] # Avoid unneeded recursion
+
+ if node.type == self.syms.Alternatives:
+ # Skip the odd children since they are just '|' tokens
+ alts = [self.compile_node(ch) for ch in node.children[::2]]
+ if len(alts) == 1:
+ return alts[0]
+ p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
+ return p.optimize()
+
+ if node.type == self.syms.Alternative:
+ units = [self.compile_node(ch) for ch in node.children]
+ if len(units) == 1:
+ return units[0]
+ p = pytree.WildcardPattern([units], min=1, max=1)
+ return p.optimize()
+
+ if node.type == self.syms.NegatedUnit:
+ pattern = self.compile_basic(node.children[1:])
+ p = pytree.NegatedPattern(pattern)
+ return p.optimize()
+
+ assert node.type == self.syms.Unit
+
+ name = None
+ nodes = node.children
+ if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
+ name = nodes[0].value
+ nodes = nodes[2:]
+ repeat = None
+ if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
+ repeat = nodes[-1]
+ nodes = nodes[:-1]
+
+ # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
+ pattern = self.compile_basic(nodes, repeat)
+
+ if repeat is not None:
+ assert repeat.type == self.syms.Repeater
+ children = repeat.children
+ child = children[0]
+ if child.type == token.STAR:
+ min = 0
+ max = pytree.HUGE
+ elif child.type == token.PLUS:
+ min = 1
+ max = pytree.HUGE
+ elif child.type == token.LBRACE:
+ assert children[-1].type == token.RBRACE
+ assert len(children) in (3, 5)
+ min = max = self.get_int(children[1])
+ if len(children) == 5:
+ max = self.get_int(children[3])
+ else:
+ assert False
+ if min != 1 or max != 1:
+ pattern = pattern.optimize()
+ pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
+
+ if name is not None:
+ pattern.name = name
+ return pattern.optimize()
+
+ def compile_basic(self, nodes, repeat=None):
+ # Compile STRING | NAME [Details] | (...) | [...]
+ assert len(nodes) >= 1
+ node = nodes[0]
+ if node.type == token.STRING:
+ value = literals.evalString(node.value)
+ return pytree.LeafPattern(content=value)
+ elif node.type == token.NAME:
+ value = node.value
+ if value.isupper():
+ if value not in TOKEN_MAP:
+ raise SyntaxError("Invalid token: %r" % value)
+ return pytree.LeafPattern(TOKEN_MAP[value])
+ else:
+ if value == "any":
+ type = None
+ elif not value.startswith("_"):
+ type = getattr(self.pysyms, value, None)
+ if type is None:
+ raise SyntaxError("Invalid symbol: %r" % value)
+ if nodes[1:]: # Details present
+ content = [self.compile_node(nodes[1].children[1])]
+ else:
+ content = None
+ return pytree.NodePattern(type, content)
+ elif node.value == "(":
+ return self.compile_node(nodes[1])
+ elif node.value == "[":
+ assert repeat is None
+ subpattern = self.compile_node(nodes[1])
+ return pytree.WildcardPattern([[subpattern]], min=0, max=1)
+ assert False, node
+
+ def get_int(self, node):
+ assert node.type == token.NUMBER
+ return int(node.value)
+
+
+# Map named tokens to the type value for a LeafPattern
+TOKEN_MAP = {"NAME": token.NAME,
+ "STRING": token.STRING,
+ "NUMBER": token.NUMBER,
+ "TOKEN": None}
+
+
+def pattern_convert(grammar, raw_node_info):
+ """Converts raw node information to a Node or Leaf instance."""
+ type, value, context, children = raw_node_info
+ if children or type in grammar.number2symbol:
+ return pytree.Node(type, children, context=context)
+ else:
+ return pytree.Leaf(type, value, context=context)
+
+
+def compile_pattern(pattern):
+ return PatternCompiler().compile_pattern(pattern)