summaryrefslogtreecommitdiffstats
path: root/Lib/lib2to3/pytree.py
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2008-03-19 05:04:44 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2008-03-19 05:04:44 (GMT)
commitef04c44e29a8276a484f58d03a75a2dec516302d (patch)
tree6231aa6bb789345a6a86c60b0f547a7bfa19927f /Lib/lib2to3/pytree.py
parentc42bcbb1f07723476cccd352eb0ae98ad2d1a809 (diff)
downloadcpython-ef04c44e29a8276a484f58d03a75a2dec516302d.zip
cpython-ef04c44e29a8276a484f58d03a75a2dec516302d.tar.gz
cpython-ef04c44e29a8276a484f58d03a75a2dec516302d.tar.bz2
Merged revisions 61596-61597 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r61596 | martin.v.loewis | 2008-03-18 23:43:46 -0500 (Di, 18 Mär 2008) | 2 lines Import lib2to3. ........ r61597 | martin.v.loewis | 2008-03-18 23:58:04 -0500 (Di, 18 Mär 2008) | 3 lines Initialized merge tracking via "svnmerge" with revisions "1-61595" from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........
Diffstat (limited to 'Lib/lib2to3/pytree.py')
-rw-r--r--Lib/lib2to3/pytree.py712
1 files changed, 712 insertions, 0 deletions
diff --git a/Lib/lib2to3/pytree.py b/Lib/lib2to3/pytree.py
new file mode 100644
index 0000000..7584f71
--- /dev/null
+++ b/Lib/lib2to3/pytree.py
@@ -0,0 +1,712 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Python parse tree definitions.
+
+This is a very concrete parse tree; we need to keep every token and
+even the comments and whitespace between tokens.
+
+There's also a pattern matching implementation here.
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+
+HUGE = 0x7FFFFFFF # maximum repeat count, default max
+
+
+class Base(object):
+
+ """Abstract base class for Node and Leaf.
+
+ This provides some default functionality and boilerplate using the
+ template pattern.
+
+ A node may be a subnode of at most one parent.
+ """
+
+ # Default values for instance variables
+ type = None # int: token number (< 256) or symbol number (>= 256)
+ parent = None # Parent node pointer, or None
+ children = () # Tuple of subnodes
+ was_changed = False
+
+ def __new__(cls, *args, **kwds):
+ """Constructor that prevents Base from being instantiated."""
+ assert cls is not Base, "Cannot instantiate Base"
+ return object.__new__(cls)
+
+ def __eq__(self, other):
+ """Compares two nodes for equality.
+
+ This calls the method _eq().
+ """
+ if self.__class__ is not other.__class__:
+ return NotImplemented
+ return self._eq(other)
+
+ def __ne__(self, other):
+ """Compares two nodes for inequality.
+
+ This calls the method _eq().
+ """
+ if self.__class__ is not other.__class__:
+ return NotImplemented
+ return not self._eq(other)
+
+ def _eq(self, other):
+ """Compares two nodes for equality.
+
+ This is called by __eq__ and __ne__. It is only called if the
+ two nodes have the same type. This must be implemented by the
+ concrete subclass. Nodes should be considered equal if they
+ have the same structure, ignoring the prefix string and other
+ context information.
+ """
+ raise NotImplementedError
+
+ def clone(self):
+ """Returns a cloned (deep) copy of self.
+
+ This must be implemented by the concrete subclass.
+ """
+ raise NotImplementedError
+
+ def post_order(self):
+ """Returns a post-order iterator for the tree.
+
+ This must be implemented by the concrete subclass.
+ """
+ raise NotImplementedError
+
+ def pre_order(self):
+ """Returns a pre-order iterator for the tree.
+
+ This must be implemented by the concrete subclass.
+ """
+ raise NotImplementedError
+
+ def set_prefix(self, prefix):
+ """Sets the prefix for the node (see Leaf class).
+
+ This must be implemented by the concrete subclass.
+ """
+ raise NotImplementedError
+
+ def get_prefix(self):
+ """Returns the prefix for the node (see Leaf class).
+
+ This must be implemented by the concrete subclass.
+ """
+ raise NotImplementedError
+
+ def replace(self, new):
+ """Replaces this node with a new one in the parent."""
+ assert self.parent is not None, str(self)
+ assert new is not None
+ if not isinstance(new, list):
+ new = [new]
+ l_children = []
+ found = False
+ for ch in self.parent.children:
+ if ch is self:
+ assert not found, (self.parent.children, self, new)
+ if new is not None:
+ l_children.extend(new)
+ found = True
+ else:
+ l_children.append(ch)
+ assert found, (self.children, self, new)
+ self.parent.changed()
+ self.parent.children = l_children
+ for x in new:
+ x.parent = self.parent
+ self.parent = None
+
+ def get_lineno(self):
+ """Returns the line number which generated the invocant node."""
+ node = self
+ while not isinstance(node, Leaf):
+ if not node.children:
+ return
+ node = node.children[0]
+ return node.lineno
+
+ def changed(self):
+ if self.parent:
+ self.parent.changed()
+ self.was_changed = True
+
+ def remove(self):
+ """Remove the node from the tree. Returns the position of the node
+ in its parent's children before it was removed."""
+ if self.parent:
+ for i, node in enumerate(self.parent.children):
+ if node is self:
+ self.parent.changed()
+ del self.parent.children[i]
+ self.parent = None
+ return i
+
+ def get_next_sibling(self):
+ """Return the node immediately following the invocant in their
+ parent's children list. If the invocant does not have a next
+ sibling, return None."""
+ if self.parent is None:
+ return None
+
+ # Can't use index(); we need to test by identity
+ for i, sibling in enumerate(self.parent.children):
+ if sibling is self:
+ try:
+ return self.parent.children[i+1]
+ except IndexError:
+ return None
+
+ def get_suffix(self):
+ """Return the string immediately following the invocant node. This
+ is effectively equivalent to node.get_next_sibling().get_prefix()"""
+ next_sib = self.get_next_sibling()
+ if next_sib is None:
+ return ""
+ return next_sib.get_prefix()
+
+
+class Node(Base):
+
+ """Concrete implementation for interior nodes."""
+
+ def __init__(self, type, children, context=None, prefix=None):
+ """Initializer.
+
+ Takes a type constant (a symbol number >= 256), a sequence of
+ child nodes, and an optional context keyword argument.
+
+ As a side effect, the parent pointers of the children are updated.
+ """
+ assert type >= 256, type
+ self.type = type
+ self.children = list(children)
+ for ch in self.children:
+ assert ch.parent is None, repr(ch)
+ ch.parent = self
+ if prefix is not None:
+ self.set_prefix(prefix)
+
+ def __repr__(self):
+ """Returns a canonical string representation."""
+ return "%s(%r, %r)" % (self.__class__.__name__,
+ self.type,
+ self.children)
+
+ def __str__(self):
+ """Returns a pretty string representation.
+
+ This reproduces the input source exactly.
+ """
+ return "".join(map(str, self.children))
+
+ def _eq(self, other):
+ """Compares two nodes for equality."""
+ return (self.type, self.children) == (other.type, other.children)
+
+ def clone(self):
+ """Returns a cloned (deep) copy of self."""
+ return Node(self.type, [ch.clone() for ch in self.children])
+
+ def post_order(self):
+ """Returns a post-order iterator for the tree."""
+ for child in self.children:
+ for node in child.post_order():
+ yield node
+ yield self
+
+ def pre_order(self):
+ """Returns a pre-order iterator for the tree."""
+ yield self
+ for child in self.children:
+ for node in child.post_order():
+ yield node
+
+ def set_prefix(self, prefix):
+ """Sets the prefix for the node.
+
+ This passes the responsibility on to the first child.
+ """
+ if self.children:
+ self.children[0].set_prefix(prefix)
+
+ def get_prefix(self):
+ """Returns the prefix for the node.
+
+ This passes the call on to the first child.
+ """
+ if not self.children:
+ return ""
+ return self.children[0].get_prefix()
+
+ def set_child(self, i, child):
+ """Equivalent to 'node.children[i] = child'. This method also sets the
+ child's parent attribute appropriately."""
+ child.parent = self
+ self.children[i].parent = None
+ self.children[i] = child
+
+ def insert_child(self, i, child):
+ """Equivalent to 'node.children.insert(i, child)'. This method also
+ sets the child's parent attribute appropriately."""
+ child.parent = self
+ self.children.insert(i, child)
+
+ def append_child(self, child):
+ """Equivalent to 'node.children.append(child)'. This method also
+ sets the child's parent attribute appropriately."""
+ child.parent = self
+ self.children.append(child)
+
+
+class Leaf(Base):
+
+ """Concrete implementation for leaf nodes."""
+
+ # Default values for instance variables
+ prefix = "" # Whitespace and comments preceding this token in the input
+ lineno = 0 # Line where this token starts in the input
+ column = 0 # Column where this token tarts in the input
+
+ def __init__(self, type, value, context=None, prefix=None):
+ """Initializer.
+
+ Takes a type constant (a token number < 256), a string value,
+ and an optional context keyword argument.
+ """
+ assert 0 <= type < 256, type
+ if context is not None:
+ self.prefix, (self.lineno, self.column) = context
+ self.type = type
+ self.value = value
+ if prefix is not None:
+ self.prefix = prefix
+
+ def __repr__(self):
+ """Returns a canonical string representation."""
+ return "%s(%r, %r)" % (self.__class__.__name__,
+ self.type,
+ self.value)
+
+ def __str__(self):
+ """Returns a pretty string representation.
+
+ This reproduces the input source exactly.
+ """
+ return self.prefix + str(self.value)
+
+ def _eq(self, other):
+ """Compares two nodes for equality."""
+ return (self.type, self.value) == (other.type, other.value)
+
+ def clone(self):
+ """Returns a cloned (deep) copy of self."""
+ return Leaf(self.type, self.value,
+ (self.prefix, (self.lineno, self.column)))
+
+ def post_order(self):
+ """Returns a post-order iterator for the tree."""
+ yield self
+
+ def pre_order(self):
+ """Returns a pre-order iterator for the tree."""
+ yield self
+
+ def set_prefix(self, prefix):
+ """Sets the prefix for the node."""
+ self.changed()
+ self.prefix = prefix
+
+ def get_prefix(self):
+ """Returns the prefix for the node."""
+ return self.prefix
+
+
+def convert(gr, raw_node):
+ """Converts raw node information to a Node or Leaf instance.
+
+ This is passed to the parser driver which calls it whenever a
+ reduction of a grammar rule produces a new complete node, so that
+ the tree is build strictly bottom-up.
+ """
+ type, value, context, children = raw_node
+ if children or type in gr.number2symbol:
+ # If there's exactly one child, return that child instead of
+ # creating a new node.
+ if len(children) == 1:
+ return children[0]
+ return Node(type, children, context=context)
+ else:
+ return Leaf(type, value, context=context)
+
+
+class BasePattern(object):
+
+ """A pattern is a tree matching pattern.
+
+ It looks for a specific node type (token or symbol), and
+ optionally for a specific content.
+
+ This is an abstract base class. There are three concrete
+ subclasses:
+
+ - LeafPattern matches a single leaf node;
+ - NodePattern matches a single node (usually non-leaf);
+ - WildcardPattern matches a sequence of nodes of variable length.
+ """
+
+ # Defaults for instance variables
+ type = None # Node type (token if < 256, symbol if >= 256)
+ content = None # Optional content matching pattern
+ name = None # Optional name used to store match in results dict
+
+ def __new__(cls, *args, **kwds):
+ """Constructor that prevents BasePattern from being instantiated."""
+ assert cls is not BasePattern, "Cannot instantiate BasePattern"
+ return object.__new__(cls)
+
+ def __repr__(self):
+ args = [self.type, self.content, self.name]
+ while args and args[-1] is None:
+ del args[-1]
+ return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
+
+ def optimize(self):
+ """A subclass can define this as a hook for optimizations.
+
+ Returns either self or another node with the same effect.
+ """
+ return self
+
+ def match(self, node, results=None):
+ """Does this pattern exactly match a node?
+
+ Returns True if it matches, False if not.
+
+ If results is not None, it must be a dict which will be
+ updated with the nodes matching named subpatterns.
+
+ Default implementation for non-wildcard patterns.
+ """
+ if self.type is not None and node.type != self.type:
+ return False
+ if self.content is not None:
+ r = None
+ if results is not None:
+ r = {}
+ if not self._submatch(node, r):
+ return False
+ if r:
+ results.update(r)
+ if results is not None and self.name:
+ results[self.name] = node
+ return True
+
+ def match_seq(self, nodes, results=None):
+ """Does this pattern exactly match a sequence of nodes?
+
+ Default implementation for non-wildcard patterns.
+ """
+ if len(nodes) != 1:
+ return False
+ return self.match(nodes[0], results)
+
+ def generate_matches(self, nodes):
+ """Generator yielding all matches for this pattern.
+
+ Default implementation for non-wildcard patterns.
+ """
+ r = {}
+ if nodes and self.match(nodes[0], r):
+ yield 1, r
+
+
+class LeafPattern(BasePattern):
+
+ def __init__(self, type=None, content=None, name=None):
+ """Initializer. Takes optional type, content, and name.
+
+ The type, if given must be a token type (< 256). If not given,
+ this matches any *leaf* node; the content may still be required.
+
+ The content, if given, must be a string.
+
+ If a name is given, the matching node is stored in the results
+ dict under that key.
+ """
+ if type is not None:
+ assert 0 <= type < 256, type
+ if content is not None:
+ assert isinstance(content, basestring), repr(content)
+ self.type = type
+ self.content = content
+ self.name = name
+
+ def match(self, node, results=None):
+ """Override match() to insist on a leaf node."""
+ if not isinstance(node, Leaf):
+ return False
+ return BasePattern.match(self, node, results)
+
+ def _submatch(self, node, results=None):
+ """Match the pattern's content to the node's children.
+
+ This assumes the node type matches and self.content is not None.
+
+ Returns True if it matches, False if not.
+
+ If results is not None, it must be a dict which will be
+ updated with the nodes matching named subpatterns.
+
+ When returning False, the results dict may still be updated.
+ """
+ return self.content == node.value
+
+
+class NodePattern(BasePattern):
+
+ wildcards = False
+
+ def __init__(self, type=None, content=None, name=None):
+ """Initializer. Takes optional type, content, and name.
+
+ The type, if given, must be a symbol type (>= 256). If the
+ type is None this matches *any* single node (leaf or not),
+ except if content is not None, in which it only matches
+ non-leaf nodes that also match the content pattern.
+
+ The content, if not None, must be a sequence of Patterns that
+ must match the node's children exactly. If the content is
+ given, the type must not be None.
+
+ If a name is given, the matching node is stored in the results
+ dict under that key.
+ """
+ if type is not None:
+ assert type >= 256, type
+ if content is not None:
+ assert not isinstance(content, basestring), repr(content)
+ content = list(content)
+ for i, item in enumerate(content):
+ assert isinstance(item, BasePattern), (i, item)
+ if isinstance(item, WildcardPattern):
+ self.wildcards = True
+ self.type = type
+ self.content = content
+ self.name = name
+
+ def _submatch(self, node, results=None):
+ """Match the pattern's content to the node's children.
+
+ This assumes the node type matches and self.content is not None.
+
+ Returns True if it matches, False if not.
+
+ If results is not None, it must be a dict which will be
+ updated with the nodes matching named subpatterns.
+
+ When returning False, the results dict may still be updated.
+ """
+ if self.wildcards:
+ for c, r in generate_matches(self.content, node.children):
+ if c == len(node.children):
+ if results is not None:
+ results.update(r)
+ return True
+ return False
+ if len(self.content) != len(node.children):
+ return False
+ for subpattern, child in zip(self.content, node.children):
+ if not subpattern.match(child, results):
+ return False
+ return True
+
+
+class WildcardPattern(BasePattern):
+
+ """A wildcard pattern can match zero or more nodes.
+
+ This has all the flexibility needed to implement patterns like:
+
+ .* .+ .? .{m,n}
+ (a b c | d e | f)
+ (...)* (...)+ (...)? (...){m,n}
+
+ except it always uses non-greedy matching.
+ """
+
+ def __init__(self, content=None, min=0, max=HUGE, name=None):
+ """Initializer.
+
+ Args:
+ content: optional sequence of subsequences of patterns;
+ if absent, matches one node;
+ if present, each subsequence is an alternative [*]
+ min: optinal minumum number of times to match, default 0
+ max: optional maximum number of times tro match, default HUGE
+ name: optional name assigned to this match
+
+ [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
+ equivalent to (a b c | d e | f g h); if content is None,
+ this is equivalent to '.' in regular expression terms.
+ The min and max parameters work as follows:
+ min=0, max=maxint: .*
+ min=1, max=maxint: .+
+ min=0, max=1: .?
+ min=1, max=1: .
+ If content is not None, replace the dot with the parenthesized
+ list of alternatives, e.g. (a b c | d e | f g h)*
+ """
+ assert 0 <= min <= max <= HUGE, (min, max)
+ if content is not None:
+ content = tuple(map(tuple, content)) # Protect against alterations
+ # Check sanity of alternatives
+ assert len(content), repr(content) # Can't have zero alternatives
+ for alt in content:
+ assert len(alt), repr(alt) # Can have empty alternatives
+ self.content = content
+ self.min = min
+ self.max = max
+ self.name = name
+
+ def optimize(self):
+ """Optimize certain stacked wildcard patterns."""
+ subpattern = None
+ if (self.content is not None and
+ len(self.content) == 1 and len(self.content[0]) == 1):
+ subpattern = self.content[0][0]
+ if self.min == 1 and self.max == 1:
+ if self.content is None:
+ return NodePattern(name=self.name)
+ if subpattern is not None and self.name == subpattern.name:
+ return subpattern.optimize()
+ if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
+ subpattern.min <= 1 and self.name == subpattern.name):
+ return WildcardPattern(subpattern.content,
+ self.min*subpattern.min,
+ self.max*subpattern.max,
+ subpattern.name)
+ return self
+
+ def match(self, node, results=None):
+ """Does this pattern exactly match a node?"""
+ return self.match_seq([node], results)
+
+ def match_seq(self, nodes, results=None):
+ """Does this pattern exactly match a sequence of nodes?"""
+ for c, r in self.generate_matches(nodes):
+ if c == len(nodes):
+ if results is not None:
+ results.update(r)
+ if self.name:
+ results[self.name] = list(nodes)
+ return True
+ return False
+
+ def generate_matches(self, nodes):
+ """Generator yielding matches for a sequence of nodes.
+
+ Args:
+ nodes: sequence of nodes
+
+ Yields:
+ (count, results) tuples where:
+ count: the match comprises nodes[:count];
+ results: dict containing named submatches.
+ """
+ if self.content is None:
+ # Shortcut for special case (see __init__.__doc__)
+ for count in xrange(self.min, 1 + min(len(nodes), self.max)):
+ r = {}
+ if self.name:
+ r[self.name] = nodes[:count]
+ yield count, r
+ else:
+ for count, r in self._recursive_matches(nodes, 0):
+ if self.name:
+ r[self.name] = nodes[:count]
+ yield count, r
+
+ def _recursive_matches(self, nodes, count):
+ """Helper to recursively yield the matches."""
+ assert self.content is not None
+ if count >= self.min:
+ r = {}
+ if self.name:
+ r[self.name] = nodes[:0]
+ yield 0, r
+ if count < self.max:
+ for alt in self.content:
+ for c0, r0 in generate_matches(alt, nodes):
+ for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
+ r = {}
+ r.update(r0)
+ r.update(r1)
+ yield c0 + c1, r
+
+
+class NegatedPattern(BasePattern):
+
+ def __init__(self, content=None):
+ """Initializer.
+
+ The argument is either a pattern or None. If it is None, this
+ only matches an empty sequence (effectively '$' in regex
+ lingo). If it is not None, this matches whenever the argument
+ pattern doesn't have any matches.
+ """
+ if content is not None:
+ assert isinstance(content, BasePattern), repr(content)
+ self.content = content
+
+ def match(self, node):
+ # We never match a node in its entirety
+ return False
+
+ def match_seq(self, nodes):
+ # We only match an empty sequence of nodes in its entirety
+ return len(nodes) == 0
+
+ def generate_matches(self, nodes):
+ if self.content is None:
+ # Return a match if there is an empty sequence
+ if len(nodes) == 0:
+ yield 0, {}
+ else:
+ # Return a match if the argument pattern has no matches
+ for c, r in self.content.generate_matches(nodes):
+ return
+ yield 0, {}
+
+
+def generate_matches(patterns, nodes):
+ """Generator yielding matches for a sequence of patterns and nodes.
+
+ Args:
+ patterns: a sequence of patterns
+ nodes: a sequence of nodes
+
+ Yields:
+ (count, results) tuples where:
+ count: the entire sequence of patterns matches nodes[:count];
+ results: dict containing named submatches.
+ """
+ if not patterns:
+ yield 0, {}
+ else:
+ p, rest = patterns[0], patterns[1:]
+ for c0, r0 in p.generate_matches(nodes):
+ if not rest:
+ yield c0, r0
+ else:
+ for c1, r1 in generate_matches(rest, nodes[c0:]):
+ r = {}
+ r.update(r0)
+ r.update(r1)
+ yield c0 + c1, r