summaryrefslogtreecommitdiffstats
path: root/Lib/lib2to3/btm_utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/lib2to3/btm_utils.py')
-rw-r--r--Lib/lib2to3/btm_utils.py283
1 files changed, 283 insertions, 0 deletions
diff --git a/Lib/lib2to3/btm_utils.py b/Lib/lib2to3/btm_utils.py
new file mode 100644
index 0000000..2276dc9
--- /dev/null
+++ b/Lib/lib2to3/btm_utils.py
@@ -0,0 +1,283 @@
+"Utility functions used by the btm_matcher module"
+
+from . import pytree
+from .pgen2 import grammar, token
+from .pygram import pattern_symbols, python_symbols
+
+syms = pattern_symbols
+pysyms = python_symbols
+tokens = grammar.opmap
+token_labels = token
+
+TYPE_ANY = -1
+TYPE_ALTERNATIVES = -2
+TYPE_GROUP = -3
+
+class MinNode(object):
+ """This class serves as an intermediate representation of the
+ pattern tree during the conversion to sets of leaf-to-root
+ subpatterns"""
+
+ def __init__(self, type=None, name=None):
+ self.type = type
+ self.name = name
+ self.children = []
+ self.leaf = False
+ self.parent = None
+ self.alternatives = []
+ self.group = []
+
+ def __repr__(self):
+ return str(self.type) + ' ' + str(self.name)
+
+ def leaf_to_root(self):
+ """Internal method. Returns a characteristic path of the
+ pattern tree. This method must be run for all leaves until the
+ linear subpatterns are merged into a single"""
+ node = self
+ subp = []
+ while node:
+ if node.type == TYPE_ALTERNATIVES:
+ node.alternatives.append(subp)
+ if len(node.alternatives) == len(node.children):
+ #last alternative
+ subp = [tuple(node.alternatives)]
+ node.alternatives = []
+ node = node.parent
+ continue
+ else:
+ node = node.parent
+ subp = None
+ break
+
+ if node.type == TYPE_GROUP:
+ node.group.append(subp)
+ #probably should check the number of leaves
+ if len(node.group) == len(node.children):
+ subp = get_characteristic_subpattern(node.group)
+ node.group = []
+ node = node.parent
+ continue
+ else:
+ node = node.parent
+ subp = None
+ break
+
+ if node.type == token_labels.NAME and node.name:
+ #in case of type=name, use the name instead
+ subp.append(node.name)
+ else:
+ subp.append(node.type)
+
+ node = node.parent
+ return subp
+
+ def get_linear_subpattern(self):
+ """Drives the leaf_to_root method. The reason that
+ leaf_to_root must be run multiple times is because we need to
+ reject 'group' matches; for example the alternative form
+ (a | b c) creates a group [b c] that needs to be matched. Since
+ matching multiple linear patterns overcomes the automaton's
+ capabilities, leaf_to_root merges each group into a single
+ choice based on 'characteristic'ity,
+
+ i.e. (a|b c) -> (a|b) if b more characteristic than c
+
+ Returns: The most 'characteristic'(as defined by
+ get_characteristic_subpattern) path for the compiled pattern
+ tree.
+ """
+
+ for l in self.leaves():
+ subp = l.leaf_to_root()
+ if subp:
+ return subp
+
+ def leaves(self):
+ "Generator that returns the leaves of the tree"
+ for child in self.children:
+ for x in child.leaves():
+ yield x
+ if not self.children:
+ yield self
+
+def reduce_tree(node, parent=None):
+ """
+ Internal function. Reduces a compiled pattern tree to an
+ intermediate representation suitable for feeding the
+ automaton. This also trims off any optional pattern elements(like
+ [a], a*).
+ """
+
+ new_node = None
+ #switch on the node type
+ if node.type == syms.Matcher:
+ #skip
+ node = node.children[0]
+
+ if node.type == syms.Alternatives :
+ #2 cases
+ if len(node.children) <= 2:
+ #just a single 'Alternative', skip this node
+ new_node = reduce_tree(node.children[0], parent)
+ else:
+ #real alternatives
+ new_node = MinNode(type=TYPE_ALTERNATIVES)
+ #skip odd children('|' tokens)
+ for child in node.children:
+ if node.children.index(child)%2:
+ continue
+ reduced = reduce_tree(child, new_node)
+ if reduced is not None:
+ new_node.children.append(reduced)
+ elif node.type == syms.Alternative:
+ if len(node.children) > 1:
+
+ new_node = MinNode(type=TYPE_GROUP)
+ for child in node.children:
+ reduced = reduce_tree(child, new_node)
+ if reduced:
+ new_node.children.append(reduced)
+ if not new_node.children:
+ # delete the group if all of the children were reduced to None
+ new_node = None
+
+ else:
+ new_node = reduce_tree(node.children[0], parent)
+
+ elif node.type == syms.Unit:
+ if (isinstance(node.children[0], pytree.Leaf) and
+ node.children[0].value == '('):
+ #skip parentheses
+ return reduce_tree(node.children[1], parent)
+ if ((isinstance(node.children[0], pytree.Leaf) and
+ node.children[0].value == '[')
+ or
+ (len(node.children)>1 and
+ hasattr(node.children[1], "value") and
+ node.children[1].value == '[')):
+ #skip whole unit if its optional
+ return None
+
+ leaf = True
+ details_node = None
+ alternatives_node = None
+ has_repeater = False
+ repeater_node = None
+ has_variable_name = False
+
+ for child in node.children:
+ if child.type == syms.Details:
+ leaf = False
+ details_node = child
+ elif child.type == syms.Repeater:
+ has_repeater = True
+ repeater_node = child
+ elif child.type == syms.Alternatives:
+ alternatives_node = child
+ if hasattr(child, 'value') and child.value == '=': # variable name
+ has_variable_name = True
+
+ #skip variable name
+ if has_variable_name:
+ #skip variable name, '='
+ name_leaf = node.children[2]
+ if hasattr(name_leaf, 'value') and name_leaf.value == '(':
+ # skip parenthesis
+ name_leaf = node.children[3]
+ else:
+ name_leaf = node.children[0]
+
+ #set node type
+ if name_leaf.type == token_labels.NAME:
+ #(python) non-name or wildcard
+ if name_leaf.value == 'any':
+ new_node = MinNode(type=TYPE_ANY)
+ else:
+ if hasattr(token_labels, name_leaf.value):
+ new_node = MinNode(type=getattr(token_labels, name_leaf.value))
+ else:
+ new_node = MinNode(type=getattr(pysyms, name_leaf.value))
+
+ elif name_leaf.type == token_labels.STRING:
+ #(python) name or character; remove the apostrophes from
+ #the string value
+ name = name_leaf.value.strip("'")
+ if name in tokens:
+ new_node = MinNode(type=tokens[name])
+ else:
+ new_node = MinNode(type=token_labels.NAME, name=name)
+ elif name_leaf.type == syms.Alternatives:
+ new_node = reduce_tree(alternatives_node, parent)
+
+ #handle repeaters
+ if has_repeater:
+ if repeater_node.children[0].value == '*':
+ #reduce to None
+ new_node = None
+ elif repeater_node.children[0].value == '+':
+ #reduce to a single occurence i.e. do nothing
+ pass
+ else:
+ #TODO: handle {min, max} repeaters
+ raise NotImplementedError
+ pass
+
+ #add children
+ if details_node and new_node is not None:
+ for child in details_node.children[1:-1]:
+ #skip '<', '>' markers
+ reduced = reduce_tree(child, new_node)
+ if reduced is not None:
+ new_node.children.append(reduced)
+ if new_node:
+ new_node.parent = parent
+ return new_node
+
+
+def get_characteristic_subpattern(subpatterns):
+ """Picks the most characteristic from a list of linear patterns
+ Current order used is:
+ names > common_names > common_chars
+ """
+ if not isinstance(subpatterns, list):
+ return subpatterns
+ if len(subpatterns)==1:
+ return subpatterns[0]
+
+ # first pick out the ones containing variable names
+ subpatterns_with_names = []
+ subpatterns_with_common_names = []
+ common_names = ['in', 'for', 'if' , 'not', 'None']
+ subpatterns_with_common_chars = []
+ common_chars = "[]().,:"
+ for subpattern in subpatterns:
+ if any(rec_test(subpattern, lambda x: type(x) is str)):
+ if any(rec_test(subpattern,
+ lambda x: isinstance(x, str) and x in common_chars)):
+ subpatterns_with_common_chars.append(subpattern)
+ elif any(rec_test(subpattern,
+ lambda x: isinstance(x, str) and x in common_names)):
+ subpatterns_with_common_names.append(subpattern)
+
+ else:
+ subpatterns_with_names.append(subpattern)
+
+ if subpatterns_with_names:
+ subpatterns = subpatterns_with_names
+ elif subpatterns_with_common_names:
+ subpatterns = subpatterns_with_common_names
+ elif subpatterns_with_common_chars:
+ subpatterns = subpatterns_with_common_chars
+ # of the remaining subpatterns pick out the longest one
+ return max(subpatterns, key=len)
+
+def rec_test(sequence, test_func):
+ """Tests test_func on all items of sequence and items of included
+ sub-iterables"""
+ for x in sequence:
+ if isinstance(x, (list, tuple)):
+ for y in rec_test(x, test_func):
+ yield y
+ else:
+ yield test_func(x)