diff options
author | Benjamin Peterson <benjamin@python.org> | 2010-10-14 23:00:04 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2010-10-14 23:00:04 (GMT) |
commit | f37eb3a184c344a5ee7355a1c1a9acc6d4835c6e (patch) | |
tree | 0e66f7e8405072086393182d68bedb4325181fb0 /Lib/lib2to3/btm_matcher.py | |
parent | 92f60ed82a302035009835a8d63ff714118a96ad (diff) | |
download | cpython-f37eb3a184c344a5ee7355a1c1a9acc6d4835c6e.zip cpython-f37eb3a184c344a5ee7355a1c1a9acc6d4835c6e.tar.gz cpython-f37eb3a184c344a5ee7355a1c1a9acc6d4835c6e.tar.bz2 |
Merged revisions 83852-83853,83857,84042,84216,84274-84276,84375,85388,85478,85506-85508 via svnmerge from
svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
........
r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line
wrap with parens
........
r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line
use parens
........
r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line
things which use touch_import should be pre order
........
r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines
This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers.
........
r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line
allow star_expr in testlist_gexp
........
r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line
wrap long line
........
r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line
cleanup
........
r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line
when there's a None value and a traceback, don't call type with it #9661
........
r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines
Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code.
........
r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line
fix urllib fixer with multiple as imports on a line #10069
........
r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line
stop abusing docstrings
........
r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line
kill sibling import
........
r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line
remove trailing whitespace
........
r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line
typo
........
Diffstat (limited to 'Lib/lib2to3/btm_matcher.py')
-rw-r--r-- | Lib/lib2to3/btm_matcher.py | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/Lib/lib2to3/btm_matcher.py b/Lib/lib2to3/btm_matcher.py new file mode 100644 index 0000000..eabe1a7 --- /dev/null +++ b/Lib/lib2to3/btm_matcher.py @@ -0,0 +1,168 @@ +"""A bottom-up tree matching algorithm implementation meant to speed +up 2to3's matching process. After the tree patterns are reduced to +their rarest linear path, a linear Aho-Corasick automaton is +created. The linear automaton traverses the linear paths from the +leaves to the root of the AST and returns a set of nodes for further +matching. This reduces significantly the number of candidate nodes.""" + +__author__ = "George Boutsioukis <gboutsioukis@gmail.com>" + +import logging +import itertools +from collections import defaultdict + +from . import pytree +from .btm_utils import reduce_tree + +class BMNode(object): + """Class for a node of the Aho-Corasick automaton used in matching""" + count = itertools.count() + def __init__(self): + self.transition_table = {} + self.fixers = [] + self.id = next(BMNode.count) + self.content = '' + +class BottomMatcher(object): + """The main matcher class. After instantiating the patterns should + be added using the add_fixer method""" + + def __init__(self): + self.match = set() + self.root = BMNode() + self.nodes = [self.root] + self.fixers = [] + self.logger = logging.getLogger("RefactoringTool") + + def add_fixer(self, fixer): + """Reduces a fixer's pattern tree to a linear path and adds it + to the matcher(a common Aho-Corasick automaton). The fixer is + appended on the matching states and called when they are + reached""" + self.fixers.append(fixer) + tree = reduce_tree(fixer.pattern_tree) + linear = tree.get_linear_subpattern() + match_nodes = self.add(linear, start=self.root) + for match_node in match_nodes: + match_node.fixers.append(fixer) + + def add(self, pattern, start): + "Recursively adds a linear pattern to the AC automaton" + #print("adding pattern", pattern, "to", start) + if not pattern: + #print("empty pattern") + return [start] + if isinstance(pattern[0], tuple): + #alternatives + #print("alternatives") + match_nodes = [] + for alternative in pattern[0]: + #add all alternatives, and add the rest of the pattern + #to each end node + end_nodes = self.add(alternative, start=start) + for end in end_nodes: + match_nodes.extend(self.add(pattern[1:], end)) + return match_nodes + else: + #single token + #not last + if pattern[0] not in start.transition_table: + #transition did not exist, create new + next_node = BMNode() + start.transition_table[pattern[0]] = next_node + else: + #transition exists already, follow + next_node = start.transition_table[pattern[0]] + + if pattern[1:]: + end_nodes = self.add(pattern[1:], start=next_node) + else: + end_nodes = [next_node] + return end_nodes + + def run(self, leaves): + """The main interface with the bottom matcher. The tree is + traversed from the bottom using the constructed + automaton. Nodes are only checked once as the tree is + retraversed. When the automaton fails, we give it one more + shot(in case the above tree matches as a whole with the + rejected leaf), then we break for the next leaf. There is the + special case of multiple arguments(see code comments) where we + recheck the nodes + + Args: + The leaves of the AST tree to be matched + + Returns: + A dictionary of node matches with fixers as the keys + """ + current_ac_node = self.root + results = defaultdict(list) + for leaf in leaves: + current_ast_node = leaf + while current_ast_node: + current_ast_node.was_checked = True + for child in current_ast_node.children: + # multiple statements, recheck + if isinstance(child, pytree.Leaf) and child.value == ";": + current_ast_node.was_checked = False + break + if current_ast_node.type == 1: + #name + node_token = current_ast_node.value + else: + node_token = current_ast_node.type + + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + if not fixer in results: + results[fixer] = [] + results[fixer].append(current_ast_node) + + else: + #matching failed, reset automaton + current_ac_node = self.root + if (current_ast_node.parent is not None + and current_ast_node.parent.was_checked): + #the rest of the tree upwards has been checked, next leaf + break + + #recheck the rejected node once from the root + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + if not fixer in results.keys(): + results[fixer] = [] + results[fixer].append(current_ast_node) + + current_ast_node = current_ast_node.parent + return results + + def print_ac(self): + "Prints a graphviz diagram of the BM automaton(for debugging)" + print("digraph g{") + def print_node(node): + for subnode_key in node.transition_table.keys(): + subnode = node.transition_table[subnode_key] + print("%d -> %d [label=%s] //%s" % + (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) + if subnode_key == 1: + print(subnode.content) + print_node(subnode) + print_node(self.root) + print("}") + +# taken from pytree.py for debugging; only used by print_ac +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) |