| """A bottom-up tree matching algorithm implementation meant to speed | |
| up 2to3's matching process. After the tree patterns are reduced to | |
| their rarest linear path, a linear Aho-Corasick automaton is | |
| created. The linear automaton traverses the linear paths from the | |
| leaves to the root of the AST and returns a set of nodes for further | |
| matching. This reduces significantly the number of candidate nodes.""" | |
| __author__ = "George Boutsioukis <gboutsioukis@gmail.com>" | |
| import logging | |
| import itertools | |
| from collections import defaultdict | |
| from . import pytree | |
| from .btm_utils import reduce_tree | |
| class BMNode(object): | |
| """Class for a node of the Aho-Corasick automaton used in matching""" | |
| count = itertools.count() | |
| def __init__(self): | |
| self.transition_table = {} | |
| self.fixers = [] | |
| self.id = next(BMNode.count) | |
| self.content = '' | |
| class BottomMatcher(object): | |
| """The main matcher class. After instantiating the patterns should | |
| be added using the add_fixer method""" | |
| def __init__(self): | |
| self.match = set() | |
| self.root = BMNode() | |
| self.nodes = [self.root] | |
| self.fixers = [] | |
| self.logger = logging.getLogger("RefactoringTool") | |
| def add_fixer(self, fixer): | |
| """Reduces a fixer's pattern tree to a linear path and adds it | |
| to the matcher(a common Aho-Corasick automaton). The fixer is | |
| appended on the matching states and called when they are | |
| reached""" | |
| self.fixers.append(fixer) | |
| tree = reduce_tree(fixer.pattern_tree) | |
| linear = tree.get_linear_subpattern() | |
| match_nodes = self.add(linear, start=self.root) | |
| for match_node in match_nodes: | |
| match_node.fixers.append(fixer) | |
| def add(self, pattern, start): | |
| "Recursively adds a linear pattern to the AC automaton" | |
| #print("adding pattern", pattern, "to", start) | |
| if not pattern: | |
| #print("empty pattern") | |
| return [start] | |
| if isinstance(pattern[0], tuple): | |
| #alternatives | |
| #print("alternatives") | |
| match_nodes = [] | |
| for alternative in pattern[0]: | |
| #add all alternatives, and add the rest of the pattern | |
| #to each end node | |
| end_nodes = self.add(alternative, start=start) | |
| for end in end_nodes: | |
| match_nodes.extend(self.add(pattern[1:], end)) | |
| return match_nodes | |
| else: | |
| #single token | |
| #not last | |
| if pattern[0] not in start.transition_table: | |
| #transition did not exist, create new | |
| next_node = BMNode() | |
| start.transition_table[pattern[0]] = next_node | |
| else: | |
| #transition exists already, follow | |
| next_node = start.transition_table[pattern[0]] | |
| if pattern[1:]: | |
| end_nodes = self.add(pattern[1:], start=next_node) | |
| else: | |
| end_nodes = [next_node] | |
| return end_nodes | |
| def run(self, leaves): | |
| """The main interface with the bottom matcher. The tree is | |
| traversed from the bottom using the constructed | |
| automaton. Nodes are only checked once as the tree is | |
| retraversed. When the automaton fails, we give it one more | |
| shot(in case the above tree matches as a whole with the | |
| rejected leaf), then we break for the next leaf. There is the | |
| special case of multiple arguments(see code comments) where we | |
| recheck the nodes | |
| Args: | |
| The leaves of the AST tree to be matched | |
| Returns: | |
| A dictionary of node matches with fixers as the keys | |
| """ | |
| current_ac_node = self.root | |
| results = defaultdict(list) | |
| for leaf in leaves: | |
| current_ast_node = leaf | |
| while current_ast_node: | |
| current_ast_node.was_checked = True | |
| for child in current_ast_node.children: | |
| # multiple statements, recheck | |
| if isinstance(child, pytree.Leaf) and child.value == u";": | |
| current_ast_node.was_checked = False | |
| break | |
| if current_ast_node.type == 1: | |
| #name | |
| node_token = current_ast_node.value | |
| else: | |
| node_token = current_ast_node.type | |
| if node_token in current_ac_node.transition_table: | |
| #token matches | |
| current_ac_node = current_ac_node.transition_table[node_token] | |
| for fixer in current_ac_node.fixers: | |
| if not fixer in results: | |
| results[fixer] = [] | |
| results[fixer].append(current_ast_node) | |
| else: | |
| #matching failed, reset automaton | |
| current_ac_node = self.root | |
| if (current_ast_node.parent is not None | |
| and current_ast_node.parent.was_checked): | |
| #the rest of the tree upwards has been checked, next leaf | |
| break | |
| #recheck the rejected node once from the root | |
| if node_token in current_ac_node.transition_table: | |
| #token matches | |
| current_ac_node = current_ac_node.transition_table[node_token] | |
| for fixer in current_ac_node.fixers: | |
| if not fixer in results.keys(): | |
| results[fixer] = [] | |
| results[fixer].append(current_ast_node) | |
| current_ast_node = current_ast_node.parent | |
| return results | |
| def print_ac(self): | |
| "Prints a graphviz diagram of the BM automaton(for debugging)" | |
| print("digraph g{") | |
| def print_node(node): | |
| for subnode_key in node.transition_table.keys(): | |
| subnode = node.transition_table[subnode_key] | |
| print("%d -> %d [label=%s] //%s" % | |
| (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) | |
| if subnode_key == 1: | |
| print(subnode.content) | |
| print_node(subnode) | |
| print_node(self.root) | |
| print("}") | |
| # taken from pytree.py for debugging; only used by print_ac | |
| _type_reprs = {} | |
| def type_repr(type_num): | |
| global _type_reprs | |
| if not _type_reprs: | |
| from .pygram import python_symbols | |
| # printing tokens is possible but not as useful | |
| # from .pgen2 import token // token.__dict__.items(): | |
| for name, val in python_symbols.__dict__.items(): | |
| if type(val) == int: _type_reprs[val] = name | |
| return _type_reprs.setdefault(type_num, type_num) |