Lib/lib2to3/btm_utils.py
"Utility functions used by the btm_matcher module"

from . import pytree
from .pgen2 import grammar, token
from .pygram import pattern_symbols, python_symbols

syms = pattern_symbols
pysyms = python_symbols
tokens = grammar.opmap
token_labels = token

TYPE_ANY = -1
TYPE_ALTERNATIVES = -2
TYPE_GROUP = -3

class MinNode(object):
    """This class serves as an intermediate representation of the
    pattern tree during the conversion to sets of leaf-to-root
    subpatterns"""

    def __init__(self, type=None, name=None):
        self.type = type
        self.name = name
        self.children = []
        self.leaf = False
        self.parent = None
        self.alternatives = []
        self.group = []

    def __repr__(self):
        return str(self.type) + ' ' + str(self.name)

    def leaf_to_root(self):
        """Internal method. Returns a characteristic path of the
        pattern tree. This method must be run for all leaves until the
        linear subpatterns are merged into a single"""
        node = self
        subp = []
        while node:
            if node.type == TYPE_ALTERNATIVES:
                node.alternatives.append(subp)
                if len(node.alternatives) == len(node.children):
                    #last alternative
                    subp = [tuple(node.alternatives)]
                    node.alternatives = []
                    node = node.parent
                    continue
                else:
                    node = node.parent
                    subp = None
                    break

            if node.type == TYPE_GROUP:
                node.group.append(subp)
                #probably should check the number of leaves
                if len(node.group) == len(node.children):
                    subp = get_characteristic_subpattern(node.group)
                    node.group = []
                    node = node.parent
                    continue
                else:
                    node = node.parent
                    subp = None
                    break

            if node.type == token_labels.NAME and node.name:
                #in case of type=name, use the name instead
                subp.append(node.name)
            else:
                subp.append(node.type)

            node = node.parent
        return subp

    def get_linear_subpattern(self):
        """Drives the leaf_to_root method. The reason that
        leaf_to_root must be run multiple times is because we need to
        reject 'group' matches; for example the alternative form
        (a | b c) creates a group [b c] that needs to be matched. Since
        matching multiple linear patterns overcomes the automaton's
        capabilities, leaf_to_root merges each group into a single
        choice based on 'characteristic'ity,

        i.e. (a|b c) -> (a|b) if b more characteristic than c

        Returns: The most 'characteristic'(as defined by
          get_characteristic_subpattern) path for the compiled pattern
          tree.
        """

        for l in self.leaves():
            subp = l.leaf_to_root()
            if subp:
                return subp

    def leaves(self):
        "Generator that returns the leaves of the tree"
        for child in self.children:
            for x in child.leaves():
                yield x
        if not self.children:
            yield self

def reduce_tree(node, parent=None):
    """
    Internal function. Reduces a compiled pattern tree to an
    intermediate representation suitable for feeding the
    automaton. This also trims off any optional pattern elements(like
    [a], a*).
    """

    new_node = None
    #switch on the node type
    if node.type == syms.Matcher:
        #skip
        node = node.children[0]

    if node.type == syms.Alternatives  :
        #2 cases
        if len(node.children) <= 2:
            #just a single 'Alternative', skip this node
            new_node = reduce_tree(node.children[0], parent)
        else:
            #real alternatives
            new_node = MinNode(type=TYPE_ALTERNATIVES)
            #skip odd children('|' tokens)
            for child in node.children:
                if node.children.index(child)%2:
                    continue
                reduced = reduce_tree(child, new_node)
                if reduced is not None:
                    new_node.children.append(reduced)
    elif node.type == syms.Alternative:
        if len(node.children) > 1:

            new_node = MinNode(type=TYPE_GROUP)
            for child in node.children:
                reduced = reduce_tree(child, new_node)
                if reduced:
                    new_node.children.append(reduced)
            if not new_node.children:
                # delete the group if all of the children were reduced to None
                new_node = None

        else:
            new_node = reduce_tree(node.children[0], parent)

    elif node.type == syms.Unit:
        if (isinstance(node.children[0], pytree.Leaf) and
            node.children[0].value == '('):
            #skip parentheses
            return reduce_tree(node.children[1], parent)
        if ((isinstance(node.children[0], pytree.Leaf) and
               node.children[0].value == '[')
               or
               (len(node.children)>1 and
               hasattr(node.children[1], "value") and
               node.children[1].value == '[')):
            #skip whole unit if its optional
            return None

        leaf = True
        details_node = None
        alternatives_node = None
        has_repeater = False
        repeater_node = None
        has_variable_name = False

        for child in node.children:
            if child.type == syms.Details:
                leaf = False
                details_node = child
            elif child.type == syms.Repeater:
                has_repeater = True
                repeater_node = child
            elif child.type == syms.Alternatives:
                alternatives_node = child
            if hasattr(child, 'value') and child.value == '=': # variable name
                has_variable_name = True

        #skip variable name
        if has_variable_name:
            #skip variable name, '='
            name_leaf = node.children[2]
            if hasattr(name_leaf, 'value') and name_leaf.value == '(':
                # skip parenthesis
                name_leaf = node.children[3]
        else:
            name_leaf = node.children[0]

        #set node type
        if name_leaf.type == token_labels.NAME:
            #(python) non-name or wildcard
            if name_leaf.value == 'any':
                new_node = MinNode(type=TYPE_ANY)
            else:
                if hasattr(token_labels, name_leaf.value):
                    new_node = MinNode(type=getattr(token_labels, name_leaf.value))
                else:
                    new_node = MinNode(type=getattr(pysyms, name_leaf.value))

        elif name_leaf.type == token_labels.STRING:
            #(python) name or character; remove the apostrophes from
            #the string value
            name = name_leaf.value.strip("'")
            if name in tokens:
                new_node = MinNode(type=tokens[name])
            else:
                new_node = MinNode(type=token_labels.NAME, name=name)
        elif name_leaf.type == syms.Alternatives:
            new_node = reduce_tree(alternatives_node, parent)

        #handle repeaters
        if has_repeater:
            if repeater_node.children[0].value == '*':
                #reduce to None
                new_node = None
            elif repeater_node.children[0].value == '+':
                #reduce to a single occurence i.e. do nothing
                pass
            else:
                #TODO: handle {min, max} repeaters
                raise NotImplementedError
                pass

        #add children
        if details_node and new_node is not None:
            for child in details_node.children[1:-1]:
                #skip '<', '>' markers
                reduced = reduce_tree(child, new_node)
                if reduced is not None:
                    new_node.children.append(reduced)
    if new_node:
        new_node.parent = parent
    return new_node


def get_characteristic_subpattern(subpatterns):
    """Picks the most characteristic from a list of linear patterns
    Current order used is:
    names > common_names > common_chars
    """
    if not isinstance(subpatterns, list):
        return subpatterns
    if len(subpatterns)==1:
        return subpatterns[0]

    # first pick out the ones containing variable names
    subpatterns_with_names = []
    subpatterns_with_common_names = []
    common_names = ['in', 'for', 'if' , 'not', 'None']
    subpatterns_with_common_chars = []
    common_chars = "[]().,:"
    for subpattern in subpatterns:
        if any(rec_test(subpattern, lambda x: type(x) is str)):
            if any(rec_test(subpattern,
                            lambda x: isinstance(x, str) and x in common_chars)):
                subpatterns_with_common_chars.append(subpattern)
            elif any(rec_test(subpattern,
                              lambda x: isinstance(x, str) and x in common_names)):
                subpatterns_with_common_names.append(subpattern)

            else:
                subpatterns_with_names.append(subpattern)

    if subpatterns_with_names:
        subpatterns = subpatterns_with_names
    elif subpatterns_with_common_names:
        subpatterns = subpatterns_with_common_names
    elif subpatterns_with_common_chars:
        subpatterns = subpatterns_with_common_chars
    # of the remaining subpatterns pick out the longest one
    return max(subpatterns, key=len)

def rec_test(sequence, test_func):
    """Tests test_func on all items of sequence and items of included
    sub-iterables"""
    for x in sequence:
        if isinstance(x, (list, tuple)):
            for y in rec_test(x, test_func):
                yield y
        else:
            yield test_func(x)