# Source code for pybel_tools.selection.group_nodes

# -*- coding: utf-8 -*-

from collections import defaultdict
from typing import Callable, Iterable, Mapping, Optional, Set, TypeVar

from pybel import BELGraph
from pybel.constants import *
from pybel.dsl import BaseEntity
from pybel.struct.filters.edge_predicates import edge_has_annotation
from pybel.struct.filters.node_filters import concatenate_node_predicates
from pybel.struct.filters.typing import NodePredicates

# Names exported by ``from <this module> import *``.
# NOTE(review): ``get_mapped_nodes`` is defined in this module but is not listed here -
# confirm whether leaving it out of the public API is intentional.
__all__ = [
    'group_nodes_by_annotation',
    'average_node_annotation',
    'group_nodes_by_annotation_filtered'
]

# Generic type of the values consumed and produced by the ``aggregator`` callback
# of ``average_node_annotation``.
X = TypeVar('X')


def group_nodes_by_annotation(graph: BELGraph, annotation: str = 'Subgraph') -> Mapping[str, Set[BaseEntity]]:
    """Group the nodes occurring in edges by the given annotation.

    Every edge for which ``edge_has_annotation`` is truthy contributes both of its endpoints to the
    group keyed by that edge's value for the annotation. Other edges are ignored.

    :param graph: A BEL graph
    :param annotation: The annotation to use for grouping
    :return: A dictionary of {annotation value: set of nodes}
    """
    result = defaultdict(set)

    for u, v, d in graph.edges(data=True):
        if not edge_has_annotation(d, annotation):
            continue

        # Look the value up once; both endpoints of the edge belong to its group.
        # NOTE(review): the value is used as a dictionary key, so it is assumed to be hashable - confirm.
        value = d[ANNOTATIONS][annotation]
        result[value].update((u, v))

    # Hand back a plain dict so that callers do not create groups by accident on lookup.
    return dict(result)


def average_node_annotation(graph: BELGraph,
                            key: str,
                            annotation: str = 'Subgraph',
                            aggregator: Optional[Callable[[Iterable[X]], X]] = None,
                            ) -> Mapping[str, X]:
    """Group the graph's nodes by an annotation and score each group by aggregating a node data value.

    Nodes are grouped with :func:`group_nodes_by_annotation`. Within each group, only the nodes whose
    data dictionary contains ``key`` contribute a value.

    :param graph: A BEL graph
    :param key: The key in the node data dictionary representing the experimental data
    :param annotation: A BEL annotation to use to group nodes
    :param aggregator: A function from list of values -> aggregate value. Defaults to taking the average of a list of
                       floats.
    :return: A dictionary of {annotation value: aggregate value}. With the default aggregator, groups in which
             no node carries ``key`` are left out, since the average of no values is undefined. A custom
             aggregator is still called with the empty list so it can decide what to return.
    """
    use_default = aggregator is None

    if use_default:
        def aggregator(x):
            """Calculate the arithmetic mean of a non-empty list."""
            return sum(x) / len(x)

    result = {}

    for subgraph, nodes in group_nodes_by_annotation(graph, annotation).items():
        node_data = (graph.nodes[node] for node in nodes)
        values = [data[key] for data in node_data if key in data]

        # The default average would divide by zero on an empty list, so skip the group instead.
        if use_default and not values:
            continue

        result[subgraph] = aggregator(values)

    return result


def group_nodes_by_annotation_filtered(graph: BELGraph,
                                       node_predicates: Optional[NodePredicates] = None,
                                       annotation: str = 'Subgraph',
                                       ) -> Mapping[str, Set[BaseEntity]]:
    """Group the nodes occurring in edges by the given annotation, with a node filter applied.

    :param graph: A BEL graph
    :param node_predicates: A predicate or list of predicates (graph, node) -> bool. If ``None`` (the default),
                            no filtering is done and every node is kept.
    :param annotation: The annotation to use for grouping
    :return: A dictionary of {annotation value: set of nodes}. A group whose nodes are all rejected by the
             filter is kept, with an empty set as its value.
    """
    if node_predicates is None:
        # Handle the default here rather than relying on how ``concatenate_node_predicates``
        # treats ``None``.
        def node_filter(_graph, _node) -> bool:
            """Accept every node."""
            return True
    else:
        node_filter = concatenate_node_predicates(node_predicates)

    return {
        key: {
            node
            for node in nodes
            if node_filter(graph, node)
        }
        for key, nodes in group_nodes_by_annotation(graph, annotation).items()
    }


def get_mapped_nodes(graph: BELGraph, namespace: str, names: Iterable[str]) -> Mapping[BaseEntity, Set[BaseEntity]]:
    """Map each node matching the namespace and names to the nodes related to it.

    A node matches when its namespace equals ``namespace`` and its name is one of ``names``. Related nodes
    are collected from two kinds of edges:

    - ``HAS_MEMBER`` / ``HAS_COMPONENT`` edges whose target matches: the source is recorded for the target.
    - ``HAS_VARIANT`` / ``ORTHOLOGOUS`` edges whose source matches: the target is recorded for the source.

    Matching nodes that take part in no such edge do not appear in the result.

    :param graph: A BEL graph
    :param namespace: The namespace to search
    :param names: List or set of values from which we want to map nodes from
    :return: Main node to variants/groups.
    """
    wanted_names = set(names)
    mapping = defaultdict(set)

    def _is_wanted(node) -> bool:
        """Check whether the node is in the namespace and carries one of the wanted names."""
        return node.get(NAMESPACE) == namespace and node.get(NAME) in wanted_names

    for source, target, data in graph.edges(data=True):
        relation = data[RELATION]

        if relation in {HAS_MEMBER, HAS_COMPONENT} and _is_wanted(target):
            mapping[target].add(source)

        elif relation in (HAS_VARIANT, ORTHOLOGOUS) and _is_wanted(source):
            mapping[source].add(target)

    return dict(mapping)