# -*- coding: utf-8 -*-
"""Collapse functions to supplement :mod:`pybel.struct.mutation.collapse`."""
import itertools as itt
import logging
from collections import defaultdict
import networkx as nx
from pybel import BELGraph
from pybel.constants import EQUIVALENT_TO, GENE, HAS_VARIANT, NAME, NAMESPACE, ORTHOLOGOUS, PROTEIN, RELATION
from pybel.dsl import BaseEntity, Gene, Protein
from pybel.struct.filters import build_relation_predicate, filter_edges, has_polarity
from pybel.struct.filters.typing import EdgePredicates
from pybel.struct.mutation import collapse_nodes, collapse_pair, collapse_to_genes, get_subgraph_by_edge_filter
from pybel.struct.pipeline import in_place_transformation, transformation
from pybel.typing import Strings
from tqdm import tqdm
from ..filters.edge_filters import build_source_namespace_filter, build_target_namespace_filter
from ..summary.edge_summary import pair_is_consistent
# Public names of this module. ``collapse_nodes`` is re-exported from
# :mod:`pybel.struct.mutation`; the other names are functions defined below.
__all__ = [
    'collapse_nodes',
    'rewire_variants_to_genes',
    'collapse_gene_variants',
    'collapse_protein_variants',
    'collapse_consistent_edges',
    'collapse_equivalencies_by_namespace',
    'collapse_orthologies_by_namespace',
    'collapse_to_protein_interactions',
    'collapse_nodes_with_same_names',
]

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
@in_place_transformation
def collapse_protein_variants(graph: BELGraph) -> None:
    """Collapse the variants of every protein onto their parent nodes, in-place.

    :param graph: A BEL graph
    """
    _collapse_variants_by_function(graph=graph, func=PROTEIN)
@in_place_transformation
def collapse_gene_variants(graph: BELGraph) -> None:
    """Collapse the variants of every gene onto their parent nodes, in-place.

    :param graph: A BEL graph
    """
    _collapse_variants_by_function(graph=graph, func=GENE)
def _collapse_variants_by_function(graph: BELGraph, func: str) -> None:
    """Collapse all of the given function's variants onto their parents, in-place.

    An edge is considered when its relation is :data:`pybel.constants.HAS_VARIANT` and its
    source (the parent) has the given function. The target (the variant) is then collapsed
    into the parent.

    :param graph: A BEL graph
    :param func: The function that a parent node must have, e.g. :data:`pybel.constants.PROTEIN`
    """
    # Snapshot the matching pairs before collapsing anything: collapse_pair() changes the
    # graph, and the edge view must not be iterated while the graph is being modified.
    variant_pairs = [
        (parent_node, variant_node)
        for parent_node, variant_node, data in graph.edges(data=True)
        if data[RELATION] == HAS_VARIANT and parent_node.function == func
    ]

    for parent_node, variant_node in variant_pairs:
        # Parallel edges yield the same pair more than once, and an earlier collapse may
        # already have removed one of the nodes; there is nothing left to do in that case.
        if parent_node not in graph or variant_node not in graph:
            continue

        # Same keyword names as the other collapse_pair() call in this module.
        collapse_pair(graph, survivor=parent_node, victim=variant_node)
@in_place_transformation
def rewire_variants_to_genes(graph: BELGraph) -> None:
    """Relabel every protein node that has variants as a gene node, in-place.

    Each :class:`pybel.dsl.Protein` node with a non-empty ``variants`` is replaced by a
    :class:`pybel.dsl.Gene` built from the same name, namespace, identifier, and variants.
    All other nodes are left untouched.

    A use case is after running :func:`collapse_to_genes`.

    :param graph: A BEL graph
    """
    protein_to_gene = {
        protein: Gene(
            name=protein.name,
            namespace=protein.namespace,
            identifier=protein.identifier,
            variants=protein.variants,
        )
        for protein in graph
        if isinstance(protein, Protein) and protein.variants
    }

    # The mapping is complete before relabeling, so the graph is not modified while iterated.
    nx.relabel_nodes(graph, protein_to_gene, copy=False)
def _collapse_edge_passing_predicates(graph: BELGraph, edge_predicates: EdgePredicates = None) -> None:
    """Collapse the nodes of every edge passing the given edge predicates, in-place.

    For each selected edge, the source node is passed to :func:`collapse_pair` as the
    survivor and the target node as the victim.

    :param graph: A BEL graph
    :param edge_predicates: The edge predicate(s) handed to :func:`filter_edges` to select edges
    """
    # Materialize the selection first: collapse_pair() modifies the graph, so the edges
    # must not still be streaming from the live graph while nodes are being collapsed.
    matching_edges = list(filter_edges(graph, edge_predicates=edge_predicates))

    for u, v, _ in matching_edges:
        # An earlier collapse may have removed either endpoint of this (now stale) edge.
        if u not in graph or v not in graph:
            continue

        collapse_pair(graph, survivor=u, victim=v)
def _collapse_edge_by_namespace(graph: BELGraph,
                                victim_namespaces: Strings,
                                survivor_namespaces: str,
                                relations: Strings) -> None:
    """Collapse pairs of nodes with the given namespaces that have the given relationship.

    Edges are selected whose source is in one of the victim namespaces, whose target is in
    the survivor namespace, and whose relation is one of the given relations. The source
    of each such edge is collapsed into its target.

    :param graph: A BEL Graph
    :param victim_namespaces: The namespace(s) of the node to collapse
    :param survivor_namespaces: The namespace of the node to keep
    :param relations: The relation(s) to search
    """
    edge_predicates = [
        build_relation_predicate(relations),
        build_source_namespace_filter(victim_namespaces),
        build_target_namespace_filter(survivor_namespaces),
    ]

    # Snapshot the selection before collapsing, since collapse_pair() modifies the graph.
    matching_edges = list(filter_edges(graph, edge_predicates=edge_predicates))

    for victim, survivor, _ in matching_edges:
        # A node may already have been collapsed away through another selected edge.
        if victim not in graph or survivor not in graph:
            continue

        # The edge source carries the victim namespace and the target the survivor
        # namespace, so the target is the node that must be kept.
        collapse_pair(graph, survivor=survivor, victim=victim)
@in_place_transformation
def collapse_equivalencies_by_namespace(graph: BELGraph, victim_namespace: Strings, survivor_namespace: str) -> None:
    """Collapse pairs of nodes in the given namespaces that are joined by equivalence relations.

    :param graph: A BEL graph
    :param victim_namespace: The namespace(s) of the node to collapse
    :param survivor_namespace: The namespace of the node to keep

    To convert all ChEBI names to InChI keys, assuming there are appropriate equivalence
    relations between nodes with those namespaces:

    >>> collapse_equivalencies_by_namespace(graph, 'CHEBI', 'CHEBIID')
    >>> collapse_equivalencies_by_namespace(graph, 'CHEBIID', 'INCHI')
    """
    _collapse_edge_by_namespace(
        graph,
        victim_namespaces=victim_namespace,
        survivor_namespaces=survivor_namespace,
        relations=EQUIVALENT_TO,
    )
@in_place_transformation
def collapse_orthologies_by_namespace(graph: BELGraph, victim_namespace: Strings, survivor_namespace: str) -> None:
    """Collapse pairs of nodes in the given namespaces that are joined by orthology relations.

    :param graph: A BEL Graph
    :param victim_namespace: The namespace(s) of the node to collapse
    :param survivor_namespace: The namespace of the node to keep

    To collapse all MGI nodes to their HGNC orthologs, use:

    >>> collapse_orthologies_by_namespace(graph, 'MGI', 'HGNC')

    To collapse both MGI and RGD nodes to their HGNC orthologs, use:

    >>> collapse_orthologies_by_namespace(graph, ['MGI', 'RGD'], 'HGNC')
    """
    _collapse_edge_by_namespace(
        graph,
        victim_namespaces=victim_namespace,
        survivor_namespaces=survivor_namespace,
        relations=ORTHOLOGOUS,
    )
@in_place_transformation
def collapse_entrez_to_hgnc(graph: BELGraph) -> None:
    """Collapse Entrez equivalences to HGNC.

    The namespaces ``EGID``, ``EG``, and ``ENTREZ`` are given as the victim namespaces and
    ``HGNC`` as the survivor namespace.
    """
    entrez_namespaces = ['EGID', 'EG', 'ENTREZ']
    collapse_equivalencies_by_namespace(graph, entrez_namespaces, 'HGNC')
@in_place_transformation
def collapse_mgi_to_hgnc(graph: BELGraph) -> None:
    """Collapse MGI orthologies to HGNC.

    The namespaces ``MGI`` and ``MGIID`` are given as the victim namespaces and ``HGNC`` as
    the survivor namespace.
    """
    mgi_namespaces = ['MGI', 'MGIID']
    collapse_orthologies_by_namespace(graph, mgi_namespaces, 'HGNC')
@in_place_transformation
def collapse_rgd_to_hgnc(graph: BELGraph) -> None:
    """Collapse RGD orthologies to HGNC.

    The namespaces ``RGD`` and ``RGDID`` are given as the victim namespaces and ``HGNC`` as
    the survivor namespace.
    """
    rgd_namespaces = ['RGD', 'RGDID']
    collapse_orthologies_by_namespace(graph, rgd_namespaces, 'HGNC')
@in_place_transformation
def collapse_flybase_to_hgnc(graph: BELGraph) -> None:
    """Collapse FlyBase orthologies to HGNC.

    ``FLYBASE`` is given as the victim namespace and ``HGNC`` as the survivor namespace.
    """
    flybase_namespace = 'FLYBASE'
    collapse_orthologies_by_namespace(graph, flybase_namespace, 'HGNC')
@in_place_transformation
def collapse_entrez_equivalencies(graph: BELGraph) -> None:
    """Collapse all equivalence edges whose source node is in an Entrez namespace.

    Only edges with the relation :data:`pybel.constants.EQUIVALENT_TO` and a source in one
    of the namespaces ``EGID``, ``EG``, or ``ENTREZ`` are selected. Assumes well formed,
    2-way equivalencies.

    NOTE(review): which endpoint of each selected edge survives is decided by
    ``_collapse_edge_passing_predicates`` - confirm that matches the intended direction.
    """
    entrez_equivalence_predicates = [
        build_relation_predicate(EQUIVALENT_TO),
        build_source_namespace_filter(['EGID', 'EG', 'ENTREZ']),
    ]
    _collapse_edge_passing_predicates(graph, edge_predicates=entrez_equivalence_predicates)
@in_place_transformation
def collapse_consistent_edges(graph: BELGraph) -> None:
    """Collapse consistent edges together, in-place.

    For every pair of connected nodes for which :func:`pair_is_consistent` returns a
    relation, all edges from the first node to the second are removed and replaced by a
    single edge that carries only that relation.

    .. warning:: This operation doesn't preserve evidences or other annotations

    :param graph: A BEL graph
    """
    # Snapshot the distinct (source, target) pairs before touching any edges: edges are
    # removed and added below, which must not happen while the edge view is being iterated.
    # dict.fromkeys() drops the repeats caused by parallel edges while keeping the order.
    node_pairs = list(dict.fromkeys(graph.edges()))

    for u, v in node_pairs:
        relation = pair_is_consistent(graph, u, v)
        if not relation:
            continue

        # Copy the keys into a list first, since removing edges changes graph[u][v].
        parallel_edges = [(u, v, key) for key in graph[u][v]]
        graph.remove_edges_from(parallel_edges)

        # Edge attributes are passed as keyword arguments; an ``attr_dict=`` keyword would
        # be stored as an attribute literally named "attr_dict" by NetworkX 2.x.
        graph.add_edge(u, v, **{RELATION: relation})
@transformation
def collapse_to_protein_interactions(graph: BELGraph) -> BELGraph:
    """Build a new graph containing only the polar gene-to-gene edges of the collapsed input.

    The input graph is copied, the copy is collapsed with :func:`collapse_to_genes`, and the
    sub-graph of edges that pass :func:`has_polarity` and connect two
    :class:`pybel.dsl.Gene` nodes is returned. The input graph itself is not modified.

    :param graph: A BEL graph
    :return: The sub-graph induced by the matching edges of the collapsed copy
    """
    def is_edge_ppi(_: BELGraph, u: BaseEntity, v: BaseEntity, __: str) -> bool:
        """Check if an edge is a PPI."""
        return all(isinstance(node, Gene) for node in (u, v))

    gene_graph: BELGraph = graph.copy()
    collapse_to_genes(gene_graph)

    ppi_predicates = [has_polarity, is_edge_ppi]
    return get_subgraph_by_edge_filter(gene_graph, edge_predicates=ppi_predicates)
@in_place_transformation
def collapse_nodes_with_same_names(graph: BELGraph) -> None:
    """Collapse nodes that have the same name and differ only in namespace, in-place.

    Two nodes are merged when both have a name, the names are equal ignoring case, both
    have the same set of keys, and every value other than the name and namespace is equal.
    Of each such group, the node that comes first when iterating over the graph survives.

    :param graph: A BEL graph
    """
    survivor_mapping = defaultdict(set)  # Survivor node -> nodes to collapse into it
    victims = set()  # Nodes already assigned to a survivor while iterating

    node_count = graph.number_of_nodes()
    # Integer division keeps the progress bar's total an exact pair count.
    it = tqdm(itt.combinations(graph, r=2), total=node_count * (node_count - 1) // 2)

    for a, b in it:
        if b in victims:
            continue

        a_name, b_name = a.get(NAME), b.get(NAME)
        if not a_name or not b_name or a_name.lower() != b_name.lower():
            continue

        if a.keys() != b.keys():  # not same version (might have variants)
            continue

        # Every value besides the name and namespace must match as well. This has to
        # reject the whole pair, so it is a single test rather than a loop with `continue`.
        if any(a[k] != b[k] for k in set(a.keys()) - {NAME, NAMESPACE}):
            continue

        survivor_mapping[a].add(b)
        victims.add(b)

    collapse_nodes(graph, survivor_mapping)