# Source code for pybel_tools.analysis.neurommsig.algorithm

# -*- coding: utf-8 -*-

"""An implementation of the NeuroMMSig mechanism enrichment algorithm [DomingoFernandez2017]_.

.. [DomingoFernandez2017] Domingo-Fernández, D., *et al* (2017). `Multimodal mechanistic signatures for
    neurodegenerative diseases (NeuroMMSig): A web server for mechanism enrichment
    <https://doi.org/10.1093/bioinformatics/btx399>`_. Bioinformatics, 33(22), 3679–3681.
"""

import itertools as itt
import logging
from collections import Counter

from pybel import Pipeline
from pybel.constants import GENE
from pybel.struct import (
    collapse_all_variants, collapse_to_genes, enrich_protein_and_rna_origins, get_nodes_by_function,
    get_subgraphs_by_annotation,
)
from ...utils import calculate_betweenness_centality

# Public names of this module. The per-component scoring helpers defined further down
# (``neurommsig_gene_ora``, ``neurommsig_hubs``, ``neurommsig_topology``) are not listed here.
__all__ = [
    'neurommsig_graph_preprocessor',
    'get_neurommsig_scores_prestratified',
    'get_neurommsig_scores',
    'get_neurommsig_score',
]

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Pre-processing pipeline used by :func:`get_neurommsig_scores` when ``preprocess=True``.
# It is built from three pybel graph functions, corresponding to the documented steps:
#   1. infer the central dogma (``enrich_protein_and_rna_origins``)
#   2. collapse proteins, RNAs and miRNAs to genes (``collapse_to_genes``)
#   3. collapse variants to genes (``collapse_all_variants``)
# NOTE(review): presumably the pipeline applies the functions in list order - confirm against pybel.Pipeline.
neurommsig_graph_preprocessor = Pipeline.from_functions([
    enrich_protein_and_rna_origins,
    collapse_to_genes,
    collapse_all_variants,
])


def get_neurommsig_scores_prestratified(it, genes, ora_weight=None, hub_weight=None, top=None, topology_weight=None):
    """Run the NeuroMMSig algorithm on each graph of an already-stratified collection.

    The subgraphs are scored exactly as given; this function does no pre-processing of its own. Every keyword
    argument is forwarded unchanged to :func:`get_neurommsig_score`.

    :param iter[tuple[str,pybel.BELGraph]] it: Pairs of (annotation value, subgraph)
    :param list[tuple] genes: A list of gene nodes
    :param Optional[float] ora_weight: The relative weight of the over-enrichment analysis score from
     :py:func:`neurommsig_gene_ora`. Defaults to 1.0.
    :param Optional[float] hub_weight: The relative weight of the hub analysis score from
     :py:func:`neurommsig_hubs`. Defaults to 1.0.
    :param Optional[float] top: The percentage of top genes to use as hubs. Defaults to 5% (0.05).
    :param Optional[float] topology_weight: The relative weight of the topological analysis score from
     :py:func:`neurommsig_topology`. Defaults to 1.0.
    :return: A dictionary from {annotation value: NeuroMMSig composite score}
    :rtype: dict[str, float]
    """
    return {
        annotation_value: get_neurommsig_score(
            subgraph,
            genes,
            ora_weight=ora_weight,
            hub_weight=hub_weight,
            top=top,
            topology_weight=topology_weight,
        )
        for annotation_value, subgraph in it
    }
def get_neurommsig_scores(graph, genes, annotation='Subgraph', ora_weight=None, hub_weight=None, top=None,
                          topology_weight=None, preprocess=False):
    """Stratify the graph by the given annotation, then run the NeuroMMSig algorithm on each subgraph.

    If ``preprocess`` is true, the graph is first passed through ``neurommsig_graph_preprocessor``. Otherwise the
    graph is used as given.

    :param pybel.BELGraph graph: A BEL graph
    :param list[tuple] genes: A list of gene nodes
    :param str annotation: The annotation to use to stratify the graph to subgraphs
    :param Optional[float] ora_weight: The relative weight of the over-enrichment analysis score from
     :py:func:`neurommsig_gene_ora`. Defaults to 1.0.
    :param Optional[float] hub_weight: The relative weight of the hub analysis score from
     :py:func:`neurommsig_hubs`. Defaults to 1.0.
    :param Optional[float] top: The percentage of top genes to use as hubs. Defaults to 5% (0.05).
    :param Optional[float] topology_weight: The relative weight of the topological analysis score from
     :py:func:`neurommsig_topology`. Defaults to 1.0.
    :param bool preprocess: If true, preprocess the graph.
    :return: A dictionary from {annotation value: NeuroMMSig composite score}, or ``None`` if none of the genes
     are nodes in the graph
    :rtype: Optional[dict[str, float]]

    Pre-processing steps:

    1. Infer the central dogma with :func:`pybel.struct.enrich_protein_and_rna_origins`
    2. Collapse all proteins, RNAs and miRNAs to genes with :func:`pybel.struct.collapse_to_genes`
    3. Collapse variants to genes with :func:`pybel.struct.collapse_all_variants`
    """
    if preprocess:
        graph = neurommsig_graph_preprocessor.run(graph)

    # Stop scanning at the first gene found in the graph; bail out if there is none.
    for gene in genes:
        if gene in graph:
            break
    else:
        log.debug('no genes mapping to graph')
        return None

    subgraphs = get_subgraphs_by_annotation(graph, annotation=annotation)

    return get_neurommsig_scores_prestratified(
        subgraphs.items(),
        genes,
        ora_weight=ora_weight,
        hub_weight=hub_weight,
        top=top,
        topology_weight=topology_weight,
    )
def get_neurommsig_score(graph, target_genes, ora_weight=None, hub_weight=None, top=None, topology_weight=None):
    """Calculate the composite NeuroMMSig score for a given list of genes.

    The composite score is the weighted mean of three component scores: gene over-representation
    (:py:func:`neurommsig_gene_ora`), hub analysis (:py:func:`neurommsig_hubs`) and topology
    (:py:func:`neurommsig_topology`).

    :param pybel.BELGraph graph: A BEL graph
    :param list[tuple] target_genes: A list of gene nodes
    :param Optional[float] ora_weight: The relative weight of the over-enrichment analysis score from
     :py:func:`neurommsig_gene_ora`. Defaults to 1.0. A weight of 0 disables the component.
    :param Optional[float] hub_weight: The relative weight of the hub analysis score from
     :py:func:`neurommsig_hubs`. Defaults to 1.0. A weight of 0 disables the component.
    :param Optional[float] top: The percentage of top genes to use as hubs. Defaults to 5% (0.05).
    :param Optional[float] topology_weight: The relative weight of the topological analysis score from
     :py:func:`neurommsig_topology`. Defaults to 1.0. A weight of 0 disables the component.
    :return: The NeuroMMSig composite score
    :rtype: float
    :raises ValueError: if the three weights sum to zero, since the weighted mean is then undefined
    """
    # Only substitute the default when a weight was omitted. ``weight or 1.0`` would also replace
    # an explicit 0.0, making it impossible to switch a component off.
    ora_weight = 1.0 if ora_weight is None else ora_weight
    hub_weight = 1.0 if hub_weight is None else hub_weight
    topology_weight = 1.0 if topology_weight is None else topology_weight

    # Validate before doing any graph work so a bad configuration fails fast and clearly.
    total_weight = ora_weight + hub_weight + topology_weight
    if not total_weight:
        raise ValueError('ora_weight, hub_weight, and topology_weight must not sum to zero')

    # Materialize once: the genes are consumed by three separate scoring functions.
    target_genes = list(target_genes)

    ora_score = neurommsig_gene_ora(graph, target_genes)
    hub_score = neurommsig_hubs(graph, target_genes, top=top)
    topology_score = neurommsig_topology(graph, target_genes)

    weighted_sum = ora_weight * ora_score + hub_weight * hub_score + topology_weight * topology_score

    return weighted_sum / total_weight
def neurommsig_gene_ora(graph, target_genes):
    """Calculate the fraction of the gene nodes in the graph that are target genes.

    Assume: graph central dogma inferred, collapsed to genes, collapsed variants

    :param pybel.BELGraph graph: A BEL graph
    :param iter target_genes: An iterable of nodes
    :return: The number of gene nodes in the graph that are also target genes, divided by the number of gene
     nodes in the graph. Returns 0.0 if the graph has no gene nodes.
    :rtype: float
    """
    graph_genes = set(get_nodes_by_function(graph, GENE))

    # A graph without gene nodes cannot be enriched for anything; avoid dividing by zero.
    if not graph_genes:
        return 0.0

    # NOTE(review): the denominator is the number of graph genes, not the number of target genes - confirm
    # this matches the intended definition of the over-representation score.
    return len(graph_genes.intersection(target_genes)) / len(graph_genes)


def neurommsig_hubs(graph, target_genes, top=None):
    """Calculate the fraction of the hub genes in the graph that are target genes.

    The hubs are the ``n`` gene nodes with the highest betweenness centrality, where ``n`` is the ``top`` fraction
    of the number of gene nodes in the graph (at least 1).

    Assume: graph central dogma inferred, collapsed to genes, collapsed variants, graph has more than 20 nodes

    :param pybel.BELGraph graph: A BEL graph
    :param iter[tuple] target_genes: A list of nodes
    :param Optional[float] top: The percentage of top genes to use as hubs. Defaults to 5% (0.05).
    :return: The number of hubs that are target genes divided by ``n``. Returns 0.0 if the graph has fewer
     than 20 nodes.
    :rtype: float
    """
    top = top or 0.05

    if graph.number_of_nodes() < 20:
        log.debug('Graph has less than 20 nodes')
        return 0.0

    graph_genes = set(get_nodes_by_function(graph, GENE))

    # Set membership is O(1), and unlike a one-shot iterator it survives repeated ``in`` checks.
    target_genes = set(target_genes)

    bc = Counter({
        node: betweenness_centrality
        for node, betweenness_centrality in calculate_betweenness_centality(graph).items()
        if node in graph_genes
    })

    # TODO consider continuous analog with weighting by percentile
    n = max(int(len(graph_genes) * top), 1)

    # ``Counter.most_common`` yields (node, centrality) pairs. The pair must be unpacked, otherwise the
    # membership test compares the whole pair against the target genes and never matches.
    unnormalized_sum = sum(
        node in target_genes
        for node, _ in bc.most_common(n)
    )

    return unnormalized_sum / n


def neurommsig_topology(graph, nodes):
    r"""Calculate the node neighbor score for a given list of nodes.

    For every ordered pair of distinct nodes ``(u, v)`` from ``nodes`` where ``v`` is in the graph, count whether
    ``u`` is a neighbor of ``v``, then normalize by the number of ordered pairs.

    - Doesn't consider self loops

    :param pybel.BELGraph graph: A BEL graph
    :param list[tuple] nodes: A list of nodes
    :return: The neighbor score, or 0.0 if fewer than two nodes are given
    :rtype: float

    .. math::

        \frac{\sum_i^n N_G[i]}{n*(n-1)}
    """
    nodes = list(nodes)
    n = len(nodes)

    # With fewer than two nodes there are no pairs, and the denominator would be zero.
    if n <= 1:
        return 0.0

    unnormalized_sum = sum(
        u in graph[v]
        for u, v in itt.product(nodes, repeat=2)
        if v in graph and u != v
    )

    return unnormalized_sum / (n * (n - 1.0))