# -*- coding: utf-8 -*-
"""This module contains functions that provide summaries of the edges in a graph"""
from collections import Counter, defaultdict
import itertools as itt
from pybel.constants import (
ANNOTATIONS, CAUSAL_DECREASE_RELATIONS, CAUSAL_INCREASE_RELATIONS, CAUSES_NO_CHANGE,
FUNCTION, PATHOLOGY, RELATION,
)
from pybel.struct.filters.edge_predicates import edge_has_annotation
from pybel.struct.filters.node_predicates import keep_node_permissive
from pybel.struct.summary import (
count_relations, get_annotation_values, iter_annotation_value_pairs,
iter_annotation_values,
)
__all__ = [
'count_relations',
'get_edge_relations',
'count_unique_relations',
'count_annotations',
'get_annotations',
'get_annotations_containing_keyword',
'count_annotation_values',
'count_annotation_values_filtered',
'pair_is_consistent',
'get_consistent_edges',
'pair_has_contradiction',
'get_contradictory_pairs',
'count_pathologies',
'relation_set_has_contradictions',
'get_unused_annotations',
'get_unused_list_annotation_values',
]
def group_dict_set(iterator):
"""Makes a dict that accumulates the values for each key in an iterator of doubles
:param iter[tuple[A,B]] iterator: An iterator
:rtype: dict[A,set[B]]
"""
d = defaultdict(set)
for key, value in iterator:
d[key].add(value)
return dict(d)
[docs]def get_edge_relations(graph):
"""Builds a dictionary of {node pair: set of edge types}
:param pybel.BELGraph graph: A BEL graph
:return: A dictionary of {(node, node): set of edge types}
:rtype: dict[tuple[tuple,tuple],set[str]]
"""
return group_dict_set(
((u, v), d[RELATION])
for u, v, d in graph.edges_iter(data=True)
)
[docs]def count_unique_relations(graph):
"""Returns a histogram of the different types of relations present in a graph.
Note: this operation only counts each type of edge once for each pair of nodes
:param pybel.BELGraph graph: A BEL graph
:return: Counter from {relation type: frequency}
:rtype: collections.Counter
"""
return Counter(itt.chain.from_iterable(get_edge_relations(graph).values()))
def _annotation_iter_helper(graph):
"""Iterates over the annotation keys
:param pybel.BELGraph graph:
:rtype: iter[str]
"""
return (
key
for _, _, data in graph.edges(data=True)
if ANNOTATIONS in data
for key in data[ANNOTATIONS]
)
[docs]def count_annotations(graph):
"""Counts how many times each annotation is used in the graph
:param pybel.BELGraph graph: A BEL graph
:return: A Counter from {annotation key: frequency}
:rtype: collections.Counter
"""
return Counter(_annotation_iter_helper(graph))
[docs]def get_annotations(graph):
"""Gets the set of annotations used in the graph
:param pybel.BELGraph graph: A BEL graph
:return: A set of annotation keys
:rtype: set[str]
"""
return set(_annotation_iter_helper(graph))
[docs]def get_unused_annotations(graph):
"""Gets the set of all annotations that are defined in a graph, but are never used.
:param pybel.BELGraph graph: A BEL graph
:return: A set of annotations
:rtype: set[str]
"""
return graph.defined_annotation_keywords - get_annotations(graph)
[docs]def get_unused_list_annotation_values(graph):
"""Gets all of the unused values for list annotations
:param pybel.BELGraph graph: A BEL graph
:return: A dictionary of {str annotation: set of str values that aren't used}
:rtype: dict[str, set[str]]
"""
result = {}
for annotation, values in graph.annotation_list.items():
used_values = get_annotation_values(graph, annotation)
if len(used_values) == len(values): # all values have been used
continue
result[annotation] = set(values) - used_values
return result
[docs]def get_annotations_containing_keyword(graph, keyword):
"""Gets annotation/value pairs for values for whom the search string is a substring
:param pybel.BELGraph graph: A BEL graph
:param str keyword: Search for annotations whose values have this as a substring
:rtype: list[dict[str,str]
"""
return [
{
'annotation': annotation,
'value': value
}
for annotation, value in iter_annotation_value_pairs(graph)
if keyword.lower() in value.lower()
]
[docs]def count_annotation_values(graph, annotation):
"""Counts in how many edges each annotation appears in a graph
:param pybel.BELGraph graph: A BEL graph
:param str annotation: The annotation to count
:return: A Counter from {annotation value: frequency}
:rtype: collections.Counter
"""
return Counter(iter_annotation_values(graph, annotation))
[docs]def count_annotation_values_filtered(graph, annotation, source_filter=None, target_filter=None):
"""Counts in how many edges each annotation appears in a graph, but filter out source nodes and target nodes
See :func:`pybel_tools.utils.keep_node` for a basic filter.
:param pybel.BELGraph graph: A BEL graph
:param str annotation: The annotation to count
:param source_filter: A predicate (graph, node) -> bool for keeping source nodes
:type source_filter: types.FunctionType
:param target_filter: A predicate (graph, node) -> bool for keeping target nodes
:type target_filter: types.FunctionType
:return: A Counter from {annotation value: frequency}
:rtype: Counter
"""
source_filter = keep_node_permissive if source_filter is None else source_filter
target_filter = keep_node_permissive if target_filter is None else target_filter
return Counter(
data[ANNOTATIONS][annotation]
for u, v, data in graph.edges_iter(data=True)
if edge_has_annotation(data, annotation) and source_filter(graph, u) and target_filter(graph, v)
)
[docs]def pair_is_consistent(graph, u, v):
"""Return if the edges between the given nodes are consistent, meaning they all have the same relation.
:param pybel.BELGraph graph: A BEL graph
:param tuple u: The source BEL node
:param tuple v: The target BEL node
:return: If the edges aren't consistent, return false, otherwise return the relation type
:rtype: bool or str
"""
relations = {data[RELATION] for data in graph[u][v].values()}
if 1 != len(relations):
return False
return list(relations)[0]
[docs]def relation_set_has_contradictions(relations):
"""Return if the set of relations contains a contradiction.
:param set[str] relations: A set of relations
:rtype: bool
"""
has_increases = any(relation in CAUSAL_INCREASE_RELATIONS for relation in relations)
has_decreases = any(relation in CAUSAL_DECREASE_RELATIONS for relation in relations)
has_cnc = any(relation == CAUSES_NO_CHANGE for relation in relations)
return 1 < sum([has_cnc, has_decreases, has_increases])
[docs]def pair_has_contradiction(graph, u, v):
"""Checks if a pair of nodes has any contradictions in their causal relationships.
:param pybel.BELGraph graph: A BEL graph
:param tuple u: The source BEL node
:param tuple v: The target BEL node
:return: Do the edges between these nodes have a contradiction?
:rtype: bool
"""
relations = {data[RELATION] for data in graph[u][v].values()}
return relation_set_has_contradictions(relations)
[docs]def get_contradictory_pairs(graph):
"""Iterates over contradictory node pairs in the graph based on their causal relationships
:param pybel.BELGraph graph: A BEL graph
:return: An iterator over (source, target) node pairs that have contradictory causal edges
:rtype: iter
"""
for u, v in graph.edges():
if pair_has_contradiction(graph, u, v):
yield u, v
[docs]def get_consistent_edges(graph):
"""Yields pairs of (source node, target node) for which all of their edges have the same type of relation.
:param pybel.BELGraph graph: A BEL graph
:return: An iterator over (source, target) node pairs corresponding to edges with many inconsistent relations
:rtype: iter[tuple]
"""
for u, v in graph.edges():
if pair_is_consistent(graph, u, v):
yield u, v
def _pathology_iterator(graph):
"""Iterates over the diseases encountered in edges
:param pybel.BELGraph graph: A BEL graph
:rtype: iter
"""
for node in itt.chain.from_iterable(graph.edges()):
if graph.node[node][FUNCTION] == PATHOLOGY:
yield node
[docs]def count_pathologies(graph):
"""Returns a counter of all of the mentions of pathologies in a network
:param pybel.BELGraph graph: A BEL graph
:rtype: Counter
"""
return Counter(_pathology_iterator(graph))