Source code for pybel_tools.summary.error_summary

# -*- coding: utf-8 -*-

"""This module contains functions that provide summaries of the errors encountered while parsing a BEL script"""

from collections import Counter, defaultdict

from pybel.constants import ANNOTATIONS
from pybel.parser.exc import *
from pybel.struct.filters.edge_predicates import edge_has_annotation
from pybel.struct.summary.node_summary import get_names_by_namespace, get_namespaces
from ..utils import count_dict_values

# Public names exported by a wildcard import of this module.
__all__ = [
    'count_error_types',
    'count_naked_names',
    'get_naked_names',
    'get_incorrect_names_by_namespace',
    'get_incorrect_names',
    'get_undefined_namespaces',
    'get_undefined_namespace_names',
    'calculate_incorrect_name_dict',
    'calculate_error_by_annotation',
    'group_errors',
    'get_names_including_errors',
    'get_names_including_errors_by_namespace',
    'get_undefined_annotations',
    'get_namespaces_with_incorrect_names',
    'get_most_common_errors',
]


# TODO replace with pybel.struct.summary.count_error_types
def count_error_types(graph):
    """Count the occurrences of each type of error in a graph.

    :param pybel.BELGraph graph: A BEL graph
    :return: A Counter of {error class name: frequency}
    :rtype: collections.Counter
    """
    # Each entry of ``graph.warnings`` is unpacked as a 4-tuple whose third
    # element is the exception instance; only its class name is counted.
    return Counter(
        exc.__class__.__name__
        for _, _, exc, _ in graph.warnings
    )
def _naked_names_iter(graph):
    """Iterate over the names carried by naked name warnings from a graph.

    :param pybel.BELGraph graph: A BEL graph
    :return: An iterator over the ``name`` attribute of each
        :class:`NakedNameWarning` found in ``graph.warnings``
    :rtype: iter[str]
    """
    for _, _, exc, _ in graph.warnings:
        if isinstance(exc, NakedNameWarning):
            yield exc.name


# TODO replace with pybel.struct.summary.count_naked_names
def count_naked_names(graph):
    """Count the frequency of each naked name (names without namespaces).

    :param pybel.BELGraph graph: A BEL graph
    :return: A Counter of {name: frequency}
    :rtype: collections.Counter
    """
    return Counter(_naked_names_iter(graph))
# TODO replace with pybel.struct.summary.get_naked_names
def get_naked_names(graph):
    """Get the set of naked names (names without namespaces) in the graph.

    :param pybel.BELGraph graph: A BEL graph
    :return: The distinct naked names reported in the graph's warnings
    :rtype: set[str]
    """
    return set(_naked_names_iter(graph))
def get_namespaces_with_incorrect_names(graph):
    """Return the set of all namespaces with incorrect names in the graph.

    A namespace is included when at least one warning in ``graph.warnings``
    is a :class:`MissingNamespaceNameWarning` or a
    :class:`MissingNamespaceRegexWarning` that refers to it.

    :param pybel.BELGraph graph: A BEL graph
    :return: The namespaces referenced by missing-name warnings
    :rtype: set[str]
    """
    return {
        exc.namespace
        for _, _, exc, _ in graph.warnings
        if isinstance(exc, (MissingNamespaceNameWarning, MissingNamespaceRegexWarning))
    }
def get_incorrect_names_by_namespace(graph, namespace):
    """Return the set of all incorrect names from the given namespace in the graph.

    :param pybel.BELGraph graph: A BEL graph
    :param str namespace: The namespace to filter by
    :return: The set of all incorrect names from the given namespace in the graph
    :rtype: set[str]
    """
    return {
        exc.name
        for _, _, exc, _ in graph.warnings
        if isinstance(exc, (MissingNamespaceNameWarning, MissingNamespaceRegexWarning))
        and exc.namespace == namespace
    }
def get_incorrect_names(graph):
    """Return a dict mapping each namespace in the graph to its set of incorrect names.

    The keys are the namespaces returned by ``get_namespaces(graph)``; each
    value is the result of :func:`get_incorrect_names_by_namespace` for that
    namespace (possibly an empty set).

    :param pybel.BELGraph graph: A BEL graph
    :return: A dictionary of {namespace: set of incorrect names}
    :rtype: dict[str,set[str]]
    """
    return {
        namespace: get_incorrect_names_by_namespace(graph, namespace)
        for namespace in get_namespaces(graph)
    }
def get_undefined_namespaces(graph):
    """Get all namespaces that aren't actually defined.

    :param pybel.BELGraph graph: A BEL graph
    :return: The set of all undefined namespaces
    :rtype: set[str]
    """
    return {
        exc.namespace
        for _, _, exc, _ in graph.warnings
        if isinstance(exc, UndefinedNamespaceWarning)
    }
def get_undefined_namespace_names(graph, namespace):
    """Get the names from a namespace that wasn't actually defined.

    :param pybel.BELGraph graph: A BEL graph
    :param str namespace: The namespace to filter by
    :return: The set of all names from the undefined namespace
    :rtype: set[str]
    """
    return {
        exc.name
        for _, _, exc, _ in graph.warnings
        if isinstance(exc, UndefinedNamespaceWarning) and exc.namespace == namespace
    }
def get_undefined_annotations(graph):
    """Get all annotations that aren't actually defined.

    :param pybel.BELGraph graph: A BEL graph
    :return: The set of all undefined annotations
    :rtype: set[str]
    """
    return {
        exc.annotation
        for _, _, exc, _ in graph.warnings
        if isinstance(exc, UndefinedAnnotationWarning)
    }
def calculate_incorrect_name_dict(graph):
    """Group all of the incorrect identifiers in a dict of {namespace: list of erroneous names}.

    Unlike :func:`get_incorrect_names_by_namespace`, the names are collected
    in a list, so a name appears once per warning that reports it.

    :param pybel.BELGraph graph: A BEL graph
    :return: A dictionary of {namespace: list of erroneous names}
    :rtype: dict[str, list[str]]
    """
    missing = defaultdict(list)

    for _, _, exc, _ in graph.warnings:
        if not isinstance(exc, (MissingNamespaceNameWarning, MissingNamespaceRegexWarning)):
            continue
        missing[exc.namespace].append(exc.name)

    # Return a plain dict so lookups of unknown namespaces raise KeyError
    # instead of silently creating empty entries.
    return dict(missing)
def calculate_error_by_annotation(graph, annotation):
    """Group the graph's errors by the values of a given annotation.

    Warnings whose context is empty, or whose context does not carry the
    requested annotation (as decided by ``edge_has_annotation``), are skipped.

    :param pybel.BELGraph graph: A BEL graph
    :param annotation: The annotation to group errors by
    :type annotation: str
    :return: A dictionary of {annotation value: list of error class names}
    :rtype: dict[str, list[str]]
    """
    results = defaultdict(list)

    for _, _, exc, context in graph.warnings:
        if not context or not edge_has_annotation(context, annotation):
            continue

        error_name = exc.__class__.__name__
        values = context[ANNOTATIONS][annotation]

        if isinstance(values, str):
            # A single annotation value
            results[values].append(error_name)
        elif isinstance(values, (set, tuple, list)):
            # Several annotation values: file the error under each of them
            for value in values:
                results[value].append(error_name)
        # Values of any other type are ignored.

    return dict(results)
def group_errors(graph):
    """Group the errors together for analysis of the most frequent error.

    Warnings are keyed by ``str(exception)``; line numbers are appended in
    the order the warnings appear in ``graph.warnings``.

    :param pybel.BELGraph graph: A BEL graph
    :return: A dictionary of {error string: list of line numbers}
    :rtype: dict[str, list[int]]
    """
    warning_summary = defaultdict(list)

    for line_number, _, exc, _ in graph.warnings:
        warning_summary[str(exc)].append(line_number)

    return dict(warning_summary)
def get_most_common_errors(graph, number=20):
    """Get the most common errors in a graph.

    The errors are grouped with :func:`group_errors`, passed through
    ``count_dict_values`` and the ``number`` most common entries are returned.

    :param pybel.BELGraph graph: A BEL graph
    :param int number: The maximum number of entries to return
    :return: The result of ``most_common(number)`` on the counted errors --
        presumably (error string, count) pairs as for
        :meth:`collections.Counter.most_common`; confirm against
        ``count_dict_values``
    :rtype: list[tuple[str, int]]
    """
    error_counts = count_dict_values(group_errors(graph))
    return error_counts.most_common(number)
def get_names_including_errors_by_namespace(graph, namespace):
    """Return the union of the correct and the erroneous names of a namespace.

    Takes the names from the graph in a given namespace
    (:func:`pybel.struct.summary.get_names_by_namespace`) and the erroneous
    names from the same namespace (:func:`get_incorrect_names_by_namespace`)
    and returns them together as a unioned set.

    :param pybel.BELGraph graph: A BEL graph
    :param str namespace: The namespace to filter by
    :return: The set of all correct and incorrect names from the given namespace in the graph
    :rtype: set[str]
    """
    correct_names = get_names_by_namespace(graph, namespace)
    incorrect_names = get_incorrect_names_by_namespace(graph, namespace)
    return correct_names | incorrect_names
def get_names_including_errors(graph):
    """Return, for each namespace in the graph, its correct and erroneous names together.

    The keys are the namespaces returned by ``get_namespaces(graph)``; each
    value is the result of :func:`get_names_including_errors_by_namespace`
    for that namespace.

    :param pybel.BELGraph graph: A BEL graph
    :return: A dictionary of {namespace: set of all correct and incorrect names}
    :rtype: dict[str,set[str]]
    """
    return {
        namespace: get_names_including_errors_by_namespace(graph, namespace)
        for namespace in get_namespaces(graph)
    }