Source code for pybel_tools.summary.error_summary

# -*- coding: utf-8 -*-

"""This module contains functions that provide summaries of the errors encountered while parsing a BEL script."""

from collections import Iterable, defaultdict
from typing import List, Mapping, Optional, Set

from pybel import BELGraph
from pybel.constants import ANNOTATIONS
from pybel.parser.exc import (
    MissingNamespaceNameWarning, MissingNamespaceRegexWarning, UndefinedAnnotationWarning, UndefinedNamespaceWarning,
)
from pybel.struct.filters.edge_predicates import edge_has_annotation
from pybel.struct.summary.errors import count_error_types, count_naked_names, get_naked_names
from pybel.struct.summary.node_summary import get_names_by_namespace, get_namespaces
from ..utils import count_dict_values

__all__ = [
    'count_error_types',
    'count_naked_names',
    'get_naked_names',
    'get_incorrect_names_by_namespace',
    'get_incorrect_names',
    'get_undefined_namespaces',
    'get_undefined_namespace_names',
    'calculate_incorrect_name_dict',
    'calculate_error_by_annotation',
    'group_errors',
    'get_names_including_errors',
    'get_names_including_errors_by_namespace',
    'get_undefined_annotations',
    'get_namespaces_with_incorrect_names',
    'get_most_common_errors',
]


[docs]def get_namespaces_with_incorrect_names(graph: BELGraph) -> Set[str]: """Return the set of all namespaces with incorrect names in the graph.""" return { exc.namespace for _, exc, _ in graph.warnings if isinstance(exc, (MissingNamespaceNameWarning, MissingNamespaceRegexWarning)) }
[docs]def get_undefined_namespaces(graph: BELGraph) -> Set[str]: """Get all namespaces that are used in the BEL graph aren't actually defined.""" return { exc.namespace for _, exc, _ in graph.warnings if isinstance(exc, UndefinedNamespaceWarning) }
[docs]def get_incorrect_names_by_namespace(graph: BELGraph, namespace: str) -> Set[str]: """Return the set of all incorrect names from the given namespace in the graph. :return: The set of all incorrect names from the given namespace in the graph """ return { exc.name for _, exc, _ in graph.warnings if isinstance(exc, (MissingNamespaceNameWarning, MissingNamespaceRegexWarning)) and exc.namespace == namespace }
[docs]def get_undefined_namespace_names(graph: BELGraph, namespace: str) -> Set[str]: """Get the names from a namespace that wasn't actually defined. :return: The set of all names from the undefined namespace """ return { exc.name for _, exc, _ in graph.warnings if isinstance(exc, UndefinedNamespaceWarning) and exc.namespace == namespace }
[docs]def get_incorrect_names(graph: BELGraph) -> Mapping[str, Set[str]]: """Return the dict of the sets of all incorrect names from the given namespace in the graph. :return: The set of all incorrect names from the given namespace in the graph """ return { namespace: get_incorrect_names_by_namespace(graph, namespace) for namespace in get_namespaces(graph) }
[docs]def get_undefined_annotations(graph: BELGraph) -> Set[str]: """Get all annotations that aren't actually defined. :return: The set of all undefined annotations """ return { exc.annotation for _, exc, _ in graph.warnings if isinstance(exc, UndefinedAnnotationWarning) }
[docs]def calculate_incorrect_name_dict(graph: BELGraph) -> Mapping[str, str]: """Group all of the incorrect identifiers in a dict of {namespace: list of erroneous names}. :return: A dictionary of {namespace: list of erroneous names} """ missing = defaultdict(list) for _, e, ctx in graph.warnings: if not isinstance(e, (MissingNamespaceNameWarning, MissingNamespaceRegexWarning)): continue missing[e.namespace].append(e.name) return dict(missing)
[docs]def calculate_error_by_annotation(graph: BELGraph, annotation: str) -> Mapping[str, List[str]]: """Group the graph by a given annotation and builds lists of errors for each. :return: A dictionary of {annotation value: list of errors} """ results = defaultdict(list) for _, exc, ctx in graph.warnings: if not ctx or not edge_has_annotation(ctx, annotation): continue values = ctx[ANNOTATIONS][annotation] if isinstance(values, str): results[values].append(exc.__class__.__name__) elif isinstance(values, Iterable): for value in values: results[value].append(exc.__class__.__name__) return dict(results)
[docs]def group_errors(graph: BELGraph) -> Mapping[str, List[int]]: """Group the errors together for analysis of the most frequent error. :return: A dictionary of {error string: list of line numbers} """ warning_summary = defaultdict(list) for _, exc, _ in graph.warnings: warning_summary[str(exc)].append(exc.line_number) return dict(warning_summary)
[docs]def get_most_common_errors(graph: BELGraph, n: Optional[int] = 20): """Get the (n) most common errors in a graph.""" return count_dict_values(group_errors(graph)).most_common(n)
[docs]def get_names_including_errors_by_namespace(graph: BELGraph, namespace: str) -> Set[str]: """Takes the names from the graph in a given namespace (:func:`pybel.struct.summary.get_names_by_namespace`) and the erroneous names from the same namespace (:func:`get_incorrect_names_by_namespace`) and returns them together as a unioned set :return: The set of all correct and incorrect names from the given namespace in the graph """ return get_names_by_namespace(graph, namespace) | get_incorrect_names_by_namespace(graph, namespace)
[docs]def get_names_including_errors(graph: BELGraph) -> Mapping[str, Set[str]]: """Takes the names from the graph in a given namespace and the erroneous names from the same namespace and returns them together as a unioned set :return: The dict of the sets of all correct and incorrect names from the given namespace in the graph """ return { namespace: get_names_including_errors_by_namespace(graph, namespace) for namespace in get_namespaces(graph) }