Skip to content

Commit

Permalink
node_info_by_name
Browse files Browse the repository at this point in the history
  • Loading branch information
dhimmel committed Mar 11, 2022
1 parent 3a0cd3a commit be4e985
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 10 deletions.
22 changes: 12 additions & 10 deletions nxontology/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from nxontology.exceptions import NodeNotFound
from nxontology.node import Node

logger = logging.getLogger(__name__)


def pronto_to_nxontology(onto: Prontology) -> NXOntology[str]:
"""
Expand Down Expand Up @@ -38,7 +40,7 @@ def pronto_to_nxontology(onto: Prontology) -> NXOntology[str]:
try:
nxo.add_edge(term.id, child.id)
except NodeNotFound as e:
logging.warning(
logger.warning(
f"Cannot add edge: {term.id} --> {child.id} "
f"({term.name} --> {child.name}): {e}"
)
Expand Down Expand Up @@ -114,7 +116,7 @@ def pronto_to_multidigraph(
# https://github.com/althonos/pronto/issues/122
continue
if term.id in graph:
logging.warning(f"Skipping node already in graph: {term}")
logger.warning(f"Skipping node already in graph: {term}")
continue
graph.add_node(
term.id,
Expand All @@ -128,16 +130,16 @@ def pronto_to_multidigraph(
):
for node in source, target:
if node not in graph:
logging.warning(
logger.warning(
f"Skipping edge: node does not exist in graph: {node}"
)
if graph.has_edge(source, target, key):
logging.warning(
logger.warning(
f"Skipping edge already in graph: {source} --> {target} (key={key!r})"
)
graph.add_edge(source, target, key=key)
rel_counts = Counter(key for _, _, key in graph.edges(keys=True))
logging.info(f"MultiDiGraph relationship counts:\n{rel_counts}")
logger.info(f"MultiDiGraph relationship counts:\n{rel_counts}")
return graph


Expand All @@ -161,7 +163,7 @@ def multidigraph_to_digraph(
— i.e. those that are already captured by a more specific ancestral path.
The default is reduce=False since the reduction can be a computationally expensive step.
"""
logging.info(f"Received MultiDiGraph with {graph.number_of_edges():,} edges.")
logger.info(f"Received MultiDiGraph with {graph.number_of_edges():,} edges.")
if rel_types is not None:
graph = graph.copy()
graph.remove_edges_from(
Expand All @@ -171,7 +173,7 @@ def multidigraph_to_digraph(
if key not in rel_types
]
)
logging.info(
logger.info(
f"Filtered MultiDiGraph to {graph.number_of_edges():,} edges of the following types: {rel_types}."
)
if reverse:
Expand All @@ -187,12 +189,12 @@ def multidigraph_to_digraph(
)
no_data_digraph.graph.update(digraph.graph)
digraph = no_data_digraph
logging.info(
logger.info(
f"Reduced DiGraph by removing {n_edges_before - digraph.number_of_edges():,} redundant edges."
)
for source, target in digraph.edges(data=False):
digraph[source][target]["rel_types"] = sorted(graph[source][target])
logging.info(
logger.info(
f"Converted MultiDiGraph to DiGraph with {digraph.number_of_nodes():,} nodes and {digraph.number_of_edges():,} edges."
)
return digraph
Expand Down Expand Up @@ -228,7 +230,7 @@ def read_gene_ontology(
else:
date.fromisoformat(release) # check that release is a valid date
url = f"http://release.geneontology.org/{release}/ontology/{source_file}"
logging.info(f"Loading Gene Ontology into Pronto from <{url}>.")
logger.info(f"Loading Gene Ontology into Pronto from <{url}>.")
go_pronto = Prontology(handle=url)
go_multidigraph = pronto_to_multidigraph(go_pronto, default_rel_type="is a")
go_digraph = multidigraph_to_digraph(
Expand Down
27 changes: 27 additions & 0 deletions nxontology/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import itertools
import json
import logging
from typing import Any, Dict, Generic, Iterable, List, Optional, Set, cast

import fsspec
Expand All @@ -17,6 +18,8 @@
from .similarity import SimilarityIC
from .utils import Freezable, cache_on_frozen

logger = logging.getLogger(__name__)


class NXOntology(Freezable, Generic[Node]):
"""
Expand Down Expand Up @@ -205,6 +208,30 @@ def node_info(self, node: Node) -> Node_Info[Node]:
self._node_info_cache[node] = Node_Info(self, node)
return self._node_info_cache[node]

@cache_on_frozen
def _get_name_to_node_info(self) -> dict[str, Node_Info[Node]]:
    """
    Build a lookup table from node label to the Node_Info of that node.

    Nodes whose label is falsy (missing or empty) are left out of the table.
    When several nodes share the same label, a warning is logged and the
    node encountered later while iterating self.graph replaces the earlier
    entry.

    NOTE(review): the cache_on_frozen decorator presumably memoizes this
    result once the ontology is frozen; confirm against its definition.
    """
    by_label: dict[str, Node_Info[Node]] = {}
    for node in self.graph:
        node_info = self.node_info(node)
        label = node_info.label
        if label:
            if label in by_label:
                # Report the clash before the earlier entry is overwritten.
                earlier_node = by_label[label].node
                logger.warning(
                    f"Node name duplicated for nodes {earlier_node!r} & {node!r}: {label!r}"
                )
            by_label[label] = node_info
    return by_label

def node_info_by_name(self, name: str) -> Node_Info[Node]:
    """
    Look up a node by its name and return the matching Node_Info instance.

    The lookup table comes from self._get_name_to_node_info().
    Raises NodeNotFound when no entry exists for the given name.
    """
    lookup = self._get_name_to_node_info()
    if name in lookup:
        return lookup[name]
    raise NodeNotFound(f"No node found named {name!r}.")

@property
def n_nodes(self) -> int:
"""
Expand Down
12 changes: 12 additions & 0 deletions nxontology/tests/ontology_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,18 @@ def test_set_graph_attributes(metal_nxo: NXOntology[str]) -> None:
assert silver_info.url is None


def test_node_info_by_name() -> None:
    """
    node_info_by_name resolves named nodes and raises NodeNotFound otherwise.

    Node "c" is added without a name attribute, so the ontology contains an
    unnamed node alongside the two named ones.
    """
    ontology: NXOntology[str] = NXOntology()
    ontology.set_graph_attributes(node_label_attribute="name")
    node_attributes = (
        ("a", {"name": "a_name"}),
        ("b", {"name": "b_name"}),
        ("c", {}),
    )
    for node, attributes in node_attributes:
        ontology.add_node(node, **attributes)
    for name, expected_node in (("a_name", "a"), ("b_name", "b")):
        assert ontology.node_info_by_name(name).node == expected_node
    with pytest.raises(NodeNotFound, match="No node found named"):
        ontology.node_info_by_name("missing_name")


def test_node_info_not_found(metal_nxo_frozen: NXOntology[str]) -> None:
    """
    Requesting node_info for a node absent from the graph raises NodeNotFound.
    """
    expect_not_found = pytest.raises(NodeNotFound, match="not-a-metal not in graph")
    with expect_not_found:
        metal_nxo_frozen.node_info("not-a-metal")

0 comments on commit be4e985

Please sign in to comment.