Skip to content

Commit

Permalink
Add informational error message for failed loading of nn-ensemble mod…
Browse files Browse the repository at this point in the history
…el (#785)

* Add informational message for failed loading of nn-ensemble model

* Add test for failed initialization of nn-ensemble model

* Make model metadata fn NN ensemble method

* More informative error message

* Better handling of failed metadata reading

* Run isort and black

* Remove unused import

* Add test for failing to read metadata from model file

* Catch all errors when reading metadata from file

* Resolve deprecation warning

* Try different logging mechanism to fix testing in CICD

* Remove testing for log message for failing to read metadata

* Remove obsolete fixture argument and rename test
  • Loading branch information
juhoinkinen committed Apr 25, 2024
1 parent d9f3793 commit ca04d07
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 5 deletions.
41 changes: 37 additions & 4 deletions annif/backend/nn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@

from __future__ import annotations

import importlib
import json
import os.path
import shutil
import zipfile
from io import BytesIO
from typing import TYPE_CHECKING, Any

Expand All @@ -21,7 +24,11 @@
import annif.corpus
import annif.parallel
import annif.util
from annif.exception import NotInitializedException, NotSupportedException
from annif.exception import (
NotInitializedException,
NotSupportedException,
OperationFailedException,
)
from annif.suggestion import SuggestionBatch, vector_to_suggestions

from . import backend, ensemble
Expand All @@ -31,6 +38,8 @@

from annif.corpus.document import DocumentCorpus

# Module-level alias for the ``logger`` attribute of the annif package.
logger = annif.logger


def idx_to_key(idx: int) -> bytes:
"""convert an integer index to a binary key for use in LMDB"""
Expand Down Expand Up @@ -129,9 +138,20 @@ def initialize(self, parallel: bool = False) -> None:
backend_id=self.backend_id,
)
self.debug("loading Keras model from {}".format(model_filename))
self._model = load_model(
model_filename, custom_objects={"MeanLayer": MeanLayer}
)
try:
self._model = load_model(
model_filename, custom_objects={"MeanLayer": MeanLayer}
)
except Exception as err:
metadata = self.get_model_metadata(model_filename)
keras_version = importlib.metadata.version("keras")
message = (
f"loading Keras model from {model_filename}; "
f"model metadata: {metadata}; "
f"you have Keras version {keras_version}. "
f'Original error message: "{err}"'
)
raise OperationFailedException(message, backend_id=self.backend_id)

def _merge_source_batches(
self,
Expand Down Expand Up @@ -289,3 +309,16 @@ def _learn(
self._fit_model(
corpus, int(params["learn-epochs"]), int(params["lmdb_map_size"])
)

def get_model_metadata(self, model_filename: str) -> dict | None:
"""Read metadata from Keras model files."""

try:
with zipfile.ZipFile(model_filename, "r") as zip:
with zip.open("metadata.json") as metadata_file:
metadata_str = metadata_file.read().decode("utf-8")
metadata = json.loads(metadata_str)
return metadata
except Exception:
self.warning(f"Failed to read metadata from {model_filename}")
return None
61 changes: 60 additions & 1 deletion tests/test_backend_nn_ensemble.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
"""Unit tests for the nn_ensemble backend in Annif"""

import importlib
import os.path
import time
from datetime import datetime, timedelta, timezone
from unittest import mock

import py.path
import pytest

import annif.backend
import annif.corpus
from annif.exception import NotInitializedException, NotSupportedException
from annif.exception import (
NotInitializedException,
NotSupportedException,
OperationFailedException,
)

# Skip every test in this module when the nn_ensemble backend or lmdb cannot
# be imported; importorskip returns the imported module, bound here as lmdb.
pytest.importorskip("annif.backend.nn_ensemble")
lmdb = pytest.importorskip("lmdb")
Expand Down Expand Up @@ -192,6 +199,58 @@ def test_nn_ensemble_modification_time(app_project):
assert datetime.now(timezone.utc) - nn_ensemble.modification_time < timedelta(1)


def test_nn_ensemble_get_model_metadata(app_project):
    """The metadata read from the model file reports Keras version and save date."""
    # A bare ``import importlib`` does not guarantee that the ``metadata``
    # submodule is loaded, so import it explicitly before using it.
    import importlib.metadata

    nn_ensemble_type = annif.backend.get_backend("nn_ensemble")
    nn_ensemble = nn_ensemble_type(
        backend_id="nn_ensemble",
        config_params={"sources": "dummy-en"},
        project=app_project,
    )
    model_filename = os.path.join(nn_ensemble.datadir, nn_ensemble.MODEL_FILE)

    expected_version = importlib.metadata.version("keras")
    expected_date_saved = datetime.now(timezone.utc)
    actual_metadata = nn_ensemble.get_model_metadata(model_filename)

    assert actual_metadata["keras_version"] == expected_version
    datetime_format = "%Y-%m-%d@%H:%M:%S"
    # strptime yields a naive datetime; astimezone() interprets it as local
    # time before converting to UTC for the comparison.
    actual_datetime = datetime.strptime(actual_metadata["date_saved"], datetime_format)
    # The model must have been saved less than one day ago.
    assert expected_date_saved - actual_datetime.astimezone(
        tz=timezone.utc
    ) < timedelta(1)


def test_nn_ensemble_get_model_metadata_nonexistent_file(app_project):
    """Reading metadata from a missing model file yields None, not an error."""
    backend_cls = annif.backend.get_backend("nn_ensemble")
    backend = backend_cls(
        backend_id="nn_ensemble",
        config_params={"sources": "dummy-en"},
        project=app_project,
    )
    # Path inside the backend's data directory that is not expected to exist.
    missing_path = os.path.join(backend.datadir, "nonexistent.zip")

    assert backend.get_model_metadata(missing_path) is None


@mock.patch("annif.backend.nn_ensemble.load_model", side_effect=Exception)
def test_nn_ensemble_initialize_error(load_model, app_project):
    """A failing load_model surfaces as OperationFailedException with details."""
    backend = annif.backend.get_backend("nn_ensemble")(
        backend_id="nn_ensemble",
        config_params={"sources": "dummy-en"},
        project=app_project,
    )
    # Nothing has been loaded before initialize() is called.
    assert backend._model is None

    expected_message = r"loading Keras model from .*; model metadata: .*"
    with pytest.raises(OperationFailedException, match=expected_message):
        backend.initialize()

    # The patched loader must have been invoked by initialize().
    assert load_model.called


def test_nn_ensemble_initialize(app_project):
nn_ensemble_type = annif.backend.get_backend("nn_ensemble")
nn_ensemble = nn_ensemble_type(
Expand Down

0 comments on commit ca04d07

Please sign in to comment.