Skip to content

Commit

Permalink
Merge branch 'update-dependencies-v1.0' into issue703-python-3.11-support
Browse files Browse the repository at this point in the history
  • Loading branch information
juhoinkinen committed Aug 15, 2023
2 parents f595278 + cc0bcd8 commit 05d52ad
Show file tree
Hide file tree
Showing 18 changed files with 97 additions and 57 deletions.
7 changes: 3 additions & 4 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
version: 2

build:
os: "ubuntu-20.04"
os: "ubuntu-22.04"
tools:
python: "3.9"
python: "3.10"

# Build documentation in the docs/ directory with Sphinx
sphinx:
Expand All @@ -27,10 +27,9 @@ python:
- nn
- omikuji
- fasttext
- stwfsa
- yake
- pycld3
- spacy
- requirements: docs/requirements.txt
- method: pip
path: .
system_packages: true
19 changes: 19 additions & 0 deletions annif/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def create_flask_app(config_name: str | None = None) -> Flask:
"""Create a Flask app to be used by the CLI."""
from flask import Flask

_set_tensorflow_loglevel()

app = Flask(__name__)
config_name = _get_config_name(config_name)
logger.debug(f"creating flask app with configuration {config_name}")
Expand Down Expand Up @@ -75,3 +77,20 @@ def _get_config_name(config_name: str | None) -> str:
else:
config_name = "annif.default_config.ProductionConfig" # pragma: no cover
return config_name


def _set_tensorflow_loglevel() -> None:
    """Set TensorFlow log level based on Annif log level (--verbosity/-v
    option) using an environment variable. INFO messages by TF are shown only
    when the Annif level is below INFO (i.e. DEBUG or NOTSET).

    The value is written with ``setdefault``, so a TF_CPP_MIN_LOG_LEVEL that
    is already present in the environment is left untouched.
    """
    annif_loglevel = logger.getEffectiveLevel()
    # Compare against thresholds instead of looking up an exact value: logging
    # levels are plain integers, so a non-standard level (e.g. 15 or 25) is
    # possible and would make an exact-match dict lookup raise KeyError.
    if annif_loglevel >= 50:  # CRITICAL
        tf_loglevel = "3"
    elif annif_loglevel >= 40:  # ERROR
        tf_loglevel = "2"
    elif annif_loglevel >= 20:  # INFO, WARNING
        tf_loglevel = "1"
    else:  # NOTSET, DEBUG
        tf_loglevel = "0"
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", tf_loglevel)
19 changes: 16 additions & 3 deletions annif/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def __init__(
self.datadir = project.datadir

def default_params(self) -> dict[str, Any]:
return self.DEFAULT_PARAMETERS
params = AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS) # Optional backend specific parameters
return params

@property
def params(self) -> dict[str, Any]:
Expand All @@ -49,15 +51,26 @@ def params(self) -> dict[str, Any]:
params.update(self.config_params)
return params

@property
def _model_file_paths(self) -> list:
    """List the entries of the data directory, leaving out those that match
    the "*-train*", "tmp-*" or "vectorizer" patterns. The order of the
    returned paths is not defined."""
    candidates = set(glob(os.path.join(self.datadir, "*")))
    excluded = set()
    for pattern in ("*-train*", "tmp-*", "vectorizer"):
        excluded.update(glob(os.path.join(self.datadir, pattern)))
    return list(candidates - excluded)

@property
def is_trained(self) -> bool:
return bool(glob(os.path.join(self.datadir, "*")))
return bool(self._model_file_paths)

@property
def modification_time(self) -> datetime | None:
mtimes = [
datetime.utcfromtimestamp(os.path.getmtime(p))
for p in glob(os.path.join(self.datadir, "*"))
for p in self._model_file_paths
]
most_recent = max(mtimes, default=None)
if most_recent is None:
Expand Down
3 changes: 0 additions & 3 deletions annif/backend/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ class DummyBackend(backend.AnnifLearningBackend):
is_trained = True
modification_time = None

def default_params(self) -> dict[str, int]:
return backend.AnnifBackend.DEFAULT_PARAMETERS

def initialize(self, parallel: bool = False) -> None:
self.initialized = True

Expand Down
2 changes: 1 addition & 1 deletion annif/backend/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _format_cfg_line(self, hps: dict[str, float]) -> str:
def _objective(self, trial: Trial) -> float:
eval_batch = annif.eval.EvaluationBatch(self._backend.project.subjects)
proj_weights = {
project_id: trial.suggest_uniform(project_id, 0.0, 1.0)
project_id: trial.suggest_float(project_id, 0.0, 1.0)
for project_id in self._sources
}
for gold_batch, src_batches in zip(self._gold_batches, self._source_batches):
Expand Down
7 changes: 1 addition & 6 deletions annif/backend/mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from annif.lexical.mllm import MLLMModel
from annif.suggestion import vector_to_suggestions

from . import backend, hyperopt
from . import hyperopt

if TYPE_CHECKING:
from collections.abc import Iterator
Expand Down Expand Up @@ -95,11 +95,6 @@ class MLLMBackend(hyperopt.AnnifHyperoptBackend):
def get_hp_optimizer(self, corpus: DocumentCorpus, metric: str) -> MLLMOptimizer:
return MLLMOptimizer(self, corpus, metric)

def default_params(self) -> dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params

def _load_model(self) -> MLLMModel:
path = os.path.join(self.datadir, self.MODEL_FILE)
self.debug("loading model from {}".format(path))
Expand Down
5 changes: 0 additions & 5 deletions annif/backend/nn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,6 @@ class NNEnsembleBackend(backend.AnnifLearningBackend, ensemble.BaseEnsembleBacke
# defaults for uninitialized instances
_model = None

def default_params(self) -> dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params

def initialize(self, parallel: bool = False) -> None:
super().initialize(parallel)
if self._model is not None:
Expand Down
5 changes: 0 additions & 5 deletions annif/backend/omikuji.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,6 @@ class OmikujiBackend(mixins.TfidfVectorizerMixin, backend.AnnifBackend):
"collapse_every_n_layers": 0,
}

def default_params(self) -> dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params

def _initialize_model(self) -> None:
if self._model is None:
path = os.path.join(self.datadir, self.MODEL_FILE)
Expand Down
7 changes: 1 addition & 6 deletions annif/backend/pav.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from annif.exception import NotInitializedException, NotSupportedException
from annif.suggestion import SubjectSuggestion, SuggestionBatch

from . import backend, ensemble
from . import ensemble

if TYPE_CHECKING:
from annif.corpus.document import DocumentCorpus
Expand All @@ -36,11 +36,6 @@ class PAVBackend(ensemble.BaseEnsembleBackend):

DEFAULT_PARAMETERS = {"min-docs": 10}

def default_params(self) -> dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params

def initialize(self, parallel: bool = False) -> None:
super().initialize(parallel)
if self._models is not None:
Expand Down
5 changes: 0 additions & 5 deletions annif/backend/svc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ class SVCBackend(mixins.TfidfVectorizerMixin, backend.AnnifBackend):

DEFAULT_PARAMETERS = {"min_df": 1, "ngram": 1}

def default_params(self) -> dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params

def _initialize_model(self) -> None:
if self._model is None:
path = os.path.join(self.datadir, self.MODEL_FILE)
Expand Down
5 changes: 0 additions & 5 deletions annif/backend/yake.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,6 @@ class YakeBackend(backend.AnnifBackend):
"remove_parentheses": False,
}

def default_params(self) -> dict[str, Any]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params

@property
def is_trained(self):
return True
Expand Down
1 change: 1 addition & 0 deletions annif/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def atomic_save(
final name."""

prefix, suffix = os.path.splitext(filename)
prefix = "tmp-" + prefix
tempfd, tempfilename = tempfile.mkstemp(prefix=prefix, suffix=suffix, dir=dirname)
os.close(tempfd)
logger.debug("saving %s to temporary file %s", str(obj)[:90], tempfilename)
Expand Down
11 changes: 8 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,26 @@
import os
import re
import sys
from datetime import date

sys.path.insert(0, os.path.abspath(".."))


# -- Project information -----------------------------------------------------

project = "annif"
copyright = "2017, University Of Helsinki (The National Library Of Finland)"
author = "Osma Suominen"
copyright = (
f"2017-{date.today().year}, University Of Helsinki "
+ "(The National Library Of Finland)"
)

author = "National Library Of Finland"

# Get version number from GitHub tag
release = re.sub("^v", "", os.popen("git describe --tags").read().strip())
# The short X.Y version.
version = release

html_title = project + " " + release

# -- General configuration ---------------------------------------------------

Expand Down
20 changes: 10 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,32 @@ classifiers=[
[tool.poetry.dependencies]
python = ">=3.8,<3.12"

connexion = {version = "2.14.*", extras = ["swagger-ui"]}
flask = ">=1.0.4,<3"
flask-cors = "3.0.*"
connexion = {version = "2.14.2", extras = ["swagger-ui"]}
flask = "2.2.*"
flask-cors = "4.0.*"
click = "8.1.*"
click-log = "0.4.*"
joblib = "1.2.*"
joblib = "1.3.*"
nltk = "3.8.*"
gensim = "4.3.*"
scikit-learn = "1.2.2"
scikit-learn = "1.3.*"
scipy = "1.10.*"
rdflib = ">=4.2,<7.0"
gunicorn = "20.1.*"
rdflib = "6.3.*"
gunicorn = "21.2.*"
numpy = "1.24.*"
optuna = "2.10.*"
optuna = "3.3.*"
python-dateutil = "2.8.*"
tomli = { version = "2.0.*", python = "<3.11" }
simplemma = "0.9.*"
jsonschema = "4.17.*"

fasttext-wheel = {version = "0.9.2", optional = true}
voikko = {version = "0.5.*", optional = true}
tensorflow-cpu = {version = "2.11.*", optional = true}
tensorflow-cpu = {version = "2.13.*", optional = true}
lmdb = {version = "1.4.1", optional = true}
omikuji = {version = "0.5.*", optional = true}
yake = {version = "0.4.5", optional = true}
spacy = {version = "3.5.*", optional = true}
spacy = {version = "3.6.*", optional = true}
stwfsapy = {version="0.3.*", optional = true}

[tool.poetry.dev-dependencies]
Expand Down
1 change: 1 addition & 0 deletions tests/test_backend_stwfsa.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def test_stwfsa_default_params(project):
backend_id=stwfsa_backend_name, config_params={}, project=project
)
expected_default_params = {
"limit": 100,
"concept_type_uri": "https://www.w3.org/2004/02/skos/core#Concept",
"sub_thesaurus_type_uri": "https://www.w3.org/2004/02/skos/core#Collection",
"thesaurus_relation_type_uri": "https://www.w3.org/2004/02/skos/core#member",
Expand Down
21 changes: 21 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,27 @@
PROJECTS_CONFIG_PATH = "tests/projects_for_config_path_option.cfg"


@mock.patch.dict(os.environ, clear=True)
def test_tensorflow_loglevel():
    """Check the TF_CPP_MIN_LOG_LEVEL value produced by each verbosity option."""
    env_var = "TF_CPP_MIN_LOG_LEVEL"
    cases = [
        (["-v", "DEBUG"], "0"),  # Show INFO, WARNING and ERROR messages by TF
        ([], "1"),  # INFO level by default: show WARNING and ERROR messages by TF
        (["-v", "WARN"], "1"),  # Show WARNING and ERROR messages by TF
        (["-v", "ERROR"], "2"),  # Show ERROR messages by TF
        (["-v", "CRITICAL"], "3"),  # Show no messages by TF
    ]
    for verbosity_args, expected in cases:
        runner.invoke(annif.cli.cli, ["list-projects", *verbosity_args])
        assert os.environ[env_var] == expected
        # Drop the variable so the next case does not see the previous value
        os.environ.pop(env_var)


def test_list_projects():
result = runner.invoke(annif.cli.cli, ["list-projects"])
assert not result.exception
Expand Down
14 changes: 14 additions & 0 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,20 @@ def test_project_tfidf_is_not_trained(registry):
assert not project.is_trained


def test_project_tfidf_is_not_trained_prepared_only(registry, testdatadir):
    """A project directory holding only a "vectorizer" file and a "*-train*"
    file must not be reported as trained."""
    project_dir = testdatadir.join("projects/tfidf-fi")
    for filename in ("vectorizer", "dummy-tfidf-train.txt"):
        project_dir.ensure(filename)
    tfidf_project = registry.get_project("tfidf-fi")
    assert not tfidf_project.is_trained


def test_project_tfidf_modification_time_prepared_only(registry, testdatadir):
    """A project directory holding only a "vectorizer" file and a "*-train*"
    file must have no modification time."""
    project_dir = testdatadir.join("projects/tfidf-fi")
    for filename in ("vectorizer", "dummy-tfidf-train.txt"):
        project_dir.ensure(filename)
    tfidf_project = registry.get_project("tfidf-fi")
    assert tfidf_project.modification_time is None


def test_project_train_tfidf(registry, document_corpus, testdatadir):
project = registry.get_project("tfidf-fi")
project.train(document_corpus)
Expand Down
2 changes: 1 addition & 1 deletion tests/time-startup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ average_startup_time=$(echo "scale=3; ($startup_time1 + $startup_time2 + $startu
echo "Average Startup time: $average_startup_time seconds"

# Set the threshold for acceptable startup time in seconds
threshold=0.300
threshold=0.400

# Compare the average startup time with the threshold
if (( $(echo "$average_startup_time > $threshold" | bc -l) )); then
Expand Down

0 comments on commit 05d52ad

Please sign in to comment.