Hyperparameter optimization for NN ensemble #569

Draft · wants to merge 5 commits into main
Changes from 1 commit
refactor: avoid self.params; assign self._model outside _create_model
osma committed Feb 11, 2022
commit 59b45f5575fac9e951053c46a9da215e731063cf
30 changes: 15 additions & 15 deletions annif/backend/nn_ensemble.py
```diff
@@ -137,18 +137,17 @@ def _merge_hits_from_sources(self, hits_from_sources, params):
             np.expand_dims(score_vector.transpose(), 0))
         return VectorSuggestionResult(results[0])
 
-    def _create_model(self, sources):
+    def _create_model(self, sources, params):
         self.info("creating NN ensemble model")
 
         inputs = Input(shape=(len(self.project.subjects), len(sources)))
 
         flat_input = Flatten()(inputs)
         drop_input = Dropout(
-            rate=float(
-                self.params['dropout_rate']))(flat_input)
-        hidden = Dense(int(self.params['nodes']),
+            rate=float(params['dropout_rate']))(flat_input)
+        hidden = Dense(int(params['nodes']),
                        activation="relu")(drop_input)
-        drop_hidden = Dropout(rate=float(self.params['dropout_rate']))(hidden)
+        drop_hidden = Dropout(rate=float(params['dropout_rate']))(hidden)
         delta = Dense(len(self.project.subjects),
                       kernel_initializer='zeros',
                       bias_initializer='zeros')(drop_hidden)
@@ -157,21 +156,22 @@ def _create_model(self, sources):
 
         predictions = Add()([mean, delta])
 
-        self._model = Model(inputs=inputs, outputs=predictions)
-        self._model.compile(optimizer=self.params['optimizer'],
-                            loss='binary_crossentropy',
-                            metrics=['top_k_categorical_accuracy'])
-        if 'lr' in self.params:
-            self._model.optimizer.learning_rate.assign(
-                float(self.params['lr']))
+        model = Model(inputs=inputs, outputs=predictions)
+        model.compile(optimizer=params['optimizer'],
+                      loss='binary_crossentropy',
+                      metrics=['top_k_categorical_accuracy'])
+        if 'lr' in params:
+            model.optimizer.learning_rate.assign(
+                float(params['lr']))
 
         summary = []
-        self._model.summary(print_fn=summary.append)
+        model.summary(print_fn=summary.append)
         self.debug("Created model: \n" + "\n".join(summary))
+        return model
 
     def _train(self, corpus, params, jobs=0):
-        sources = annif.util.parse_sources(self.params['sources'])
-        self._create_model(sources)
+        sources = annif.util.parse_sources(params['sources'])
+        self._model = self._create_model(sources, params)
         self._fit_model(
             corpus,
             epochs=int(params['epochs']),
```
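For context on why this refactor matters for the PR's goal (hyperparameter optimization): `_create_model` now reads hyperparameters from an explicit `params` argument and returns the compiled model instead of assigning `self._model` as a side effect, so a tuning loop can build and compare several candidate models before committing one to the backend. A minimal sketch of that pattern — `select_best_model`, `evaluate`, and the candidate dicts below are hypothetical, not part of this PR:

```python
def select_best_model(backend, sources, candidate_params, evaluate):
    """Hypothetical helper: build one model per candidate params dict
    with the refactored _create_model and keep the best-scoring one."""
    best_score, best_model = float('-inf'), None
    for params in candidate_params:
        # _create_model no longer touches backend state: it reads the
        # given params and returns a fresh, compiled Keras model.
        model = backend._create_model(sources, params)
        score = evaluate(model)  # caller-supplied validation metric
        if score > best_score:
            best_score, best_model = score, model
    return best_model

# Example candidate sets; values are strings because _create_model
# casts them with float()/int() itself.
candidates = [
    {'dropout_rate': '0.2', 'nodes': '100', 'optimizer': 'adam'},
    {'dropout_rate': '0.5', 'nodes': '200', 'optimizer': 'adam', 'lr': '0.002'},
]
```

The winning model would then be assigned to `backend._model`, mirroring what `_train` now does with the return value of `_create_model`.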
4 changes: 2 additions & 2 deletions tests/test_backend_nn_ensemble.py
```diff
@@ -56,8 +56,8 @@ def test_nn_ensemble_can_set_lr(registry):
         backend_id='nn_ensemble',
         config_params={'epochs': 1, 'lr': 0.002},
         project=project)
-    nn_ensemble._create_model(['dummy-en'])
-    assert nn_ensemble._model.optimizer.learning_rate.value() == 0.002
+    model = nn_ensemble._create_model(['dummy-en'], nn_ensemble.params)
+    assert model.optimizer.learning_rate.value() == 0.002
 
 
 def test_set_lmdb_map_size(registry, tmpdir):
```
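As a standalone illustration of the two Keras idioms this diff relies on — overriding the learning rate on an already-compiled model, and capturing `model.summary()` output as text — here is a minimal sketch with a toy model standing in for the ensemble (TF 2.x Keras assumed; the toy architecture is not from this PR):

```python
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

# Toy stand-in for the NN ensemble model.
inputs = Input(shape=(4,))
model = Model(inputs=inputs, outputs=Dense(1)(inputs))
model.compile(optimizer='adam', loss='binary_crossentropy')

# Override the learning rate after compile, as _create_model does
# when an 'lr' parameter is present.
model.optimizer.learning_rate.assign(0.002)
assert model.optimizer.learning_rate.value() == 0.002

# Capture the summary as text instead of printing it, matching the
# print_fn trick used before the self.debug() call.
summary = []
model.summary(print_fn=summary.append)
print("\n".join(summary))
```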