Skip to content

Commit

Permalink
feat: resume training parameter (#40)
Browse files Browse the repository at this point in the history
Signed-off-by: s0nicboOm <[email protected]>
  • Loading branch information
s0nicboOm committed Oct 17, 2022
1 parent a800d48 commit 9cc97cb
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 6 deletions.
14 changes: 11 additions & 3 deletions numalogic/models/autoencoder/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class AutoencoderPipeline(OutlierMixin):
supported values include {"absolute", "squared"}
threshold_min: the minimum threshold to use;
can be used when the threshold calculated is too low
resume_train: whether training should be resumable from a previously
saved state; when True, the optimizer state dict is also
stored in the registry so training can continue later.
>>> # Example usage
>>> from numalogic.models.autoencoder.variants import VanillaAE
Expand All @@ -59,6 +62,7 @@ def __init__(
std_tolerance: float = 3.0,
reconerr_method: str = "absolute",
threshold_min: float = None,
resume_train: bool = False,
):
if not (model and seq_len):
raise ValueError("No model and seq len provided!")
Expand All @@ -75,14 +79,17 @@ def __init__(
self.stdtol = std_tolerance
self.reconerr_func = self.get_reconerr_func(reconerr_method)
self.threshold_min = threshold_min
self.resume_train = resume_train

@property
def model_properties(self):
    """Return the pipeline state that gets persisted to the registry.

    The computed thresholds and reconstruction-error stats are always
    included; the optimizer state dict is added only when the pipeline
    was created with ``resume_train=True``, since it is only needed to
    resume training later.
    """
    props = {
        "thresholds": self._thresholds,
        "err_stats": self._stats,
    }
    # Optimizer state is dead weight in the registry unless the user
    # intends to resume training from it.
    if self.resume_train:
        props["optimizer_state_dict"] = self.optimizer.state_dict()
    return props

@property
def model(self) -> AutoencoderModel:
Expand Down Expand Up @@ -252,7 +259,8 @@ def save(self, path: Optional[str] = None) -> Optional[BinaryIO]:
return buf

def __load_metadata(self, **metadata) -> None:
    """Restore pipeline state from registry metadata.

    Thresholds and error stats are always restored; the optimizer state
    is restored only when the pipeline was created with
    ``resume_train=True`` (it is only persisted in that case).
    """
    self._thresholds = metadata["thresholds"]
    self._stats = metadata["err_stats"]
    if self.resume_train:
        self.optimizer.load_state_dict(metadata["optimizer_state_dict"])

Expand Down
52 changes: 50 additions & 2 deletions numalogic/tests/models/autoencoder/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,31 @@ def test_fit_predict(self):

def test_score_01(self):
    """Fit with resume_train enabled; score shape must match predictions."""
    ae_model = VanillaAE(SEQ_LEN, n_features=self.X_train.shape[1])
    pipeline = AutoencoderPipeline(
        ae_model, SEQ_LEN, num_epochs=5, optimizer="adagrad", resume_train=True
    )
    pipeline.fit(self.X_train)
    predictions = pipeline.predict(self.X_val)

    anomaly_score = pipeline.score(self.X_val)
    self.assertEqual(anomaly_score.shape, predictions.shape)
    # "absolute" is the default recon-error method, so np.abs is expected.
    self.assertEqual(pipeline.reconerr_func, np.abs)

def test_resume_training_01(self):
    """Refitting with resume_train=True must update thresholds and error stats."""
    ae_model = VanillaAE(SEQ_LEN, n_features=self.X_train.shape[1])
    pipeline = AutoencoderPipeline(ae_model, SEQ_LEN, num_epochs=10, resume_train=True)
    pipeline.fit(self.X_train)
    prev_thresholds = pipeline.thresholds.tolist()
    prev_mean = pipeline.err_stats["mean"].tolist()
    prev_std = pipeline.err_stats["std"].tolist()
    # Second fit continues training, so all derived statistics should move.
    pipeline.fit(self.X_train)
    self.assertNotEqual(pipeline.thresholds.tolist(), prev_thresholds)
    self.assertNotEqual(pipeline.err_stats["std"].tolist(), prev_std)
    self.assertNotEqual(pipeline.err_stats["mean"].tolist(), prev_mean)

def test_score_02(self):
stream_data = self.X_val[:12]
self.model = Conv1dAE(self.X_train.shape[1], 8)
Expand Down Expand Up @@ -231,14 +248,45 @@ def test_with_transformer_model(self):
)
self.assertIsInstance(trainer.model, TransformerAE)

def test_load_model_without_resume_train_01(self):
    """Default pipeline (resume_train=False) must not persist optimizer state."""
    data = np.random.randn(10, 1)
    ae_model = VanillaAE(10)
    source_pl = AutoencoderPipeline(ae_model, 10)
    source_pl.fit(data)
    target_pl = AutoencoderPipeline(ae_model, 10)
    target_pl.load(model=source_pl.model, **source_pl.model_properties)
    self.assertEqual(target_pl.err_stats["std"], source_pl.err_stats["std"])
    self.assertEqual(list(source_pl.model_properties.keys()), ["thresholds", "err_stats"])

def test_load_model_resume_train_01(self):
    """With resume_train=True the optimizer state dict is saved and loadable."""
    data = np.random.randn(10, 1)
    ae_model = VanillaAE(10)
    source_pl = AutoencoderPipeline(ae_model, 10, resume_train=True)
    source_pl.fit(data)
    target_pl = AutoencoderPipeline(ae_model, 10, resume_train=True)
    target_pl.load(model=source_pl.model, **source_pl.model_properties)
    self.assertEqual(target_pl.err_stats["std"], source_pl.err_stats["std"])
    expected_keys = ["thresholds", "err_stats", "optimizer_state_dict"]
    self.assertEqual(list(source_pl.model_properties.keys()), expected_keys)

def test_load_model_with_resume_train_02(self):
    """model_properties includes optimizer state when resume_train=True."""
    data = np.random.randn(10, 1)
    pipeline = AutoencoderPipeline(VanillaAE(10), 10, resume_train=True)
    pipeline.fit(data)
    expected_keys = ["thresholds", "err_stats", "optimizer_state_dict"]
    self.assertEqual(list(pipeline.model_properties.keys()), expected_keys)

def test_load_model_without_resume_train_02(self):
    """model_properties excludes optimizer state when resume_train=False."""
    data = np.random.randn(10, 1)
    pipeline = AutoencoderPipeline(VanillaAE(10), 10, resume_train=False)
    pipeline.fit(data)
    self.assertEqual(list(pipeline.model_properties.keys()), ["thresholds", "err_stats"])

def test_exception_in_load_model(self):
X = np.random.randn(10, 1)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "numalogic"
version = "0.2.5"
version = "0.2.6"
description = "Collection of operational Machine Learning models and tools."
authors = ["Numalogic Developers"]
packages = [{ include = "numalogic" }]
Expand Down

0 comments on commit 9cc97cb

Please sign in to comment.