Skip to content

Commit

Permalink
feat: resume training parameter (#40)
Browse files Browse the repository at this point in the history
Signed-off-by: s0nicboOm <[email protected]>
  • Loading branch information
s0nicboOm committed Oct 17, 2022
1 parent a800d48 commit 9cc97cb
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 6 deletions.
14 changes: 11 additions & 3 deletions numalogic/models/autoencoder/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class AutoencoderPipeline(OutlierMixin):
supported values include {"absolute", "squared"}
threshold_min: the minimum threshold to use;
can be used when the threshold calculated is too low
resume_train: whether training should be resumable from a previously
saved state; when True, the optimizer state dict is also
stored in the registry so training can continue later.
>>> # Example usage
>>> from numalogic.models.autoencoder.variants import VanillaAE
Expand All @@ -59,6 +62,7 @@ def __init__(
std_tolerance: float = 3.0,
reconerr_method: str = "absolute",
threshold_min: float = None,
resume_train: bool = False,
):
if not (model and seq_len):
raise ValueError("No model and seq len provided!")
Expand All @@ -75,14 +79,17 @@ def __init__(
self.stdtol = std_tolerance
self.reconerr_func = self.get_reconerr_func(reconerr_method)
self.threshold_min = threshold_min
self.resume_train = resume_train

@property
def model_properties(self):
    """Return the pipeline state that gets persisted to the registry.

    The computed thresholds and reconstruction-error stats are always
    included; the optimizer state dict is added only when the pipeline
    was created with ``resume_train=True``, since it is only needed to
    resume training later.
    """
    props = {
        "thresholds": self._thresholds,
        "err_stats": self._stats,
    }
    # Optimizer state is dead weight in the registry unless the user
    # intends to resume training from it.
    if self.resume_train:
        props["optimizer_state_dict"] = self.optimizer.state_dict()
    return props

@property
def model(self) -> AutoencoderModel:
Expand Down Expand Up @@ -252,7 +259,8 @@ def save(self, path: Optional[str] = None) -> Optional[BinaryIO]:
return buf

def __load_metadata(self, **metadata) -> None:
    """Restore pipeline state from registry metadata.

    Thresholds and error stats are always restored; the optimizer state
    is restored only when the pipeline was created with
    ``resume_train=True`` (it is only persisted in that case).
    """
    self._thresholds = metadata["thresholds"]
    self._stats = metadata["err_stats"]
    if self.resume_train:
        self.optimizer.load_state_dict(metadata["optimizer_state_dict"])

Expand Down
52 changes: 50 additions & 2 deletions numalogic/tests/models/autoencoder/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,31 @@ def test_fit_predict(self):

def test_score_01(self):
    """Fit with resume_train enabled; score shape must match predictions."""
    ae_model = VanillaAE(SEQ_LEN, n_features=self.X_train.shape[1])
    pipeline = AutoencoderPipeline(
        ae_model, SEQ_LEN, num_epochs=5, optimizer="adagrad", resume_train=True
    )
    pipeline.fit(self.X_train)
    predictions = pipeline.predict(self.X_val)

    anomaly_score = pipeline.score(self.X_val)
    self.assertEqual(anomaly_score.shape, predictions.shape)
    # "absolute" is the default recon-error method, so np.abs is expected.
    self.assertEqual(pipeline.reconerr_func, np.abs)

def test_resume_training_01(self):
    """Refitting with resume_train=True must update thresholds and error stats."""
    ae_model = VanillaAE(SEQ_LEN, n_features=self.X_train.shape[1])
    pipeline = AutoencoderPipeline(ae_model, SEQ_LEN, num_epochs=10, resume_train=True)
    pipeline.fit(self.X_train)
    prev_thresholds = pipeline.thresholds.tolist()
    prev_mean = pipeline.err_stats["mean"].tolist()
    prev_std = pipeline.err_stats["std"].tolist()
    # Second fit continues training, so all derived statistics should move.
    pipeline.fit(self.X_train)
    self.assertNotEqual(pipeline.thresholds.tolist(), prev_thresholds)
    self.assertNotEqual(pipeline.err_stats["std"].tolist(), prev_std)
    self.assertNotEqual(pipeline.err_stats["mean"].tolist(), prev_mean)

def test_score_02(self):
stream_data = self.X_val[:12]
self.model = Conv1dAE(self.X_train.shape[1], 8)
Expand Down Expand Up @@ -231,14 +248,45 @@ def test_with_transformer_model(self):
)
self.assertIsInstance(trainer.model, TransformerAE)

def test_load_model_without_resume_train_01(self):
    """Default pipeline (resume_train=False) must not persist optimizer state."""
    data = np.random.randn(10, 1)
    ae_model = VanillaAE(10)
    source_pl = AutoencoderPipeline(ae_model, 10)
    source_pl.fit(data)
    target_pl = AutoencoderPipeline(ae_model, 10)
    target_pl.load(model=source_pl.model, **source_pl.model_properties)
    self.assertEqual(target_pl.err_stats["std"], source_pl.err_stats["std"])
    self.assertEqual(list(source_pl.model_properties.keys()), ["thresholds", "err_stats"])

def test_load_model_resume_train_01(self):
    """With resume_train=True the optimizer state dict is saved and loadable."""
    data = np.random.randn(10, 1)
    ae_model = VanillaAE(10)
    source_pl = AutoencoderPipeline(ae_model, 10, resume_train=True)
    source_pl.fit(data)
    target_pl = AutoencoderPipeline(ae_model, 10, resume_train=True)
    target_pl.load(model=source_pl.model, **source_pl.model_properties)
    self.assertEqual(target_pl.err_stats["std"], source_pl.err_stats["std"])
    expected_keys = ["thresholds", "err_stats", "optimizer_state_dict"]
    self.assertEqual(list(source_pl.model_properties.keys()), expected_keys)

def test_load_model_with_resume_train_02(self):
    """model_properties includes optimizer state when resume_train=True."""
    data = np.random.randn(10, 1)
    pipeline = AutoencoderPipeline(VanillaAE(10), 10, resume_train=True)
    pipeline.fit(data)
    expected_keys = ["thresholds", "err_stats", "optimizer_state_dict"]
    self.assertEqual(list(pipeline.model_properties.keys()), expected_keys)

def test_load_model_without_resume_train_02(self):
    """model_properties excludes optimizer state when resume_train=False."""
    data = np.random.randn(10, 1)
    pipeline = AutoencoderPipeline(VanillaAE(10), 10, resume_train=False)
    pipeline.fit(data)
    self.assertEqual(list(pipeline.model_properties.keys()), ["thresholds", "err_stats"])

def test_exception_in_load_model(self):
X = np.random.randn(10, 1)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "numalogic"
version = "0.2.5"
version = "0.2.6"
description = "Collection of operational Machine Learning models and tools."
authors = ["Numalogic Developers"]
packages = [{ include = "numalogic" }]
Expand Down

0 comments on commit 9cc97cb

Please sign in to comment.