Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the bug of tensors not on the same device when running on CUDA device #59

Merged
merged 10 commits into from
Apr 19, 2023
Merged
Prev Previous commit
Next Next commit
fix: map data to the proper device in functions _assemble_input_for*;
  • Loading branch information
WenjieDu committed Apr 19, 2023
commit 1a22fd872e6d2f97801d945e56ba2b5bc641989b
2 changes: 1 addition & 1 deletion pypots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Created by Wenjie Du <[email protected]>
# License: GPL-v3

from .__version__ import version as __version__
from pypots.__version__ import version as __version__

__all__ = [
"data",
Expand Down
70 changes: 55 additions & 15 deletions pypots/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Base class for main models in PyPOTS.
The base (abstract) classes for models in PyPOTS.
"""

# Created by Wenjie Du <[email protected]>
Expand All @@ -17,17 +17,34 @@


class BaseModel(ABC):
"""Base model class for all model implementations.
"""The base model class for all model implementations.

Parameters
----------
device : str or `torch.device`, default = None,
The device for the model to run on.
If not given, will try to use CUDA devices first, then CPUs. CUDA and CPU are so far the main devices for people
to train ML models. Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future.
If not given, will try to use CUDA devices first (will use the GPU with device number 0 only by default),
then CPUs, considering CUDA and CPU are so far the main devices for people to train ML models.
Other devices like Google TPU and Apple Silicon accelerator MPS may be added in the future.

tb_file_saving_path : str, default = None,
The path to save the tensorboard file, which contains the loss values recorded during training.
The path to save the training logs (i.e. loss values recorded during training) into a tensorboard file.
Will not save if not given.

Attributes
----------
model : object, default = None,
The underlying model or algorithm to finish the task.

summary_writer : None or torch.utils.tensorboard.SummaryWriter, default = None,
The event writer to save training logs. Default as None. It only works when parameter `tb_file_saving_path` is
given, otherwise the training events won't be saved.

It is designed to be set up while initializing the model because it is created to
1) help visualize the model's training procedure (during training, not after) and
2) assist users in tuning the model's hyper-parameters.
If it were only set up after training with a function like setter(), it could not achieve the 1st purpose.

"""

def __init__(
Expand All @@ -36,6 +53,8 @@ def __init__(
tb_file_saving_path: str = None,
):
self.model = None
self.summary_writer = None
self.device = None

# set up the device for model running below
if device is None:
Expand All @@ -57,6 +76,7 @@ def __init__(
)

# set up the summary writer for training log saving below
# initialize self.summary_writer if tb_file_saving_path is given and not None, otherwise don't save the log
if isinstance(tb_file_saving_path, str):

from datetime import datetime
Expand All @@ -73,28 +93,29 @@ def __init__(
self.summary_writer = SummaryWriter(
actual_tb_file_saving_path, filename_suffix=".pypots"
)
else:
# don't save the log if tb_file_saving_path isn't given, set summary_writer as None
self.summary_writer = None

def save_log_into_tb_file(self, step: int, stage: str, loss_dict: dict) -> None:
"""Saving training logs into the tensorboard file.
"""Saving training logs into the tensorboard file specified by the given path `tb_file_saving_path`.

Parameters
----------
step : int,
The current training step number.
One step for one batch processing, so the number of steps means how many batches the model has processed.

stage : str,
The stage of the current operation, 'training' or 'validating'.
The stage of the current operation, e.g. 'pretraining', 'training', 'validating'.

loss_dict : dict,
A dictionary containing items to log, should have at least one item, e.g. {'imputation loss': 0.05}
A dictionary containing items to log. It should have at least one item, and only items whose names
include "loss" or "error" will be logged, e.g. {'imputation_loss': 0.05, "classification_error": 0.32}.

"""
while len(loss_dict) > 0:
(item_name, loss) = loss_dict.popitem()
if "loss" in item_name: # save all items containing word "loss" in the name
# save all items containing "loss" or "error" in the name
# WDU: may enable customization keywords in the future
if ("loss" in item_name) or ("error" in item_name):
self.summary_writer.add_scalar(f"{stage}/{item_name}", loss, step)

def save_model(
Expand All @@ -103,7 +124,7 @@ def save_model(
file_name: str,
overwrite: bool = False,
) -> None:
"""Save the model to a disk file.
"""Save the model with current parameters to a disk file.

A .pypots extension will be appended to the filename if it does not already have one.
Please note that such an extension is not necessary, but to indicate the saved model is from PyPOTS framework
Expand Down Expand Up @@ -138,7 +159,9 @@ def save_model(
torch.save(self.model, saving_path)
logger.info(f"Saved successfully to {saving_path}.")
except Exception as e:
raise RuntimeError(f'{e} Failed to save the model to "{saving_path}"!')
raise RuntimeError(
f'Failed to save the model to "{saving_path}" because of the below error! \n{e}'
)

def load_model(self, model_path: str) -> None:
"""Load the saved model from a disk file.
Expand Down Expand Up @@ -166,7 +189,7 @@ def load_model(self, model_path: str) -> None:


class BaseNNModel(BaseModel):
"""Abstract class for all neural-network models.
"""The abstract class for all neural-network models.

Parameters
----------
Expand Down Expand Up @@ -197,6 +220,22 @@ class BaseNNModel(BaseModel):

tb_file_saving_path : str, default = None,
The path to save the tensorboard file, which contains the loss values recorded during training.


Attributes
----------
optimizer : torch.optim.Optimizer, default = None,
The optimizer used to back-propagate losses for model optimization. Default as None, will be implemented
when the concrete implementation model gets initialized.

best_model_dict : dict, default = None,
A dictionary containing the trained model that achieves the best performance according to the defined loss,
i.e. the lowest loss.

best_loss : float, default = inf,
The criterion used to judge whether the model's performance is the best so far.
Usually the lower, the better.

"""

def __init__(
Expand Down Expand Up @@ -224,6 +263,7 @@ def __init__(
self.model = None
self.optimizer = None
self.best_model_dict = None
# WDU: may enable users to customize the criteria in the future
self.best_loss = float("inf")

def _print_model_size(self) -> None:
Expand Down
10 changes: 8 additions & 2 deletions pypots/classification/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
The base class for classification models.
The base classes for PyPOTS classification models.
"""

# Created by Wenjie Du <[email protected]>
Expand All @@ -18,7 +18,13 @@


class BaseClassifier(BaseModel):
"""Abstract class for all classification models."""
"""The abstract class for all PyPOTS classification models.
Parameters
---
device
tb_file_saving_path

"""

def __init__(
self,
Expand Down
4 changes: 2 additions & 2 deletions pypots/classification/brits.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def _assemble_input_for_training(self, data: dict) -> dict:
back_missing_mask,
back_deltas,
label,
) = data
) = map(lambda x: x.to(self.device), data)

# assemble input data
inputs = {
Expand Down Expand Up @@ -278,7 +278,7 @@ def _assemble_input_for_testing(self, data: dict) -> dict:
back_X,
back_missing_mask,
back_deltas,
) = data
) = map(lambda x: x.to(self.device), data)

# assemble input data
inputs = {
Expand Down
8 changes: 6 additions & 2 deletions pypots/classification/grud.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@ def _assemble_input_for_training(self, data: dict) -> dict:
A dictionary with data assembled.
"""
# fetch data
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean, label = data
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean, label = map(
lambda x: x.to(self.device), data
)

# assemble input data
inputs = {
Expand Down Expand Up @@ -232,7 +234,9 @@ def _assemble_input_for_testing(self, data: dict) -> dict:
inputs : dict,
A python dictionary contains the input data for model testing.
"""
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = map(
lambda x: x.to(self.device), data
)

inputs = {
"indices": indices,
Expand Down
8 changes: 6 additions & 2 deletions pypots/classification/raindrop.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,9 @@ def _assemble_input_for_training(self, data: dict) -> dict:
A dictionary with data assembled.
"""
# fetch data
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean, label = data
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean, label = map(
lambda x: x.to(self.device), data
)

bz, n_steps, n_features = X.shape
lengths = torch.tensor([n_steps] * bz, dtype=torch.float)
Expand Down Expand Up @@ -743,7 +745,9 @@ def _assemble_input_for_testing(self, data: dict) -> dict:
inputs : dict,
A python dictionary contains the input data for model testing.
"""
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data
indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = map(
lambda x: x.to(self.device), data
)
bz, n_steps, n_features = X.shape
lengths = torch.tensor([n_steps] * bz, dtype=torch.float)
times = torch.tensor(range(n_steps), dtype=torch.float).repeat(bz, 1)
Expand Down
2 changes: 1 addition & 1 deletion pypots/clustering/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
The base class for clustering models.
The base classes for PyPOTS clustering models.
"""

# Created by Wenjie Du <[email protected]>
Expand Down
2 changes: 1 addition & 1 deletion pypots/clustering/crli.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def _assemble_input_for_training(self, data: list) -> dict:
"""

# fetch data
indices, X, _, missing_mask, _, _ = data
indices, X, _, missing_mask, _, _ = map(lambda x: x.to(self.device), data)

inputs = {
"X": X,
Expand Down
2 changes: 1 addition & 1 deletion pypots/clustering/vader.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ def _assemble_input_for_training(self, data: list) -> dict:
"""

# fetch data
indices, X, _, missing_mask, _, _ = data
indices, X, _, missing_mask, _, _ = map(lambda x: x.to(self.device), data)

inputs = {
"X": X,
Expand Down
2 changes: 1 addition & 1 deletion pypots/data/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Utilities for data manipulation
The base class for PyPOTS datasets.
"""

# Created by Wenjie Du <[email protected]>
Expand Down
10 changes: 5 additions & 5 deletions pypots/data/dataset_for_mit.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ class DatasetForMIT(BaseDataset):
"""

def __init__(
self,
data: Union[dict, str],
file_type: str = "h5py",
rate: float = 0.2,
self,
data: Union[dict, str],
file_type: str = "h5py",
rate: float = 0.2,
):
super().__init__(data, file_type)
self.rate = rate
Expand Down Expand Up @@ -124,7 +124,7 @@ def _fetch_data_from_file(self, idx: int) -> Iterable:
]

if (
"y" in self.file_handle.keys()
"y" in self.file_handle.keys()
): # if the dataset has labels, then fetch it from the file
sample.append(torch.tensor(self.file_handle["y"][idx], dtype=torch.long))

Expand Down
2 changes: 1 addition & 1 deletion pypots/forecasting/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
The base class for forecasting models.
The base classes for PyPOTS forecasting models.
"""

# Created by Wenjie Du <[email protected]>
Expand Down
4 changes: 2 additions & 2 deletions pypots/imputation/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
The base class for imputation models.
The base class for PyPOTS imputation models.
"""

# Created by Wenjie Du <[email protected]>
Expand Down Expand Up @@ -258,7 +258,7 @@ def _train_model(
imputation_collector.append(imputed_data)

imputation_collector = torch.cat(imputation_collector)
imputation_collector = imputation_collector.numpy()
imputation_collector = imputation_collector.cpu().detach().numpy()

mean_val_loss = cal_mae(
imputation_collector,
Expand Down
4 changes: 3 additions & 1 deletion pypots/imputation/brits.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,9 @@ def _assemble_input_for_training(self, data: list) -> dict:
"""

# fetch data
indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas = data
indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas = map(
lambda x: x.to(self.device), data
)

# assemble input data
inputs = {
Expand Down
6 changes: 4 additions & 2 deletions pypots/imputation/saits.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ def _assemble_input_for_training(self, data: list) -> dict:
A python dictionary contains the input data for model training.
"""

indices, X_intact, X, missing_mask, indicating_mask = data
indices, X_intact, X, missing_mask, indicating_mask = map(
lambda x: x.to(self.device), data
)

inputs = {
"X": X,
Expand Down Expand Up @@ -275,7 +277,7 @@ def _assemble_input_for_validating(self, data) -> dict:
inputs : dict,
A python dictionary contains the input data for model validating.
"""
indices, X, missing_mask = data
indices, X, missing_mask = map(lambda x: x.to(self.device), data)

inputs = {
"X": X,
Expand Down
6 changes: 4 additions & 2 deletions pypots/imputation/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,9 @@ def _assemble_input_for_training(self, data: dict) -> dict:
A python dictionary contains the input data for model training.
"""

indices, X_intact, X, missing_mask, indicating_mask = data
indices, X_intact, X, missing_mask, indicating_mask = map(
lambda x: x.to(self.device), data
)

inputs = {
"X": X,
Expand Down Expand Up @@ -385,7 +387,7 @@ def _assemble_input_for_validating(self, data: list) -> dict:
inputs : dict,
A python dictionary contains the input data for model validating.
"""
indices, X, missing_mask = data
indices, X, missing_mask = map(lambda x: x.to(self.device), data)

inputs = {
"X": X,
Expand Down