feat: add transformers model (#8)
Signed-off-by: s0nicboOm <[email protected]>
s0nicboOm committed Jul 21, 2022
1 parent 6fb0bc3 commit 5314070
Showing 12 changed files with 763 additions and 100 deletions.
102 changes: 84 additions & 18 deletions numalogic/models/autoencoder/pipeline.py
@@ -16,26 +16,27 @@


class AutoencoderPipeline(OutlierMixin):
"""
r"""
Class to simplify training, inference, loading and saving of time-series autoencoders.
Note: this class only supports Pytorch models.
:param model: model instance
:param seq_len: sequence length
:param loss_fn: loss function used for training
supported values include {"huber", "l1", "mse"}
:param optimizer: optimizer to be used for training.
supported values include {"adam", "adagrad", "rmsprop"}
:param lr: learning rate
:param batch_size: batch size for training
:param num_epochs: number of epochs for training
:param std_tolerance: determines how many times the standard deviation to be used for threshold
:param reconerr_method: method used to calculate the distance
between the original and the reconstructed data
supported values include {"absolute", "squared"}
:param threshold_min: the minimum threshold to use;
can be used when the threshold calculated is too low
Note:
This class only supports Pytorch models.
Args:
model: model instance
seq_len: sequence length
loss_fn: loss function used for training
supported values include {"huber", "l1", "mse"}
optimizer: optimizer to be used for training.
supported values include {"adam", "adagrad", "rmsprop"}
lr: learning rate
batch_size: batch size for training
num_epochs: number of epochs for training
std_tolerance: number of standard deviations used to set the threshold
reconerr_method: method used to calculate the distance
between the original and the reconstructed data
supported values include {"absolute", "squared"}
threshold_min: the minimum threshold to use;
can be used when the calculated threshold is too low
>>> # Example usage
>>> from numalogic.models.autoencoder.variants import VanillaAE
@@ -123,6 +124,17 @@ def init_optimizer(self, optimizer: str, lr: float):
raise NotImplementedError(f"Unsupported optimizer value provided: {optimizer}")

def fit(self, X: NDArray[float], y=None, log_freq: int = 5) -> "AutoencoderPipeline":
r"""
Fit function to train the autoencoder model.
Args:
X: training dataset
y: labels
log_freq: logging frequency
Returns:
AutoencoderPipeline instance
"""
_LOGGER.info("Training autoencoder model..")

dataset = self._model.construct_dataset(X, self.seq_len)
@@ -148,6 +160,16 @@ def fit(self, X: NDArray[float], y=None, log_freq: int = 5) -> "AutoencoderPipeline":
return self

def predict(self, X: NDArray[float], seq_len: int = None) -> NDArray[float]:
r"""
Return the reconstruction from the model.
Args:
X: input dataset
seq_len: sequence length / window length
Returns:
Numpy array with the reconstructed output
"""
if not seq_len:
seq_len = self.seq_len or len(X)
dataset = self._model.construct_dataset(X, seq_len)
@@ -157,6 +179,16 @@ def predict(self, X: NDArray[float], seq_len: int = None) -> NDArray[float]:
return dataset.recover_shape(pred)

def score(self, X: NDArray[float], seq_len: int = None) -> NDArray[float]:
r"""
Return anomaly scores using the calculated thresholds.
Args:
X: input dataset
seq_len: sequence length / window length
Returns:
numpy array with anomaly scores
"""
if self._thresholds is None:
raise RuntimeError("Thresholds not present!!!")
thresh = self._thresholds.reshape(1, -1)
@@ -167,13 +199,31 @@ def score(self, X: NDArray[float], seq_len: int = None) -> NDArray[float]:
return anomaly_scores

def recon_err(self, X: NDArray[float], seq_len: int) -> NDArray:
r"""
Returns the reconstruction error.
Args:
X: input dataset
seq_len: sequence length / window length
Returns:
numpy array with reconstruction errors
"""
x_recon = self.predict(X, seq_len=seq_len)
recon_err = self.reconerr_func(X - x_recon)
return recon_err

def find_thresholds(
self, X: NDArray[float]
) -> Tuple[NDArray[float], NDArray[float], NDArray[float]]:
r"""
Calculate thresholds for the anomaly model.
Args:
X: training dataset
Returns:
Tuple consisting of thresholds, reconstruction error mean, reconstruction error std
"""
recon_err = self.recon_err(X, seq_len=self.seq_len)
recon_err_mean = np.mean(recon_err, axis=0)
recon_err_std = np.std(recon_err, axis=0)
@@ -183,6 +233,14 @@ def find_thresholds(
return thresholds, recon_err_mean, recon_err_std
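# Conceptual note (a sketch, not part of this commit; the computation itself is
# folded above): a formulation consistent with the std_tolerance parameter would be
# thresholds = recon_err_mean + std_tolerance * recon_err_std, per feature.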

def save(self, path: Optional[str] = None) -> Optional[BinaryIO]:
r"""
Save the model. If a path is provided, the model is saved at that path.
Args:
path: path to save the model (Optional parameter)
Returns:
Binary file-like object if path is None
"""
state_dict = copy(self.model_properties)
state_dict["model_state_dict"] = self._model.state_dict()
if path:
@@ -198,6 +256,14 @@ def __load_metadata(self, **metadata) -> None:
self._stats = metadata["err_stats"]

def load(self, path: Union[str, BinaryIO] = None, model=None, **metadata) -> None:
r"""
Load the model into the pipeline.
Args:
path: path to load the model
model: machine learning model
metadata: additional pipeline metadata
"""
if (path and model) or (not path and not model):
raise ValueError("One of path or model needs to be provided!")
if model:
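For context, a minimal usage sketch of the pipeline API above. The VanillaAE constructor signature is not shown in this diff, so its argument is assumed here; fit() is assumed to compute the thresholds that score() later uses, as the folded doctest suggests.

import numpy as np
from numalogic.models.autoencoder.pipeline import AutoencoderPipeline
from numalogic.models.autoencoder.variants import VanillaAE

x_train = np.random.randn(1000, 1)  # hypothetical univariate training series
pipeline = AutoencoderPipeline(model=VanillaAE(seq_len=12), seq_len=12)  # VanillaAE args assumed
pipeline.fit(x_train)
scores = pipeline.score(x_train)  # anomaly score per point, using the fitted thresholds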
3 changes: 2 additions & 1 deletion numalogic/models/autoencoder/variants/__init__.py
@@ -1,7 +1,8 @@
from numalogic.models.autoencoder.variants.vanilla import VanillaAE
from numalogic.models.autoencoder.variants.conv import Conv1dAE
from numalogic.models.autoencoder.variants.lstm import LSTMAE
from numalogic.models.autoencoder.variants.transformer import TransformerAE
from numalogic.models.autoencoder.base import TorchAE


__all__ = ["VanillaAE", "Conv1dAE", "LSTMAE", "TorchAE"]
__all__ = ["VanillaAE", "Conv1dAE", "LSTMAE", "TransformerAE", "TorchAE"]
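With the expanded __all__, the new variant can be imported alongside the existing ones:

from numalogic.models.autoencoder.variants import TransformerAE  # newly exported in this commit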
40 changes: 32 additions & 8 deletions numalogic/models/autoencoder/variants/conv.py
@@ -15,11 +15,28 @@


class Conv1dAE(TorchAE):
"""
r"""
One-dimensional Convolutional Autoencoder with multichannel support.
Args:
in_channels: Number of channels in the input
enc_channels: Number of channels produced by the convolution
kernel_size: kernel size (default=7)
stride: stride length (default=2)
padding: padding parameter for encoder (default=3)
output_padding: padding parameter for decoder (default=1)
"""

def __init__(self, in_channels: int, enc_channels: int):
def __init__(
self,
in_channels: int,
enc_channels: int,
kernel_size=7,
stride=2,
padding=3,
output_padding=1,
):
super(Conv1dAE, self).__init__()
self.encoder = nn.Sequential(
nn.Conv1d(in_channels, enc_channels, kernel_size=7, stride=2, padding=3),
@@ -43,14 +60,11 @@ def __init__(self, in_channels: int, enc_channels: int):

self.thresholds = None

def __repr__(self) -> str:
return summary(self)

def summary(self, input_shape: Tuple[int, ...]):
print(summary(self, input_size=input_shape))

@staticmethod
def init_weights(m: nn.Module) -> None:
r"""
Initialize the parameters of the convolutional model.
"""
if type(m) in (nn.ConvTranspose1d, nn.Conv1d):
nn.init.xavier_normal_(m.weight, gain=calculate_gain("relu"))

Expand All @@ -60,6 +74,16 @@ def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]:
return encoded, decoded

def construct_dataset(self, x: Tensor, seq_len: int = None) -> SequenceDataset:
r"""
Constructs a dataset from the given tensor and seq_len.
Args:
x: input tensor
seq_len: sequence length / window length
Returns:
SequenceDataset instance
"""
__seq_len = seq_len or self.seq_len
dataset = SequenceDataset(x, __seq_len, permute=True)
return dataset
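A short sketch exercising the new constructor parameters (defaults as in the diff). The (batch, channels, seq_len) input layout is an assumption, consistent with permute=True in construct_dataset, and the shapes line up only if the decoder mirrors the encoder as the folded lines suggest.

import torch
from numalogic.models.autoencoder.variants import Conv1dAE

model = Conv1dAE(in_channels=1, enc_channels=8)  # kernel_size=7, stride=2, padding=3 by default
x = torch.randn(4, 1, 12)  # (batch, channels, seq_len) -- assumed layout
encoded, decoded = model(x)  # decoded is expected to match x's shape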
49 changes: 42 additions & 7 deletions numalogic/models/autoencoder/variants/lstm.py
@@ -16,6 +16,16 @@


class _Encoder(nn.Module):
r"""
Encoder network for the autoencoder network.
Args:
seq_len: sequence length / window length
no_features: number of features
embedding_size: embedding layer size
num_layers: number of encoder layers
"""

def __init__(self, seq_len: int, no_features: int, embedding_size: int, num_layers=1):
super().__init__()

Expand All @@ -35,6 +45,16 @@ def forward(self, x: Tensor) -> Tensor:


class _Decoder(nn.Module):
r"""
Decoder network for the autoencoder network.
Args:
seq_len: sequence length / window length
no_features: number of features
output_size: output feature size
hidden_size: hidden layer size (default = 32)
num_layers: number of decoder layers
"""

def __init__(
self, seq_len: int, no_features: int, output_size: int, hidden_size=32, num_layers=1
):
@@ -62,8 +82,16 @@ def forward(self, x: Tensor) -> Tensor:


class LSTMAE(TorchAE):
"""
r"""
Long Short-Term Memory (LSTM) based autoencoder.
Args:
seq_len: sequence length / window length
no_features: number of features
embedding_dim: embedding dimension for the network
encoder_layers: number of encoder layers (default = 1)
decoder_layers: number of decoder layers (default = 1)
"""

def __init__(
@@ -99,14 +127,11 @@ def __init__(
self.decoder = self.decoder.to(_DEVICE)
self.decoder.apply(self.init_weights)

def __repr__(self) -> str:
return summary(self)

def summary(self, input_shape: tuple) -> None:
print(summary(self, input_size=input_shape))

@staticmethod
def init_weights(m: nn.Module) -> None:
r"""
Initialize the parameters of the LSTM model.
"""
for node, param in m.named_parameters():
if "bias" in node:
nn.init.zeros_(param)
@@ -120,6 +145,16 @@ def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]:
return encoded, decoded

def construct_dataset(self, x: Tensor, seq_len: int = None) -> SequenceDataset:
r"""
Constructs a dataset from the given tensor and seq_len.
Args:
x: input tensor
seq_len: sequence length / window length
Returns:
SequenceDataset instance
"""
__seq_len = seq_len or self.seq_len
dataset = SequenceDataset(x, __seq_len, permute=False)
return dataset
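A corresponding sketch for the LSTM variant; the batch-first (batch, seq_len, features) layout is an assumption, consistent with permute=False above.

import torch
from numalogic.models.autoencoder.variants import LSTMAE

model = LSTMAE(seq_len=12, no_features=1, embedding_dim=4)
x = torch.randn(8, 12, 1)  # (batch, seq_len, features) -- assumed layout
encoded, decoded = model(x)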