Skip to content

Commit

Permalink
fix: Sparse AE for vanilla and conv (#199)
Browse files Browse the repository at this point in the history
- Fix Sparsity calculation for Vanilla and Convolutional AE
- Update benchmarks
- Improve streaming dataset to support slicing

Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 committed Jun 2, 2023
1 parent 5e69f5f commit a2b00c1
Show file tree
Hide file tree
Showing 22 changed files with 305 additions and 108 deletions.
14 changes: 8 additions & 6 deletions benchmarks/kpi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@ The performance table is shown below, although note that the hyperparameters hav
The hyperparams used are available inside the results directory under each algorithm.


| KPI ID | KPI index | Algorithm | ROC-AUC |
|--------------------------------------|-----------|---------------|---------|
| 431a8542-c468-3988-a508-3afd06a218da | 14 | VanillaAE | 0.89 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | Conv1dAE | 0.88 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | LSTMAE | 0.86 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | TransformerAE | 0.82 |
| KPI ID | KPI index | Algorithm | ROC-AUC (test set) |
|--------------------------------------|-----------|-----------------|--------------------|
| 431a8542-c468-3988-a508-3afd06a218da | 14 | VanillaAE | 0.89 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | Conv1dAE | 0.88 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | LSTMAE | 0.86 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | TransformerAE | 0.82 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | SparseVanillaAE | 0.93 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | SparseConv1dAE | 0.77 |


Full credit to Zeyan Li et al. for constructing large-scale real world benchmark datasets for AIOps.
Expand Down
191 changes: 116 additions & 75 deletions benchmarks/kpi/benchmark.ipynb

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions benchmarks/kpi/results/kpi_idx_14/sparseconv/hyperparams.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"BATCH_SIZE": 64,
"SPLIT_RATIOS": [0.5, 0.2, 0.3],
"TRAINER": {"accelerator": "cpu", "max_epochs": 50},
"MODEL": {
"name": "SparseConv1dAE",
"conf": {
"seq_len": 12,
"in_channels": 1,
"enc_channels": [8, 16, 32],
"enc_kernel_sizes": [3, 3, 3],
"weight_decay": 1e-6
}
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 20 additions & 0 deletions benchmarks/kpi/results/kpi_idx_14/sparsevanilla/hyperparams.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"BATCH_SIZE": 64,
"SPLIT_RATIOS": [
0.5,
0.2,
0.3
],
"TRAINER": {
"accelerator": "cpu",
"max_epochs": 50
},
"MODEL": {
"name": "SparseVanillaAE",
"conf": {
"seq_len": 10,
"encoder_layersizes": [16, 32],
"decoder_layersizes": [32, 16]
}
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
27 changes: 25 additions & 2 deletions numalogic/models/autoencoder/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,26 @@
from torch import Tensor

from numalogic.tools.callbacks import ProgressDetails
from numalogic.tools.data import TimeseriesDataModule
from numalogic.tools.data import inverse_window

_LOGGER = logging.getLogger(__name__)


class AutoencoderTrainer(Trainer):
r"""
A PyTorch Lightning Trainer for Autoencoder models.
Args:
max_epochs: The maximum number of epochs to train for. (default: 100)
logger: The logger to use. (default: False)
check_val_every_n_epoch: The number of epochs between validation checks. (default: 5)
enable_checkpointing: Whether to enable checkpointing. (default: False)
enable_progress_bar: Whether to enable the progress bar. (default: False)
enable_model_summary: Whether to enable the model summary. (default: False)
callbacks: A list of callbacks to use. (default: None)
**trainer_kw: Additional keyword arguments to pass to the Lightning Trainer.
"""

def __init__(
self,
max_epochs=100,
Expand Down Expand Up @@ -55,8 +69,17 @@ def __init__(
)

def predict(self, model: pl.LightningModule = None, unbatch=True, **kwargs) -> Tensor:
r"""
Predicts the output of the model.
Args:
model: The model to predict with. (default: None)
unbatch: Whether to inverse window the output. (default: True)
**kwargs: Additional keyword arguments to pass to the Lightning
trainer's predict method.
"""
recon_err = super().predict(model, **kwargs)
recon_err = torch.vstack(recon_err)
if unbatch:
return TimeseriesDataModule.unbatch_sequences(recon_err)
return inverse_window(recon_err, method="keep_last")
return recon_err
26 changes: 17 additions & 9 deletions numalogic/models/autoencoder/variants/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import torch
from torch import nn, Tensor
from torch.distributions import kl_divergence, Bernoulli
from torch.nn.init import calculate_gain

from numalogic.models.autoencoder.base import BaseAE
Expand Down Expand Up @@ -227,12 +228,12 @@ def __init__(
if isinstance(enc_kernel_sizes, int):
enc_kernel_sizes = [enc_kernel_sizes for _ in range(len(enc_channels))]

elif isinstance(enc_kernel_sizes, (tuple, list)):
elif isinstance(enc_kernel_sizes, Sequence):
assert len(enc_channels) == len(
enc_kernel_sizes
), "enc_channels and enc_kernel_sizes should be of the same length"
else:
raise TypeError(f"Invalid enc_kernel_sizes type provided: {enc_kernel_sizes}")
raise TypeError(f"Invalid enc_kernel_sizes type provided: {type(enc_kernel_sizes)}")

self.encoder = Encoder(
num_channels=enc_channels,
Expand Down Expand Up @@ -301,8 +302,8 @@ class SparseConv1dAE(Conv1dAE):
<https://web.stanford.edu/class/cs294a/sparseAutoencoder.pdf>
Args:
beta: regularization parameter (Defaults to 1e-3)
rho: sparsity parameter value (Defaults to 0.05)
beta: Penalty factor (Defaults to 1e-3)
rho: Sparsity parameter value (Defaults to 0.05)
**kwargs: Conv1dAE kwargs
"""

Expand All @@ -324,13 +325,20 @@ def kl_divergence(self, activations: Tensor) -> Tensor:
"""
rho_hat = torch.mean(activations, dim=0)
rho = torch.full(rho_hat.size(), self.rho, device=self.device)
kl_loss = nn.KLDivLoss(reduction="sum")
_dim = 0 if rho_hat.dim() == 1 else 1
return kl_loss(torch.log_softmax(rho_hat, dim=_dim), torch.softmax(rho, dim=_dim))
kl_loss = kl_divergence(
Bernoulli(logits=torch.log(rho)), Bernoulli(logits=torch.log(rho_hat))
)
return torch.sum(torch.clamp(kl_loss, max=1.0))

def _get_reconstruction_loss(self, batch):
def _get_reconstruction_loss(self, batch) -> Tensor:
latent, recon = self.forward(batch)
batch = batch.view(-1, self.in_channels, self.seq_len)
loss = self.criterion(batch, recon)
penalty = self.kl_divergence(latent)
return loss + penalty
return loss + (self.beta * penalty)

def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
    r"""
    Computes the validation loss for one batch and adds it to the running total.

    Only ``self.criterion`` is evaluated here; no extra term is added to the
    returned loss inside this method.

    Args:
        batch: The input batch; it is compared against the reconstruction
            reshaped to (-1, seq_len, in_channels).
        batch_idx: Index of the batch (not used by this method).

    Returns:
        The loss tensor produced by ``self.criterion``.
    """
    target_shape = (-1, self.seq_len, self.in_channels)
    # Bring the reconstruction into the same layout as the incoming batch.
    reconstructed = self.reconstruction(batch).view(*target_shape)
    val_loss = self.criterion(batch, reconstructed)
    # Accumulate a plain Python float so no graph is kept alive.
    batch_loss_value = val_loss.detach().item()
    self._total_val_loss += batch_loss_value
    return val_loss
25 changes: 17 additions & 8 deletions numalogic/models/autoencoder/variants/vanilla.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import torch
from torch import nn, Tensor
from torch.distributions import kl_divergence, Bernoulli

from numalogic.models.autoencoder.base import BaseAE
from numalogic.tools.exceptions import LayerSizeMismatchError
Expand Down Expand Up @@ -68,7 +69,7 @@ def _construct_layers(self, layersizes: Sequence[int]) -> nn.ModuleList:
[
nn.Linear(start_layersize, layersizes[-1]),
nn.BatchNorm1d(self.n_features),
nn.LeakyReLU(),
nn.ReLU(),
]
)
return layers
Expand Down Expand Up @@ -216,8 +217,8 @@ class SparseVanillaAE(VanillaAE):
<https://web.stanford.edu/class/cs294a/sparseAutoencoder.pdf>
Args:
beta: regularization parameter (Defaults to 1e-3)
rho: sparsity parameter value (Defaults to 0.05)
beta: Regularization factor (Defaults to 1e-3)
rho: Sparsity parameter value (Defaults to 0.05)
**kwargs: VanillaAE kwargs
"""

Expand All @@ -239,13 +240,21 @@ def kl_divergence(self, activations: Tensor) -> Tensor:
"""
rho_hat = torch.mean(activations, dim=0)
rho = torch.full(rho_hat.size(), self.rho, device=self.device)
kl_loss = nn.KLDivLoss(reduction="sum")
_dim = 0 if rho_hat.dim() == 1 else 1
return kl_loss(torch.log_softmax(rho_hat, dim=_dim), torch.softmax(rho, dim=_dim))
kl_loss = kl_divergence(
Bernoulli(logits=torch.log(rho)), Bernoulli(logits=torch.log(rho_hat))
)
return torch.sum(torch.clamp(kl_loss, max=1.0))

def _get_reconstruction_loss(self, batch):
def _get_reconstruction_loss(self, batch: Tensor) -> Tensor:
latent, recon = self.forward(batch)
x = batch.view(-1, self.n_features, self.seq_len)
loss = self.criterion(x, recon)
penalty = self.kl_divergence(latent)
return loss + penalty
return loss + (self.beta * penalty)

def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
    r"""
    Computes the validation loss for one batch and adds it to the running total.

    Only ``self.criterion`` is evaluated here; no extra term is added to the
    returned loss inside this method.

    Args:
        batch: The input batch; it is compared against the reconstruction
            reshaped to (-1, seq_len, n_features).
        batch_idx: Index of the batch (not used by this method).

    Returns:
        The loss tensor produced by ``self.criterion``.
    """
    target_shape = (-1, self.seq_len, self.n_features)
    # Bring the reconstruction into the same layout as the incoming batch.
    reconstructed = self.reconstruction(batch).view(*target_shape)
    val_loss = self.criterion(batch, reconstructed)
    # Accumulate a plain Python float so no graph is kept alive.
    batch_loss_value = val_loss.detach().item()
    self._total_val_loss += batch_loss_value
    return val_loss
68 changes: 65 additions & 3 deletions numalogic/tools/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# limitations under the License.

import logging
from typing import Optional
from typing import Optional, Union
from collections.abc import Generator, Iterator

import numpy as np
Expand All @@ -26,6 +26,59 @@
_LOGGER = logging.getLogger(__name__)


def inverse_window(batched: Tensor, method="keep_last") -> Tensor:
    r"""
    Dispatches to one of the inverse-window helpers to turn a 3D tensor of shape
    (batch_size, seq_len, num_features) back into a 2D tensor of shape
    (new_batch, num_features).

    Args:
        batched: A 3D tensor of shape: (batch_size, seq_len, num_features)
        method: Name of the inverse transformation to apply. (default: "keep_last")
            Valid methods are: "keep_last", "keep_first"

    Returns:
        A 2D tensor of shape: (new_batch, num_features)

    Raises:
        ValueError: If ``method`` is not one of the valid method names.
    """
    # Reject unknown method names up front.
    if method not in ("keep_last", "keep_first"):
        raise ValueError(f"Invalid method: {method}")

    if method == "keep_first":
        return inverse_window_first_only(batched)
    return inverse_window_last_only(batched)


def inverse_window_first_only(batched: Tensor) -> Tensor:
    r"""
    Utility method to transform a 3D tensor of shape: (batch_size, seq_len, num_features)
    back into a shape of (new_batch, num_features), where
    new_batch = batch_size + seq_len - 1.

    Note: This is an approximate inverse transformation as only the
    first element in seq_len is used for the first (new_batch - seq_len + 1) rows,
    i.e. one row per window. The remaining (seq_len - 1) rows are taken from the
    last window, skipping its first element.

    Args:
        batched: A 3D tensor of shape: (batch_size, seq_len, num_features)

    Returns:
        A 2D tensor of shape: (new_batch, num_features)
    """
    # One row per window: the first time step of every window.
    window_heads = batched[:, 0, :]
    # The last window supplies the trailing rows no later window starts at.
    last_window_tail = batched[-1, 1:, :]
    return torch.vstack((window_heads, last_window_tail))


def inverse_window_last_only(batched: Tensor) -> Tensor:
    r"""
    Utility method to transform a 3D tensor of shape: (batch_size, seq_len, num_features)
    back into a shape of (new_batch, num_features), where
    new_batch = batch_size + seq_len - 1.

    Note: This is an approximate inverse transformation as only the
    last element in seq_len is used for the last (new_batch - seq_len + 1) rows,
    i.e. one row per window. The leading (seq_len - 1) rows are taken from the
    first window, skipping its last element.

    Args:
        batched: A 3D tensor of shape: (batch_size, seq_len, num_features)

    Returns:
        A 2D tensor of shape: (new_batch, num_features)
    """
    # The first window supplies the leading rows no earlier window ends at.
    first_window_head = batched[0, :-1, :]
    # One row per window: the last time step of every window.
    window_tails = batched[:, -1, :]
    return torch.vstack((first_window_head, window_tails))


class StreamingDataset(IterableDataset):
r"""
An iterable Dataset designed for streaming time series input.
Expand Down Expand Up @@ -93,10 +146,19 @@ def __len__(self) -> int:
"""
return len(self._data) - self._seq_len + 1

def __getitem__(self, idx: int) -> npt.NDArray[float]:
def __getitem__(self, idx: Union[int, slice]) -> npt.NDArray[float]:
r"""
Retrieves a sequence from the input data at the specified index.
"""
if isinstance(idx, slice):
if idx.step is not None:
raise ValueError("Slice with step is not supported in StreamingDataset")
output = []
start = idx.start or 0
stop = idx.stop or len(self)
for i in range(start, stop - self._seq_len + 1):
output.append(self._data[i : (i + self._seq_len)])
return np.stack(output)
if idx >= len(self):
raise IndexError(f"{idx} out of bound!")
return self._data[idx : idx + self._seq_len]
Expand Down Expand Up @@ -161,7 +223,7 @@ def val_dataloader(self) -> Optional[EVAL_DATALOADERS]:
def unbatch_sequences(batched: Tensor) -> Tensor:
r"""
Utility method to transform a 3D tensor of shape: (batch_size, seq_len, num_features)
back into a shape of (new_batch, num_feautres).
back into a shape of (new_batch, num_features).
Note: This is an approximate inverse transformation as only the
first element in seq_len is used for the first (new_batch - seq_len - 1) rows.
Expand Down
2 changes: 1 addition & 1 deletion tests/models/autoencoder/variants/test_conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_conv1d_err(self):
seq_len=SEQ_LEN,
in_channels=self.X_train.shape[1],
enc_channels=[8, 16, 4],
enc_kernel_sizes={3, 3, 3},
enc_kernel_sizes={5, 3, 1},
dec_activation="random",
)

Expand Down
25 changes: 21 additions & 4 deletions tests/tools/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from torch.utils.data import DataLoader

from numalogic._constants import TESTS_DIR
from numalogic.tools.data import StreamingDataset, TimeseriesDataModule
from numalogic.tools.data import StreamingDataset, TimeseriesDataModule, inverse_window
from numalogic.tools.exceptions import InvalidDataShapeError

ROOT_DIR = os.path.join(TESTS_DIR, "resources", "data")
Expand Down Expand Up @@ -96,7 +96,20 @@ def test_datamodule_err(self):
with self.assertRaises(ValueError):
TimeseriesDataModule(SEQ_LEN, self.train_data, val_split_ratio=1.2)

def test_unbatch_sequences(self):

class TestInverseWindow(unittest.TestCase):
train_data = None
test_data = None
m = None
n = None

@classmethod
def setUpClass(cls) -> None:
    # Shared fixtures: random train/test matrices with the same feature count.
    n_features = 3
    cls.n = n_features
    # Draw order (train first, then test) is kept so the generated data is unchanged.
    cls.train_data = RNG.random((100, n_features))
    cls.test_data = RNG.random((20, n_features))

def test_inverse_window(self):
ratio = 0.2
datamodule = TimeseriesDataModule(
SEQ_LEN, self.train_data, val_split_ratio=ratio, batch_size=256
Expand All @@ -106,19 +119,23 @@ def test_unbatch_sequences(self):
val_size = int(ratio * len(self.train_data))

for batch in datamodule.train_dataloader():
unbatched = datamodule.unbatch_sequences(batch)
unbatched = inverse_window(batch, method="keep_first")
self.assertTupleEqual(self.train_data[:-val_size].shape, unbatched.shape)
self.assertAlmostEqual(
torch.mean(unbatched).item(), np.mean(self.train_data[:-val_size]), places=5
)

for batch in datamodule.val_dataloader():
unbatched = datamodule.unbatch_sequences(batch)
unbatched = inverse_window(batch, method="keep_last")
self.assertTupleEqual(self.train_data[-val_size:].shape, unbatched.shape)
self.assertAlmostEqual(
torch.mean(unbatched).item(), np.mean(self.train_data[-val_size:]), places=5
)

def test_inverse_window_err(self):
    # An unknown method name must be rejected with a ValueError.
    sample = torch.tensor([1, 2, 3])
    with self.assertRaises(ValueError):
        inverse_window(sample, method="invalid_method")


if __name__ == "__main__":
unittest.main()

0 comments on commit a2b00c1

Please sign in to comment.