fix: tensor dimension swap instead of view change (#240)
Signed-off-by: Avik Basu <[email protected]>
ab93 committed Aug 15, 2023
1 parent 2dfd84c commit 76cac48
Showing 11 changed files with 536 additions and 452 deletions.
4 changes: 2 additions & 2 deletions numalogic/models/autoencoder/base.py
@@ -80,9 +80,9 @@ def init_optimizer(self, optim_algo: str):
             return optim.RMSprop(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
         raise NotImplementedError(f"Unsupported optimizer value provided: {optim_algo}")
 
-    def configure_shape(self, batch: Tensor) -> Tensor:
+    def configure_shape(self, x: Tensor) -> Tensor:
         """Method to configure the batch shape for each type of model architecture."""
-        return batch
+        return x
 
     def _get_reconstruction_loss(self, batch: Tensor) -> Tensor:
         _, recon = self.forward(batch)
26 changes: 17 additions & 9 deletions numalogic/models/autoencoder/variants/conv.py
@@ -259,27 +259,36 @@ def _init_weights(self) -> None:
             nn.init.xavier_normal_(module.weight, gain=calculate_gain("relu"))
 
     def forward(self, batch: Tensor) -> tuple[Tensor, Tensor]:
+        """
+        Forward pass for the Conv1dAE model.
+
+        Args:
+        ----
+            batch: Input batch of shape (batch_size, seq_len, in_channels)
+
+        Returns
+        -------
+            A tuple of (encoded, decoded) tensors
+        """
         batch = self.configure_shape(batch)
         encoded = self.encoder(batch)
         decoded = self.decoder(encoded)
-        return encoded, decoded
+        return encoded, self.configure_shape(decoded)
 
-    def configure_shape(self, batch: Tensor) -> Tensor:
-        return batch.view(-1, self.in_channels, self.seq_len)
+    def configure_shape(self, x: Tensor) -> Tensor:
+        return torch.swapdims(x, 1, 2)
 
     def encode(self, batch: Tensor) -> Tensor:
         batch = self.configure_shape(batch)
         return self.encoder(batch)
 
     def _get_reconstruction_loss(self, batch: Tensor) -> Tensor:
         _, recon = self.forward(batch)
-        x = batch.view(-1, self.in_channels, self.seq_len)
-        return self.criterion(x, recon)
+        return self.criterion(batch, recon)
 
     def predict_step(self, batch: Tensor, batch_idx: int, dataloader_idx: int = 0) -> Tensor:
         """Returns reconstruction for streaming input."""
         recon = self.reconstruction(batch)
-        recon = recon.view(-1, self.seq_len, self.in_channels)
         return self.criterion(batch, recon, reduction="none")
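Note on the fix itself: `Tensor.view` only reinterprets the underlying memory in a new shape, so using it to move between (batch_size, seq_len, in_channels) and (batch_size, in_channels, seq_len) silently interleaves values from different channels and time steps, while `torch.swapdims` performs a true transpose. A minimal sketch (not part of the diff) showing the difference:

    import torch

    # One sample, 3 time steps, 2 channels: rows are time steps, columns are channels.
    batch = torch.arange(6.0).reshape(1, 3, 2)

    viewed = batch.view(-1, 2, 3)          # old approach: reinterprets memory layout
    swapped = torch.swapdims(batch, 1, 2)  # new approach: true transpose

    print(viewed[0])   # tensor([[0., 1., 2.], [3., 4., 5.]]) -- channel and time values scrambled
    print(swapped[0])  # tensor([[0., 2., 4.], [1., 3., 5.]]) -- each row is one channel over time

Because `forward` also swaps the decoded output back to the input layout, the loss sites can now compare `recon` against `batch` directly, which is why the `view` reshapes are removed throughout this commit.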


@@ -300,7 +309,7 @@ class SparseConv1dAE(Conv1dAE):
         **kwargs: VanillaAE kwargs
     """
 
-    def __init__(self, beta=1e-3, rho=0.05, *args, **kwargs):
+    def __init__(self, beta: float = 1e-3, rho: float = 0.05, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.beta = beta
         self.rho = rho
@@ -326,13 +335,12 @@ def kl_divergence(self, activations: Tensor) -> Tensor:
 
     def _get_reconstruction_loss(self, batch) -> Tensor:
         latent, recon = self.forward(batch)
-        batch = batch.view(-1, self.in_channels, self.seq_len)
         loss = self.criterion(batch, recon)
         penalty = self.kl_divergence(latent)
         return loss + (self.beta * penalty)
 
     def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
         recon = self.reconstruction(batch)
-        loss = self.criterion(batch, recon.view(-1, self.seq_len, self.in_channels))
+        loss = self.criterion(batch, recon)
         self._total_val_loss += loss.detach().item()
         return loss
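The sparsity penalty added above is `beta` times a KL term between the target activation `rho` and the observed mean activation. The body of `kl_divergence` is not shown in this diff; a sketch of the standard sparse-autoencoder form it typically takes:

    import torch

    def kl_divergence_sketch(activations: torch.Tensor, rho: float = 0.05) -> torch.Tensor:
        """Standard sparse-AE penalty: KL(rho || mean activation per hidden unit)."""
        rho_hat = torch.mean(torch.sigmoid(activations), dim=0)  # mean activation in (0, 1)
        rho_t = torch.full_like(rho_hat, rho)
        return torch.sum(
            rho_t * torch.log(rho_t / rho_hat)
            + (1.0 - rho_t) * torch.log((1.0 - rho_t) / (1.0 - rho_hat))
        )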
16 changes: 6 additions & 10 deletions numalogic/models/autoencoder/variants/vanilla.py
@@ -182,25 +182,23 @@ def __init__(
 
     @staticmethod
     def init_weights(m: nn.Module) -> None:
-        r"""Initiate parameters in the transformer model."""
+        """Initialize the parameters in the model."""
         if type(m) == nn.Linear:
             nn.init.xavier_normal_(m.weight)
 
     def forward(self, batch: Tensor) -> tuple[Tensor, Tensor]:
-        batch = batch.view(-1, self.n_features, self.seq_len)
+        batch = torch.swapdims(batch, 1, 2)
         encoded = self.encoder(batch)
         decoded = self.decoder(encoded)
-        return encoded, decoded
+        return encoded, torch.swapdims(decoded, 1, 2)
 
-    def _get_reconstruction_loss(self, batch):
+    def _get_reconstruction_loss(self, batch: Tensor):
         _, recon = self.forward(batch)
-        x = batch.view(-1, self.n_features, self.seq_len)
-        return self.criterion(x, recon)
+        return self.criterion(batch, recon)
 
     def predict_step(self, batch: Tensor, batch_idx: int, dataloader_idx: int = 0):
         """Returns reconstruction for streaming input."""
         recon = self.reconstruction(batch)
-        recon = recon.view(-1, self.seq_len, self.n_features)
         return self.criterion(batch, recon, reduction="none")
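Since `forward` now swaps the decoded output back to the input layout, `criterion(batch, recon)` compares tensors of identical shape and the removed `view` calls become unnecessary. A quick shape round-trip check (the constructor arguments here are assumed for illustration, not taken from this diff):

    import torch
    from numalogic.models.autoencoder.variants import VanillaAE

    x = torch.randn(8, 12, 3)                    # (batch_size, seq_len, n_features)
    model = VanillaAE(seq_len=12, n_features=3)  # assumed constructor signature
    _, recon = model(x)
    assert recon.shape == x.shape                # decoded output restored to input layout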


@@ -247,14 +245,12 @@ def kl_divergence(self, activations: Tensor) -> Tensor:
 
     def _get_reconstruction_loss(self, batch: Tensor) -> Tensor:
         latent, recon = self.forward(batch)
-        x = batch.view(-1, self.n_features, self.seq_len)
-        loss = self.criterion(x, recon)
+        loss = self.criterion(batch, recon)
         penalty = self.kl_divergence(latent)
         return loss + (self.beta * penalty)
 
     def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
         recon = self.reconstruction(batch)
-        recon = recon.view(-1, self.seq_len, self.n_features)
         loss = self.criterion(batch, recon)
         self._total_val_loss += loss.detach().item()
         return loss
4 changes: 2 additions & 2 deletions numalogic/models/vae/variants/conv.py
@@ -134,7 +134,7 @@ def forward(self, z: Tensor) -> Tensor:
         out = self.unflatten(out)
         out = torch.relu(self.bnorm(self.conv_tr(out)))
         out = torch.relu(self.fc_out(out))
-        out = out.view(-1, self.seq_len, self.n_features)
+        out = torch.swapdims(out, 1, 2)
         return self.td_linear(out)


@@ -213,7 +213,7 @@ def forward(self, x: Tensor) -> tuple[MultivariateNormal, Tensor]:
 
     def configure_shape(self, x: Tensor) -> Tensor:
         """Method to configure the batch shape for each type of model architecture."""
-        return x.view(-1, self.n_features, self.seq_len)
+        return torch.swapdims(x, 1, 2)
 
     def kld_loss(self, p: MultivariateNormal) -> Tensor:
         """
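`kld_loss` is truncated above. For a `MultivariateNormal` posterior regularized toward a standard-normal prior, the term would look roughly like the following sketch (an assumption for illustration, not the repository's actual implementation):

    import torch
    from torch.distributions import MultivariateNormal, kl_divergence

    def kld_loss_sketch(p: MultivariateNormal) -> torch.Tensor:
        """KL(p || N(0, I)), summed over the batch; broadcasts over p's batch shape."""
        dim = p.event_shape[0]
        prior = MultivariateNormal(torch.zeros(dim), covariance_matrix=torch.eye(dim))
        return kl_divergence(p, prior).sum()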
27 changes: 27 additions & 0 deletions numalogic/tools/data.py
@@ -90,6 +90,9 @@ def inverse_window_last_only(batched: Tensor) -> Tensor:
 
 class StreamingDataset(IterableDataset):
     r"""An iterable Dataset designed for streaming time series input.
+
+    Iterates over the input data and returns a sequence of shape
+    (batch_size, seq_len, num_features)
 
     Args:
     ----
         data: A numpy array containing the input data in the shape of (batch, num_features).
@@ -179,6 +182,30 @@ def __getitem__(self, idx: Union[int, slice]) -> npt.NDArray[float]:
         return self._data[idx : idx + self._seq_len]
 
 
+class StreamingDataLoader(DataLoader):
+    """A convenience DataLoader that wraps a StreamingDataset for handling time series data.
+
+    Args:
+    ----
+        data: A numpy array containing the input data in the shape of (batch, num_features).
+        seq_len: Length of the sliding window sequences to be generated from the input data
+        kwargs: Additional arguments to be passed to the DataLoader
+
+    Raises
+    ------
+        ValueError: If the sequence length is greater than the data size
+        InvalidDataShapeError: If the input data array does not
+            have a minimum dimension size of 2
+        TypeError: If a wrong argument is passed in kwargs
+    """
+
+    def __init__(self, data: npt.NDArray[float], seq_len: int, **kwargs):
+        if "dataset" in kwargs:
+            raise TypeError("dataset argument is not supported for StreamingDataLoader!")
+        super().__init__(StreamingDataset(data, seq_len), **kwargs)
+
+
 class TimeseriesDataModule(pl.LightningDataModule):
     r"""A timeseries data module for use in PyTorch Lightning.