Skip to content

Commit

Permalink
chore!: refactor preproc and postproc into transforms module (#201)
Browse files Browse the repository at this point in the history
- Move preprocess and postprocess files & modules -> transforms
- Avoid setting up handler in loggers
- Add base classes for extensibility
---------

Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 committed Jun 1, 2023
1 parent 7b3531a commit 2298be4
Show file tree
Hide file tree
Showing 29 changed files with 502 additions and 394 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ setup:

# test your application (tests in the tests/ directory)
test:
poetry run pytest tests/
poetry run pytest -v tests/

publish:
@rm -rf dist
Expand Down
7 changes: 4 additions & 3 deletions docs/post-processing.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@ Tanh normalization step is an optional step, where we normalize the anomalies be

```python
import numpy as np
from numalogic.postprocess import tanh_norm
from numalogic.transforms import tanh_norm

raw_anomaly_score = np.random.randn(10, 2)
test_anomaly_score_norm = tanh_norm(raw_anomaly_score)
```

A scikit-learn compatible API is also available.

```python
import numpy as np
from numalogic.postprocess import TanhNorm
from numalogic.transforms import TanhNorm

raw_score = np.random.randn(10, 2)

Expand All @@ -35,7 +36,7 @@ facilitating timely detection and response to potential anomalies.

```python
import numpy as np
from numalogic.postprocess import ExpMovingAverage
from numalogic.transforms import ExpMovingAverage

raw_score = np.array([1.0, 1.5, 1.2, 3.5, 2.7, 5.6, 7.1, 6.9, 4.2, 1.1]).reshape(-1, 1)

Expand Down
6 changes: 3 additions & 3 deletions docs/pre-processing.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Log transformation reduces the variance in some distributions, especially with l
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from numalogic.preprocess.transformer import LogTransformer
from numalogic.transforms import LogTransformer

# Generate some random train and test data
x_train = np.random.randn(100, 3)
Expand All @@ -43,7 +43,7 @@ When `add_factor` is provided, each data point x is converted to (x + add_factor

```python
import numpy as np
from numalogic.preprocess.transformer import StaticPowerTransformer
from numalogic.transforms import StaticPowerTransformer

# Generate some random train and test data
x_train = np.random.randn(100, 3)
Expand All @@ -65,7 +65,7 @@ in the data.

```python
import numpy as np
from numalogic.preprocess import TanhScaler
from numalogic.transforms import TanhScaler

# Generate some random train and test data
x_train = np.random.randn(100, 3)
Expand Down
2 changes: 1 addition & 1 deletion docs/quick-start.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ from torch.utils.data import DataLoader
from numalogic.models.autoencoder import AutoencoderTrainer
from numalogic.models.autoencoder.variants import VanillaAE
from numalogic.models.threshold import StdDevThreshold
from numalogic.postprocess import TanhNorm
from numalogic.transforms import TanhNorm
from numalogic.tools.data import StreamingDataset

# Create some synthetic data
Expand Down
2 changes: 1 addition & 1 deletion examples/numalogic-simple-pipeline/src/udf/postprocess.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

import numpy as np
from numalogic.postprocess import TanhNorm
from numalogic.transforms import TanhNorm
from pynumaflow.function import Messages, Message, Datum

from src.utils import Payload
Expand Down
2 changes: 1 addition & 1 deletion examples/numalogic-simple-pipeline/src/udf/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import uuid

import numpy as np
from numalogic.preprocess.transformer import LogTransformer
from numalogic.transforms import LogTransformer
from pynumaflow.function import Messages, Message, Datum

from src.utils import Payload
Expand Down
2 changes: 1 addition & 1 deletion examples/numalogic-simple-pipeline/src/udf/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from numalogic.models.autoencoder import AutoencoderTrainer
from numalogic.models.autoencoder.variants import Conv1dAE
from numalogic.models.threshold import StdDevThreshold
from numalogic.preprocess.transformer import LogTransformer
from numalogic.transforms.transformer import LogTransformer
from numalogic.tools.data import TimeseriesDataModule
from pynumaflow.function import Datum, Messages, Message

Expand Down
9 changes: 1 addition & 8 deletions numalogic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,4 @@

LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
stream_handler.setFormatter(formatter)

LOGGER.addHandler(stream_handler)
LOGGER.addHandler(logging.NullHandler())
53 changes: 53 additions & 0 deletions numalogic/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2022 The Numaproj Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http:https://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Base classes for all models and transforms."""


from abc import ABCMeta

import numpy.typing as npt
import pytorch_lightning as pl
from sklearn.base import TransformerMixin, BaseEstimator, OutlierMixin


class BaseTransformer(TransformerMixin, BaseEstimator):
"""Base class for all transformer classes."""

pass


class StatelessTransformer(BaseTransformer):
"""Base class for stateless transforms."""

def transform(self, input_: npt.NDArray, **__):
"""Implement the transform method."""
raise NotImplementedError("transform method not implemented")

def fit(self, _: npt.NDArray):
"""Fit method does nothing for stateless transforms."""
return self

def fit_transform(self, input_: npt.NDArray, _=None, **__):
"""Return the result of the transform method."""
return self.transform(input_)


class TorchModel(pl.LightningModule, metaclass=ABCMeta):
"""Base class for all Pytorch based models."""

pass


class BaseThresholdModel(OutlierMixin, BaseEstimator):
"""Base class for all threshold models."""

pass
4 changes: 2 additions & 2 deletions numalogic/config/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class PreprocessFactory(_ObjectFactory):
"""Factory class to create preprocess instances."""

from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
from numalogic.preprocess import LogTransformer, StaticPowerTransformer, TanhScaler
from numalogic.transforms import LogTransformer, StaticPowerTransformer, TanhScaler

_CLS_MAP = {
"StandardScaler": StandardScaler,
Expand All @@ -55,7 +55,7 @@ class PreprocessFactory(_ObjectFactory):
class PostprocessFactory(_ObjectFactory):
"""Factory class to create postprocess instances."""

from numalogic.postprocess import TanhNorm, ExpMovingAverage
from numalogic.transforms import TanhNorm, ExpMovingAverage

_CLS_MAP = {"TanhNorm": TanhNorm, "ExpMovingAverage": ExpMovingAverage}

Expand Down
6 changes: 3 additions & 3 deletions numalogic/models/autoencoder/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
# limitations under the License.


from abc import ABCMeta
from typing import Any

import pytorch_lightning as pl
import torch.nn.functional as F
from torch import Tensor, optim

from numalogic.base import TorchModel

class BaseAE(pl.LightningModule, metaclass=ABCMeta):

class BaseAE(TorchModel):
r"""Abstract Base class for all Pytorch based autoencoder models for time-series data.
Args:
Expand Down
2 changes: 1 addition & 1 deletion numalogic/models/forecast/variants/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, FunctionTransformer

from numalogic.postprocess import tanh_norm
from numalogic.transforms._postprocess import tanh_norm


class BaselineForecaster:
Expand Down
7 changes: 4 additions & 3 deletions numalogic/models/threshold/_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
# limitations under the License.
import numpy as np
import numpy.typing as npt
from sklearn.base import BaseEstimator
from typing_extensions import Self

from numalogic.base import BaseThresholdModel

class StaticThreshold(BaseEstimator):

class StaticThreshold(BaseThresholdModel):
r"""Simple and stateless static thresholding as an estimator.
Values more than upper_limit is considered an outlier,
Expand Down Expand Up @@ -65,7 +66,7 @@ def score_samples(self, x: npt.NDArray[float]) -> npt.NDArray[float]:
return x


class SigmoidThreshold(BaseEstimator):
class SigmoidThreshold(BaseThresholdModel):
r"""Smooth and stateless static thesholding using sigmoid function as an estimator.
The values produced.
Expand Down
7 changes: 4 additions & 3 deletions numalogic/models/threshold/_std.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@

import numpy as np
from numpy.typing import NDArray
from sklearn.base import BaseEstimator
from typing_extensions import Self

from numalogic.base import BaseThresholdModel

class StdDevThreshold(BaseEstimator):

class StdDevThreshold(BaseThresholdModel):
r"""Threshold estimator that calculates based on the mean and the std deviation.
Threshold = Mean + (std_factor * Std)
Expand Down Expand Up @@ -50,7 +51,7 @@ def std(self):
def threshold(self):
return self._threshold

def fit(self, x_train: NDArray[float], y=None) -> Self:
def fit(self, x_train: NDArray[float], _=None) -> Self:
"""Fit the estimator on the training set."""
self._std = np.std(x_train, axis=0)
self._mean = np.mean(x_train, axis=0)
Expand Down
4 changes: 2 additions & 2 deletions numalogic/registry/redis_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from redis.exceptions import RedisError

from numalogic.registry import ArtifactManager, ArtifactData, ArtifactCache
from numalogic.registry.artifact import ArtifactManager, ArtifactData, ArtifactCache
from numalogic.registry._serialize import loads, dumps
from numalogic.tools.exceptions import ModelKeyNotFound, RedisRegistryError
from numalogic.tools.types import artifact_t, redis_client_t, KEYS, META_T, META_VT
Expand Down Expand Up @@ -154,7 +154,7 @@ def __save_artifact(
production_key = self.__construct_production_key(key)
pipe.set(name=production_key, value=new_version_key)
_LOGGER.info(
"Setting Production key : %d ,to this new key = %s", production_key, new_version_key
"Setting Production key : %s ,to this new key = %s", production_key, new_version_key
)
serialized_metadata = ""
if metadata:
Expand Down
9 changes: 0 additions & 9 deletions numalogic/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +0,0 @@
from numpy.typing import ArrayLike
from sklearn.base import TransformerMixin, BaseEstimator


class DataIndependentTransformers(TransformerMixin, BaseEstimator):
"""Base class for stateless transforms."""

def fit(self, _: ArrayLike):
return self
11 changes: 8 additions & 3 deletions numalogic/tools/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,21 @@

from typing import Union, TypeVar
from collections.abc import Sequence

from redis.client import AbstractRedis
from sklearn.base import BaseEstimator
from torch import nn


try:
from redis.client import AbstractRedis
except ImportError:
pass
else:
redis_client_t = TypeVar("redis_client_t", bound=AbstractRedis, covariant=True)

artifact_t = TypeVar("artifact_t", bound=Union[nn.Module, BaseEstimator], covariant=True)
META_T = TypeVar("META_T", bound=dict[str, Union[str, float, int, list, dict]])
META_VT = TypeVar("META_VT", str, int, float, list, dict)
EXTRA_T = TypeVar("EXTRA_T", bound=dict[str, Union[str, list, dict]])
redis_client_t = TypeVar("redis_client_t", bound=AbstractRedis, covariant=True)
KEYS = TypeVar("KEYS", bound=Sequence[str], covariant=True)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from numalogic.preprocess.transformer import LogTransformer, StaticPowerTransformer, TanhScaler
"""
Module to provide timeseries transformations needed for preprocessing,
feature engineering and postprocessing.
"""

__all__ = ["LogTransformer", "StaticPowerTransformer", "TanhScaler"]
from numalogic.transforms._scaler import TanhScaler
from numalogic.transforms._stateless import LogTransformer, StaticPowerTransformer
from numalogic.transforms._movavg import ExpMovingAverage, expmov_avg_aggregator
from numalogic.transforms._postprocess import TanhNorm, tanh_norm

__all__ = [
"TanhScaler",
"LogTransformer",
"StaticPowerTransformer",
"ExpMovingAverage",
"expmov_avg_aggregator",
"TanhNorm",
"tanh_norm",
]
Loading

0 comments on commit 2298be4

Please sign in to comment.