Skip to content

Commit

Permalink
feat: difference transform
Browse files Browse the repository at this point in the history
Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 committed Jan 16, 2024
1 parent 46363d3 commit 5ec5883
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 11 deletions.
4 changes: 4 additions & 0 deletions numalogic/config/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class PreprocessFactory(_ObjectFactory):
TanhScaler,
DataClipper,
GaussianNoiseAdder,
DifferenceTransform,
)

_CLS_MAP: ClassVar[dict] = {
Expand All @@ -61,6 +62,7 @@ class PreprocessFactory(_ObjectFactory):
"TanhScaler": TanhScaler,
"DataClipper": DataClipper,
"GaussianNoiseAdder": GaussianNoiseAdder,
"DifferenceTransform": DifferenceTransform,
}

def get_pipeline_instance(self, objs_info: list[ModelInfo]):
Expand Down Expand Up @@ -88,6 +90,7 @@ class ThresholdFactory(_ObjectFactory):

from numalogic.models.threshold import (
StdDevThreshold,
AggStdDevThreshold,
MahalanobisThreshold,
RobustMahalanobisThreshold,
StaticThreshold,
Expand All @@ -96,6 +99,7 @@ class ThresholdFactory(_ObjectFactory):

_CLS_MAP: ClassVar[dict] = {
"StdDevThreshold": StdDevThreshold,
"AggStdDevThreshold": AggStdDevThreshold,
"StaticThreshold": StaticThreshold,
"SigmoidThreshold": SigmoidThreshold,
"MahalanobisThreshold": MahalanobisThreshold,
Expand Down
3 changes: 2 additions & 1 deletion numalogic/models/threshold/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from numalogic.models.threshold._std import StdDevThreshold
from numalogic.models.threshold._std import StdDevThreshold, AggStdDevThreshold
from numalogic.models.threshold._mahalanobis import MahalanobisThreshold, RobustMahalanobisThreshold
from numalogic.models.threshold._static import StaticThreshold, SigmoidThreshold

__all__ = [
"StdDevThreshold",
"AggStdDevThreshold",
"StaticThreshold",
"SigmoidThreshold",
"MahalanobisThreshold",
Expand Down
3 changes: 2 additions & 1 deletion numalogic/models/threshold/_mahalanobis.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,10 @@ def __init__(
self,
max_outlier_prob: float = 0.1,
max_inlier_percentile: Optional[float] = None,
support_fraction: Optional[float] = 0.7,
):
super().__init__(max_outlier_prob)
self._mcd = MinCovDet(store_precision=False)
self._mcd = MinCovDet(store_precision=False, support_fraction=support_fraction)
if max_inlier_percentile and (not 75.0 <= max_inlier_percentile < 100.0):
raise ValueError("max_inlier_percentile should be in range [75, 100)")
self._max_inlier_percentile = max_inlier_percentile
Expand Down
17 changes: 16 additions & 1 deletion numalogic/models/threshold/_std.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

import numpy as np
from numpy.typing import NDArray
Expand Down Expand Up @@ -72,3 +72,18 @@ def predict(self, x_test: NDArray[float]) -> NDArray[int]:
def score_samples(self, x_test: NDArray[float]) -> NDArray[float]:
    """
    Compute anomaly scores by scaling the input with the threshold.

    Every value of ``x_test`` is divided by ``self.threshold``, so the
    returned array has the same shape as the input.

    Args:
    -------
        x_test: input data to be scored

    Returns
    -------
        Array of anomaly scores
    """
    threshold = self.threshold
    return x_test / threshold


class AggStdDevThreshold(StdDevThreshold):
    """
    StdDevThreshold variant that collapses the per-feature scores of
    each sample into a single (optionally weighted) average score.

    Args:
    -------
        feature_weights: weights used when averaging the scores along
            axis 1; None results in an unweighted mean (default: None)
        std_factor: passed as the first positional argument to
            StdDevThreshold (default: 3.0)
        min_threshold: passed as the second positional argument to
            StdDevThreshold (default: 0.0)
    """

    def __init__(
        self,
        feature_weights: Optional[list[float]] = None,
        std_factor: float = 3.0,
        min_threshold: float = 0.0,
    ):
        super().__init__(std_factor, min_threshold)
        self.feature_weights = feature_weights

    def score_samples(self, x_test: NDArray[float]) -> NDArray[float]:
        """
        Return one aggregated anomaly score per row of the input.

        The scores computed by the parent class are averaged along
        axis 1 using ``feature_weights``; the averaged axis is kept
        with size one.

        Args:
        -------
            x_test: input data to be scored

        Returns
        -------
            Array of aggregated anomaly scores
        """
        per_feature_scores = super().score_samples(x_test)
        weights = self.feature_weights
        return np.average(per_feature_scores, axis=1, weights=weights, keepdims=True)
21 changes: 14 additions & 7 deletions numalogic/synthetic/anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ def __init__(
self.anomaly_type = anomaly_type
self.anomaly_ratio = anomaly_ratio
self.anomaly_sign = anomaly_sign
self.freq = ref_df.index.freq
try:
self.freq = ref_df.index.freq
except AttributeError:
self.freq = None
self.mu, self.sigma = mu, sigma

self.scaler = StandardScaler()
Expand Down Expand Up @@ -138,18 +141,22 @@ def _inject_global_anomalies(
).merge(anomaly_df, left_index=True, right_index=True)

def _inject_contextual_anomalies(
self, target_df: pd.DataFrame, cols: Sequence[str], impact=1
self,
target_df: pd.DataFrame,
cols: Sequence[str],
impact=1,
) -> pd.DataFrame:
target_df = self._init_target_df(target_df, cols)
anomaly_df = pd.DataFrame(index=target_df.index)
anomaly_df["is_anomaly"] = 0

for col in self.__injected_cols:
tseries = target_df[col]
sample = tseries[: -self.block_size].sample(1)
idx_start = sample.index
idx_end = idx_start + (self.block_size * self.freq)
outlier_block = tseries[idx_start.values[0] : idx_end.values[0]]

idx_start = self._find_start_idx(None, target_df)
idx_end = idx_start + self.block_size

outlier_block = tseries[idx_start:idx_end]

# Add gaussian noise to the data
noise = self._rnd_gen.normal(self.mu, self.sigma, outlier_block.shape)
Expand All @@ -172,7 +179,7 @@ def _inject_contextual_anomalies(
)

anomaly_col = anomaly_df["is_anomaly"]
anomaly_block = anomaly_col[idx_start.values[0] : idx_end.values[0]]
anomaly_block = anomaly_col[idx_start:idx_end]
anomaly_block += self.add_impact_sign()

return pd.DataFrame(
Expand Down
6 changes: 5 additions & 1 deletion numalogic/tools/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,11 @@ def inverse_window_last_only(batched: Tensor) -> Tensor:
-------
A 2D tensor of shape: (new_batch, num_features)
"""
output = batched[:, -1, :]
try:
output = batched[:, -1, :]
except IndexError:
batched = batched.unsqueeze(-1)
output = batched[:, -1, :]
return torch.vstack((batched[0, :-1, :], output))


Expand Down
2 changes: 2 additions & 0 deletions numalogic/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
StaticPowerTransformer,
DataClipper,
GaussianNoiseAdder,
DifferenceTransform,
)
from numalogic.transforms._movavg import ExpMovingAverage, expmov_avg_aggregator
from numalogic.transforms._postprocess import TanhNorm, tanh_norm
Expand All @@ -34,4 +35,5 @@
"TanhNorm",
"tanh_norm",
"GaussianNoiseAdder",
"DifferenceTransform",
]
18 changes: 18 additions & 0 deletions numalogic/transforms/_stateless.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class LogTransformer(StatelessTransformer):
add_factor: float value to be added to the feature before taking log.
"""

__slots__ = ("add_factor",)

def __init__(self, add_factor=2):
self.add_factor = add_factor

Expand Down Expand Up @@ -112,6 +114,8 @@ class GaussianNoiseAdder(StatelessTransformer):
seed: int value to be used as the random seed (default: 42).
"""

__slots__ = ("_rng", "_is_abs", "_scale")

def __init__(self, scale: float = 1e-8, positive_only: bool = True, seed: int = 42):
self._rng = np.random.default_rng(seed)
self._is_abs = positive_only
Expand All @@ -122,3 +126,17 @@ def transform(self, x: npt.NDArray[float], **__) -> npt.NDArray[float]:
if self._is_abs:
noise = np.abs(noise)
return x + noise


class DifferenceTransform(StatelessTransformer):
    """
    Apply discrete differencing along the rows (first axis) of the input.

    Args:
    -------
        order: number of times the first-order difference is applied
            (default: 1)

    Raises
    ------
        ValueError: if order is less than 1
    """

    __slots__ = ("order",)

    def __init__(
        self,
        order: int = 1,
    ):
        if order < 1:
            raise ValueError(f"order should be a positive integer, got: {order}")
        self.order = order

    def transform(self, input_: npt.NDArray, **__):
        """
        Difference the input ``order`` times and back-fill the leading rows.

        Differencing leaves NaN in the first ``order`` rows; these are
        filled with the first valid differenced row so the number of
        rows is unchanged. If the input has no more than ``order`` rows
        there is nothing to back-fill from and the output stays NaN.

        Args:
        -------
            input_: array-like input; it is wrapped in a DataFrame, so a
                1D input produces a single-column 2D output

        Returns
        -------
            float32 array of differenced values
        """
        diff_df = pd.DataFrame(input_)
        # Previously `order` was ignored and only one difference was taken;
        # repeat the first-order difference so the parameter is honoured.
        for _ in range(self.order):
            diff_df = diff_df.diff()
        return diff_df.bfill().to_numpy(dtype=np.float32)

0 comments on commit 5ec5883

Please sign in to comment.