Skip to content

Commit

Permalink
add: anomaly sign and return labels for anomalies generated (#146)
Browse files Browse the repository at this point in the history
* add: anomaly sign and return labels for anomalies generated

Signed-off-by: s0nicboOm <[email protected]>

* fix: unused import

Signed-off-by: s0nicboOm <[email protected]>

* add: Gaussian noise

Signed-off-by: s0nicboOm <[email protected]>

* fix: formatting

Signed-off-by: s0nicboOm <[email protected]>

* fix: comments

Signed-off-by: s0nicboOm <[email protected]>

* fix: re-run quick start exploration

Signed-off-by: s0nicboOm <[email protected]>

* recommit: quick start notebook

Signed-off-by: s0nicboOm <[email protected]>

* recommit: quick start notebook

Signed-off-by: s0nicboOm <[email protected]>

* recommit: quick start notebook

Signed-off-by: s0nicboOm <[email protected]>

* Update quick-start.ipynb

* Update quick-start.ipynb

* fix: recommit

Signed-off-by: s0nicboOm <[email protected]>

* fix: notebook

Signed-off-by: s0nicboOm <[email protected]>

* fix: nb

Signed-off-by: s0nicboOm <[email protected]>

* fix: notebook

Signed-off-by: s0nicboOm <[email protected]>

* Update quick-start.ipynb

* Update quick-start.ipynb

* fix: notebook

Signed-off-by: s0nicboOm <[email protected]>

---------

Signed-off-by: s0nicboOm <[email protected]>
  • Loading branch information
s0nicboOm committed Mar 22, 2023
1 parent b6f63ef commit cb5509a
Show file tree
Hide file tree
Showing 5 changed files with 649 additions and 446 deletions.
214 changes: 40 additions & 174 deletions examples/quick-start.ipynb

Large diffs are not rendered by default.

121 changes: 96 additions & 25 deletions numalogic/synthetic/anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,40 @@
class AnomalyGenerator:
__MIN_COLUMNS = {"global": 1, "contextual": 1, "causal": 2, "collective": 2}

def __init__(self, ref_df: pd.DataFrame, anomaly_type="global", anomaly_ratio=0.1):
def __init__(
self,
ref_df: pd.DataFrame,
anomaly_type: str = "global",
anomaly_ratio: float = 0.1,
anomaly_sign: str = "positive",
mu: float = 0.1,
sigma: float = 0.01,
):
"""
@param ref_df: Reference Multivariate time series DataFrame
@param anomaly_type: Type of anomaly to impute.
Possible values include:
- "global": Outliers in the global context
- "contextual": Outliers only in the seasonal context
- "causal": Outliers caused by a temporal causal effect
- "collective": Outliers present simultaneously in two or more time series
@param anomaly_ratio: Ratio of anomalous data points to inject wrt
to number of samples
Class to inject synthetic anomaly to the input time series based on parameters.
Args:
ref_df: Reference Multivariate time series DataFrame
anomaly_type: Type of anomaly to impute.
Possible values include:
- "global": Outliers in the global context
- "contextual": Outliers only in the seasonal context
- "causal": Outliers caused by a temporal causal effect
- "collective": Outliers present simultaneously in two or more time series
anomaly_ratio: Ratio of anomalous data points to inject with respect
to the number of samples
anomaly_sign: Positive or Negative anomaly to be injected
Possible values include:
- "positive": higher outlier value injected compared to the current actual value
- "negative": lower outliers injected compared to the current actual value
mu: Mean of the Gaussian noise distribution injected
sigma: Standard deviation of the Gaussian noise distribution injected
"""

self.anomaly_type = anomaly_type
self.anomaly_ratio = anomaly_ratio
self.anomaly_sign = anomaly_sign
self.freq = ref_df.index.freq
self.mu, self.sigma = mu, sigma

self.scaler = StandardScaler()
scaled_ref_df = pd.DataFrame(
Expand All @@ -51,6 +69,13 @@ def __init__(self, ref_df: pd.DataFrame, anomaly_type="global", anomaly_ratio=0.
def injected_cols(self) -> List[str]:
return self.__injected_cols

def add_impact_sign(self) -> int:
    """Return the multiplier for the configured anomaly direction.

    Returns:
        1 when ``anomaly_sign`` is "positive", -1 when it is "negative".

    Raises:
        ValueError: if ``anomaly_sign`` is any other value.
    """
    sign_multipliers = {"positive": 1, "negative": -1}
    if self.anomaly_sign not in sign_multipliers:
        raise ValueError(f"Invalid anomaly sign provided: {self.anomaly_sign}")
    return sign_multipliers[self.anomaly_sign]

def inject_anomalies(
self, target_df: pd.DataFrame, cols: Sequence[str] = None, **kwargs
) -> pd.DataFrame:
Expand All @@ -73,27 +98,38 @@ def _inject_global_anomalies(
self, target_df: pd.DataFrame, cols: Sequence[str] = None, impact=3
) -> pd.DataFrame:
target_df = self._init_target_df(target_df, cols)
anomaly_df = pd.DataFrame(index=target_df.index)
anomaly_df["is_anomaly"] = 0

for col in self.__injected_cols:
tseries = target_df[col]
sample = tseries[: -self.block_size].sample(1)
idx_start = sample.index
idx_end = idx_start + (self.block_size * self.freq)
outlier_block = tseries[idx_start.values[0] : idx_end.values[0]]

factor = abs(self.ref_stats_df.loc["max", col] - outlier_block.mean())
outlier_block += impact * factor * abs(outlier_block)

# Add gaussian noise to the data
noise = np.random.normal(self.mu, self.sigma, outlier_block.shape)
outlier_block += noise + impact * factor * abs(outlier_block) * self.add_impact_sign()

# Add labels to the data
anomaly_col = anomaly_df["is_anomaly"]
anomaly_block = anomaly_col[idx_start.values[0] : idx_end.values[0]]
anomaly_block += self.add_impact_sign()

return pd.DataFrame(
self.scaler.inverse_transform(target_df.to_numpy()),
index=target_df.index,
columns=target_df.columns,
)
).merge(anomaly_df, left_index=True, right_index=True)

def _inject_contextual_anomalies(
self, target_df: pd.DataFrame, cols: Sequence[str], impact=1
) -> pd.DataFrame:
target_df = self._init_target_df(target_df, cols)
anomaly_df = pd.DataFrame(index=target_df.index)
anomaly_df["is_anomaly"] = 0

for col in self.__injected_cols:
tseries = target_df[col]
Expand All @@ -102,29 +138,43 @@ def _inject_contextual_anomalies(
idx_end = idx_start + (self.block_size * self.freq)
outlier_block = tseries[idx_start.values[0] : idx_end.values[0]]

# Add gaussian noise to the data
noise = np.random.normal(self.mu, self.sigma, outlier_block.shape)

dist_from_min = np.linalg.norm(
outlier_block.to_numpy() - self.ref_stats_df.loc["min", col]
)
dist_from_max = np.linalg.norm(
outlier_block.to_numpy() - self.ref_stats_df.loc["max", col]
)

if dist_from_min > dist_from_max:
factor = abs(self.ref_stats_df.loc["min", col] - outlier_block.mean())
outlier_block -= impact * factor * abs(outlier_block)
outlier_block -= (
noise + impact * factor * abs(outlier_block) * self.add_impact_sign()
)
else:
factor = abs(outlier_block.mean() - self.ref_stats_df.loc["max", col])
outlier_block += impact * factor * abs(outlier_block)
outlier_block += (
noise + impact * factor * abs(outlier_block) * self.add_impact_sign()
)

anomaly_col = anomaly_df["is_anomaly"]
anomaly_block = anomaly_col[idx_start.values[0] : idx_end.values[0]]
anomaly_block += self.add_impact_sign()

return pd.DataFrame(
self.scaler.inverse_transform(target_df),
self.scaler.inverse_transform(target_df.to_numpy()),
index=target_df.index,
columns=target_df.columns,
)
).merge(anomaly_df, left_index=True, right_index=True)

def _inject_collective_anomalies(
self, target_df: pd.DataFrame, cols: Sequence[str], impact=0.8
) -> pd.DataFrame:
target_df = self._init_target_df(target_df, cols)
anomaly_df = pd.DataFrame(index=target_df.index)
anomaly_df["is_anomaly"] = 0

sample = target_df[: -self.block_size].sample(1)
idx_start = sample.index
Expand All @@ -134,6 +184,9 @@ def _inject_collective_anomalies(
tseries = target_df[col]
outlier_block = tseries[idx_start.values[0] : idx_end.values[0]]

# Add gaussian noise to the data
noise = np.random.normal(self.mu, self.sigma, outlier_block.shape)

dist_from_min = np.linalg.norm(
outlier_block.to_numpy() - self.ref_stats_df.loc["min", col]
)
Expand All @@ -142,21 +195,30 @@ def _inject_collective_anomalies(
)
if dist_from_min > dist_from_max:
factor = abs(self.ref_stats_df.loc["min", col] - outlier_block.mean())
outlier_block -= impact * factor * abs(outlier_block)
outlier_block -= (
noise + impact * factor * abs(outlier_block) * self.add_impact_sign()
)
else:
factor = abs(outlier_block.mean() - self.ref_stats_df.loc["max", col])
outlier_block += impact * factor * abs(outlier_block)
outlier_block += (
noise + impact * factor * abs(outlier_block) * self.add_impact_sign()
)
anomaly_col = anomaly_df["is_anomaly"]
anomaly_block = anomaly_col[idx_start.values[0] : idx_end.values[0]]
anomaly_block += self.add_impact_sign()

return pd.DataFrame(
self.scaler.inverse_transform(target_df),
self.scaler.inverse_transform(target_df.to_numpy()),
index=target_df.index,
columns=target_df.columns,
)
).merge(anomaly_df, left_index=True, right_index=True)

def _inject_causal_anomalies(
self, target_df: pd.DataFrame, cols: Sequence[str], impact=2, gap_range=(5, 20)
) -> pd.DataFrame:
target_df = self._init_target_df(target_df, cols)
anomaly_df = pd.DataFrame(index=target_df.index)
anomaly_df["is_anomaly"] = 0

sample = target_df[: -len(self.__injected_cols) * self.block_size].sample(1)
idx_start = sample.index
Expand All @@ -165,22 +227,31 @@ def _inject_causal_anomalies(
tseries = target_df[col]
idx_end = idx_start + (self.block_size * self.freq)
outlier_block = tseries[idx_start.values[0] : idx_end.values[0]]
# Add gaussian noise to the data
noise = np.random.normal(self.mu, self.sigma, outlier_block.shape)

if np.random.binomial(1, 0.5):
factor = abs(self.ref_stats_df.loc["min", col] - outlier_block.mean())
outlier_block -= impact * factor * abs(outlier_block)
outlier_block -= (
noise + impact * factor * abs(outlier_block) * self.add_impact_sign()
)
else:
factor = abs(outlier_block.mean() - self.ref_stats_df.loc["max", col])
outlier_block += impact * factor * abs(outlier_block)
outlier_block += (
noise + impact * factor * abs(outlier_block) * self.add_impact_sign()
)

anomaly_col = anomaly_df["is_anomaly"]
anomaly_block = anomaly_col[idx_start.values[0] : idx_end.values[0]]
anomaly_block += self.add_impact_sign()
gap = np.random.randint(*gap_range)
idx_start = idx_end + (gap * self.freq)

return pd.DataFrame(
self.scaler.inverse_transform(target_df),
self.scaler.inverse_transform(target_df.to_numpy()),
index=target_df.index,
columns=target_df.columns,
)
).merge(anomaly_df, left_index=True, right_index=True)

def _init_target_df(self, target_df: pd.DataFrame, cols: Sequence[str]) -> pd.DataFrame:
target_df = target_df.copy()
Expand Down
Loading

0 comments on commit cb5509a

Please sign in to comment.