Skip to content

Commit

Permalink
feat: Sigmoid threshold (#141)
Browse files Browse the repository at this point in the history
* feat: Sigmoid threshold
* chore: support passing run id while saving in mlflow

Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 committed Mar 9, 2023
1 parent 9659cbd commit ea01b44
Show file tree
Hide file tree
Showing 10 changed files with 744 additions and 536 deletions.
2 changes: 1 addition & 1 deletion docs/quick-start.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pip install numalogic

## Numalogic as a Library

Numalogic can be used as an independent library, and it provides various ML models and tools. Here, we are using a `AutoencoderPipeline`. Refer to [training section](autoencoders.md) for other available options.
Numalogic can be used as an independent library, and it provides various ML models and tools. Here, we are using the `AutoencoderTrainer`. Refer to [training section](autoencoders.md) for other available options.

In this example, the train data set has numbers ranging from 1-10. Whereas in the test data set, there are data points that go out of this range, which the algorithm should be able to detect as anomalies.

Expand Down
37 changes: 14 additions & 23 deletions docs/threshold.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,22 @@ It is a simple Estimator that extends BaseEstimator.
Currently, the library supports `StdDevThreshold`. This takes in parameters `min_threshold` and `std_factor`. This model
defines threshold as `mean + std_factor * std`.


Fitting the threshold model
```python
# preprocess step
clf = LogTransformer()
train_data = clf.fit_transform(X_train)
test_data = clf.transform(X_test)
import numpy as np
from numalogic.models.threshold import StdDevThreshold

# Fitting the Threshold model
thresh_clf = StdDevThreshold(std_factor=1.2)
```
# Generate positive random data
x_train = np.abs(np.random.randn(1000, 3))
x_test = np.abs(np.random.randn(30, 3))

Train the model
```python
# Train the Autoencoder model and fit the model on train data
ae_pl = AutoencoderPipeline(
model=Conv1dAE(in_channels=1, enc_channels=4), seq_len=8, num_epochs=30
)
ae_pl.fit(X_train)
# Here we want a threshold such that anything
# outside 5 deviations from the mean will be anomalous.
thresh_clf = StdDevThreshold(std_factor=5)
thresh_clf.fit(x_train)

# predict method returns the reconstruction error
anomaly_score = ae_pl.predict(X_test)
```
Predicting score using the threshold model
```python
# Predict final anomaly score using threshold estimator
anomaly_score = thresh_clf.predict(anomaly_score)
# Let's get the predictions
y_pred = thresh_clf.predict(x_test)

# Anomaly scores can be obtained using the score_samples method
y_score = thresh_clf.score_samples(x_test)
```
4 changes: 2 additions & 2 deletions numalogic/models/threshold/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from numalogic.models.threshold._std import StdDevThreshold
from numalogic.models.threshold._static import StaticThreshold
from numalogic.models.threshold._static import StaticThreshold, SigmoidThreshold

__all__ = ["StdDevThreshold", "StaticThreshold"]
__all__ = ["StdDevThreshold", "StaticThreshold", "SigmoidThreshold"]
71 changes: 59 additions & 12 deletions numalogic/models/threshold/_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import numpy as np
import numpy.typing as npt
from sklearn.base import BaseEstimator
from typing_extensions import Self
Expand Down Expand Up @@ -46,22 +45,70 @@ def fit(self, _: npt.NDArray[float]) -> Self:
"""Does not do anything. Only for API compatibility"""
return self

def predict(self, x_test: npt.NDArray[float]) -> npt.NDArray[int]:
def predict(self, x: npt.NDArray[float]) -> npt.NDArray[int]:
"""
Returns an integer array of same shape as input.
1 denotes anomaly.
"""
y = x.copy()
y[x < self.upper_limit] = 0
y[x >= self.upper_limit] = 1
return y

def score_samples(self, x: npt.NDArray[float]) -> npt.NDArray[float]:
"""
Returns an array of same shape as input
with values being anomaly scores.
"""
x = x.copy()
x[x < self.upper_limit] = self.inlier_score
x[x >= self.upper_limit] = self.outlier_score
return x


class SigmoidThreshold(BaseEstimator):
r"""
Smooth and stateless static thresholding using sigmoid function as an estimator.
The values produced vary smoothly between 0 and the score limit.
Score is given by:
score = score_limit * 1 / (1 + exp(-coeff * (x - upper_limit)))
Args:
upper_limit: is the desired threshold limit of x
slope_factor: determines the slope of the curve
score_limit: is the scalar multiplier for the score
e.g. a value of 10 means that the output score
will be between 0 and 10.
"""
__slots__ = ("upper_limit", "coeff", "score_limit")

def __init__(self, upper_limit: float, slope_factor: int = 5, score_limit: int = 10):
self.upper_limit = float(upper_limit)
self.coeff = slope_factor * np.pi
self.score_limit = score_limit

def fit(self, _: npt.NDArray[float]) -> Self:
"""Does not do anything. Only for API compatibility"""
return self

def predict(self, x: npt.NDArray[float]) -> npt.NDArray[int]:
"""
Returns an integer array of same shape as input.
1 denotes anomaly.
This is calculated as a hard threshold at upper limit.
"""
y_test = x_test.copy()
y_test[x_test < self.upper_limit] = 0
y_test[x_test >= self.upper_limit] = 1
return y_test
y = x.copy()
y[x < self.upper_limit] = 0
y[x >= self.upper_limit] = 1
return y

def score_samples(self, x_test: npt.NDArray[float]) -> npt.NDArray[float]:
def score_samples(self, x: npt.NDArray[float]) -> npt.NDArray[float]:
"""
Returns an array of same shape as input
with values being anomaly scores.
"""
x_test = x_test.copy()
x_test[x_test < self.upper_limit] = self.inlier_score
x_test[x_test >= self.upper_limit] = self.outlier_score
return x_test
x = x.copy()
y = 10 / (1 + np.exp(-self.coeff * (x - self.upper_limit)))
return y
2 changes: 1 addition & 1 deletion numalogic/models/threshold/_std.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class StdDevThreshold(BaseEstimator):
min_threshold: clip the threshold value to be above this value
"""

def __init__(self, std_factor: float = 3.0, min_threshold: float = 0.1):
def __init__(self, std_factor: float = 3.0, min_threshold: float = 0.0):
self.std_factor = std_factor
self.min_threshold = min_threshold

Expand Down
4 changes: 3 additions & 1 deletion numalogic/registry/mlflow_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def save(
skeys: Sequence[str],
dkeys: Sequence[str],
artifact: Artifact,
run_id: str = None,
**metadata: str,
) -> Optional[ModelVersion]:
"""
Expand All @@ -171,14 +172,15 @@ def save(
skeys: static key fields as list/tuple of strings
dkeys: dynamic key fields as list/tuple of strings
artifact: primary artifact to be saved
run_id: mlflow run id
metadata: additional metadata surrounding the artifact that needs to be saved
Returns:
mlflow ModelVersion instance
"""
model_key = self.construct_key(skeys, dkeys)
try:
mlflow.start_run()
mlflow.start_run(run_id=run_id)
self.handler.log_model(artifact, "model", registered_model_name=model_key)
if metadata:
mlflow.log_params(metadata)
Expand Down
Loading

0 comments on commit ea01b44

Please sign in to comment.