tests: median thresh tests

Signed-off-by: Avik Basu <[email protected]>
numaproj · Jan 27, 2024 · 36db5df
1 parent 777b5e2
commit 36db5df
Show file tree

Hide file tree

Showing 7 changed files with 136 additions and 71 deletions.
diff --git a/numalogic/models/threshold/_median.py b/numalogic/models/threshold/_median.py
@@ -83,5 +83,5 @@ def agg_score_samples(
  y: npt.NDArray[float], weights: Optional[Sequence[float]] = None
  ) -> npt.NDArray[float]:
  if weights:
- return np.average(y, weights=weights, axis=1)
- return np.mean(y, axis=1)
+ return np.average(y, weights=weights, axis=1, keepdims=True)
+ return np.mean(y, axis=1, keepdims=True)
diff --git a/numalogic/transforms/_stateless.py b/numalogic/transforms/_stateless.py
@@ -129,14 +129,6 @@ def transform(self, x: npt.NDArray[float], **__) -> npt.NDArray[float]:
 
 
 class DifferenceTransform(StatelessTransformer):
- __slots__ = ("order",)
-
- def __init__(
- self,
- order: int = 1,
- ):
- self.order = order
-
  def transform(self, input_: npt.NDArray, **__):
  diff_df = pd.DataFrame(input_).diff().bfill()
  return diff_df.to_numpy(dtype=np.float32)
diff --git a/tests/models/threshold/__init__.py b/tests/models/threshold/__init__.py
diff --git a/tests/models/test_threshold.py b/tests/models/threshold/test_maha.py
@@ -3,32 +3,11 @@
 import numpy as np
 
 from numalogic.models.threshold import (
- StdDevThreshold,
- StaticThreshold,
- SigmoidThreshold,
  MahalanobisThreshold,
 )
 from numalogic.tools.exceptions import ModelInitializationError, InvalidDataShapeError
 
 
-class TestStdDevThreshold(unittest.TestCase):
- def setUp(self) -> None:
- self.x_train = np.arange(100).reshape(50, 2)
- self.x_test = np.arange(100, 160, 6).reshape(5, 2)
-
- def test_estimator_predict(self):
- clf = StdDevThreshold()
- clf.fit(self.x_train)
- y = clf.predict(self.x_test)
- self.assertAlmostEqual(0.4, np.mean(y), places=1)
-
- def test_estimator_score(self):
- clf = StdDevThreshold()
- clf.fit(self.x_train)
- score = clf.score_samples(self.x_test)
- self.assertAlmostEqual(0.93317, np.mean(score), places=2)
-
-
 class TestMahalanobisThreshold(unittest.TestCase):
  @classmethod
  def setUpClass(cls) -> None:
@@ -92,45 +71,5 @@ def test_score_samples_err(self):
  clf.score_samples(self.x_test)
 
 
-class TestStaticThreshold(unittest.TestCase):
- def setUp(self) -> None:
- self.x = np.arange(20).reshape(10, 2).astype(float)
-
- def test_predict(self):
- clf = StaticThreshold(upper_limit=5)
- clf.fit(self.x)
- y = clf.predict(self.x)
- self.assertTupleEqual(self.x.shape, y.shape)
- self.assertEqual(np.max(y), 1)
- self.assertEqual(np.min(y), 0)
-
- def test_score(self):
- clf = StaticThreshold(upper_limit=5.0)
- y = clf.score_samples(self.x)
- self.assertTupleEqual(self.x.shape, y.shape)
- self.assertEqual(np.max(y), clf.outlier_score)
- self.assertEqual(np.min(y), clf.inlier_score)
-
-
-class TestSigmoidThreshold(unittest.TestCase):
- def setUp(self) -> None:
- self.x = np.arange(20).reshape(10, 2).astype(float)
-
- def test_predict(self):
- clf = SigmoidThreshold(upper_limit=5)
- clf.fit(self.x)
- y = clf.predict(self.x)
- self.assertTupleEqual(self.x.shape, y.shape)
- self.assertEqual(np.max(y), 1)
- self.assertEqual(np.min(y), 0)
-
- def test_score(self):
- clf = SigmoidThreshold(upper_limit=5.0)
- y = clf.score_samples(self.x)
- self.assertTupleEqual(self.x.shape, y.shape)
- self.assertEqual(np.max(y), clf.score_limit)
- self.assertGreater(np.min(y), 0.0)
-
-
 if __name__ == "__main__":
  unittest.main()
diff --git a/tests/models/threshold/test_median.py b/tests/models/threshold/test_median.py
@@ -0,0 +1,58 @@
+import os
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import pytest
+
+from numalogic._constants import TESTS_DIR
+from numalogic.models.threshold import MaxPercentileThreshold
+
+
+@pytest.fixture
+def data() -> tuple[npt.NDArray[float], npt.NDArray[float]]:
+ x = pd.read_csv(
+ os.path.join(TESTS_DIR, "resources", "data", "prom_mv.csv"), index_col="timestamp"
+ ).to_numpy(dtype=np.float32)
+ return x[:-50], x[-50:]
+
+
+@pytest.fixture()
+def fitted(data):
+ clf = MaxPercentileThreshold(max_inlier_percentile=75, min_threshold=1e-3)
+ x_train, _ = data
+ clf.fit(x_train)
+ return clf
+
+
+def test_score_samples(data, fitted):
+ _, x_test = data
+ y_scores = fitted.score_samples(x_test)
+ assert len(fitted.threshold) == 3
+ assert fitted.threshold[1] == 1e-3
+ assert y_scores.shape == (50, 3)
+
+
+def test_predict(data, fitted):
+ _, x_test = data
+ y_pred = fitted.predict(x_test)
+ assert y_pred.shape == (50, 3)
+
+
+def test_agg_score_samples(data):
+ x_train, x_test = data
+ clf_1 = MaxPercentileThreshold(max_inlier_percentile=75, min_threshold=1e-3, aggregate=True)
+ clf_2 = MaxPercentileThreshold(
+ max_inlier_percentile=75,
+ min_threshold=1e-3,
+ aggregate=True,
+ feature_weights=[0.1, 0.7, 0.2],
+ )
+ clf_1.fit(x_train)
+ clf_2.fit(x_train)
+
+ y_scores_1 = clf_1.score_samples(x_test)
+ y_scores_2 = clf_2.score_samples(x_test)
+
+ assert y_scores_1.shape == y_scores_2.shape == (50, 1)
+ assert np.sum(y_scores_1) > np.sum(y_scores_2)
diff --git a/tests/models/threshold/test_static.py b/tests/models/threshold/test_static.py
@@ -0,0 +1,49 @@
+import unittest
+
+import numpy as np
+
+from numalogic.models.threshold import StaticThreshold, SigmoidThreshold
+
+
+class TestStaticThreshold(unittest.TestCase):
+ def setUp(self) -> None:
+ self.x = np.arange(20).reshape(10, 2).astype(float)
+
+ def test_predict(self):
+ clf = StaticThreshold(upper_limit=5)
+ clf.fit(self.x)
+ y = clf.predict(self.x)
+ self.assertTupleEqual(self.x.shape, y.shape)
+ self.assertEqual(np.max(y), 1)
+ self.assertEqual(np.min(y), 0)
+
+ def test_score(self):
+ clf = StaticThreshold(upper_limit=5.0)
+ y = clf.score_samples(self.x)
+ self.assertTupleEqual(self.x.shape, y.shape)
+ self.assertEqual(np.max(y), clf.outlier_score)
+ self.assertEqual(np.min(y), clf.inlier_score)
+
+
+class TestSigmoidThreshold(unittest.TestCase):
+ def setUp(self) -> None:
+ self.x = np.arange(20).reshape(10, 2).astype(float)
+
+ def test_predict(self):
+ clf = SigmoidThreshold(upper_limit=5)
+ clf.fit(self.x)
+ y = clf.predict(self.x)
+ self.assertTupleEqual(self.x.shape, y.shape)
+ self.assertEqual(np.max(y), 1)
+ self.assertEqual(np.min(y), 0)
+
+ def test_score(self):
+ clf = SigmoidThreshold(upper_limit=5.0)
+ y = clf.score_samples(self.x)
+ self.assertTupleEqual(self.x.shape, y.shape)
+ self.assertEqual(np.max(y), clf.score_limit)
+ self.assertGreater(np.min(y), 0.0)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/models/threshold/test_std.py b/tests/models/threshold/test_std.py
@@ -0,0 +1,27 @@
+import unittest
+
+import numpy as np
+
+from numalogic.models.threshold import StdDevThreshold
+
+
+class TestStdDevThreshold(unittest.TestCase):
+ def setUp(self) -> None:
+ self.x_train = np.arange(100).reshape(50, 2)
+ self.x_test = np.arange(100, 160, 6).reshape(5, 2)
+
+ def test_estimator_predict(self):
+ clf = StdDevThreshold()
+ clf.fit(self.x_train)
+ y = clf.predict(self.x_test)
+ self.assertAlmostEqual(0.4, np.mean(y), places=1)
+
+ def test_estimator_score(self):
+ clf = StdDevThreshold()
+ clf.fit(self.x_train)
+ score = clf.score_samples(self.x_test)
+ self.assertAlmostEqual(0.93317, np.mean(score), places=2)
+
+
+if __name__ == "__main__":
+ unittest.main()