Skip to content

Commit

Permalink
feat: generate static scores for backtesting (#355)
Browse files Browse the repository at this point in the history
Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 committed Mar 26, 2024
1 parent e351206 commit 377ada2
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 3 deletions.
44 changes: 44 additions & 0 deletions numalogic/backtest/_prom.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,50 @@ def generate_scores(
unified_out=unified_scores,
)

def generate_static_scores(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Generate static-threshold scores for the metrics configured in the score adjust config.

    The metrics scored are the keys of ``nlconf.score.adjust.upper_limits``.
    The selected columns are wrapped in a ``StreamingDataset`` with
    ``seq_len=conf.window_size``; every item it yields produces one row of
    per-metric scores and one unified score. Both score arrays are then
    prepended with ``window_size - 1`` NaN rows before being indexed by
    ``df.index``.

    Args:
        df: Input dataframe; must contain a column for every configured metric.

    Returns:
        A dataframe concatenated column-wise from three groups keyed
        "input" (the given ``df``), "static_features" (one column per
        metric) and "static_unified" (single column "unified").

    Raises:
        ValueError: If ``nlconf.score.adjust`` is not set.
    """
    score_conf = self.nlconf.score
    adjust_conf = score_conf.adjust
    if not adjust_conf:
        raise ValueError("No adjust params provided in the config!")

    metric_names = list(adjust_conf.upper_limits)
    values = df[metric_names].to_numpy(dtype=np.float32)

    udf_cls = UDFFactory.get_udf_cls("postprocess")
    win_size = self.conf.window_size
    windows = StreamingDataset(values, seq_len=win_size)

    n_windows, n_metrics = len(windows), len(metric_names)
    per_feature = np.zeros((n_windows, n_metrics), dtype=np.float32)
    unified = np.zeros((n_windows, 1), dtype=np.float32)

    for row, window in enumerate(windows):
        per_feature[row] = udf_cls.compute_static_threshold(window, score_conf=score_conf)
        # The unified score is derived from the row as stored (float32),
        # not from the raw return value of compute_static_threshold.
        unified[row] = udf_cls.compute_unified_score(
            per_feature[row], feat_agg_conf=adjust_conf.feature_agg
        )

    # Prepend NaN rows so both arrays can be indexed by df.index.
    n_pad = win_size - 1
    per_feature = np.concatenate(
        (np.full((n_pad, n_metrics), fill_value=np.nan), per_feature), axis=0
    )
    unified = np.concatenate((np.full((n_pad, 1), fill_value=np.nan), unified), axis=0)

    frames = {
        "input": df,
        "static_features": pd.DataFrame(per_feature, index=df.index, columns=metric_names),
        "static_unified": pd.DataFrame(unified, index=df.index, columns=["unified"]),
    }
    return pd.concat(frames, axis=1)

@classmethod
def get_outdir(cls, expname: str, outdir=DEFAULT_OUTPUT_DIR) -> str:
"""Get the output directory for the given metric."""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "numalogic"
version = "0.8.dev0"
version = "0.8.1dev0"
description = "Collection of operational Machine Learning models and tools."
authors = ["Numalogic Developers"]
packages = [{ include = "numalogic" }]
Expand Down
25 changes: 23 additions & 2 deletions tests/test_backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@

from numalogic._constants import TESTS_DIR
from numalogic.backtest import PromBacktester
from numalogic.config import NumalogicConf, ModelInfo, TrainerConf, LightningTrainerConf
from numalogic.config import (
NumalogicConf,
ModelInfo,
TrainerConf,
LightningTrainerConf,
ScoreConf,
ScoreAdjustConf,
)
from numalogic.models.vae import Conv1dVAE

# Base URL passed to PromBacktester by the test fixture; a single "http" scheme
# so that the host and port parse correctly.
URL = "http://localhost:9090"
Expand All @@ -16,6 +23,14 @@
model=ModelInfo(name="Conv1dVAE", conf=dict(seq_len=12, n_features=3, latent_dim=1)),
threshold=ModelInfo(name="MaxPercentileThreshold"),
trainer=TrainerConf(pltrainer_conf=LightningTrainerConf(accelerator="cpu", max_epochs=1)),
score=ScoreConf(
adjust=ScoreAdjustConf(
upper_limits={
"namespace_app_rollouts_cpu_utilization": 80,
"namespace_app_rollouts_memory_utilization": 80,
}
)
),
)
)

Expand All @@ -24,7 +39,7 @@
def backtester(tmp_path):
return PromBacktester(
url=URL,
query="namespace_app_rollouts_http_request_error_rate{namespace='sandbox-numalogic-demo'}",
query="{namespace='sandbox-numalogic-demo'}",
metrics=[
"namespace_app_rollouts_cpu_utilization",
"namespace_app_rollouts_http_request_error_rate",
Expand Down Expand Up @@ -52,3 +67,9 @@ def test_scores(backtester, read_data):
out_df = backtester.generate_scores(read_data)
assert isinstance(out_df, pd.DataFrame)
assert out_df.shape[0] == int(backtester.test_ratio * read_data.shape[0])


def test_static_scores(backtester, read_data):
    """Static scoring returns a DataFrame with as many rows as the input data."""
    static_df = backtester.generate_static_scores(read_data)
    assert isinstance(static_df, pd.DataFrame)
    n_rows_out, n_rows_in = static_df.shape[0], read_data.shape[0]
    assert n_rows_out == n_rows_in

0 comments on commit 377ada2

Please sign in to comment.