Prerelease 0.4 (#173)
* docs: updated CHANGELOG.md (#158)
* chore!: use torch and lightning 2.0 (#159)
* feat: support weight decay in optimizers (#161)
* feat: first benchmarking using KPI anomaly data (#163)
* chore!: drop support for python 3.8 (#164)
* chore: introduce ruff as a linter (#167)
* feat: local memory artifact cache (#165)
* feat: in-memory cache
* feat: redis registry (#170)
* chore: allow actions for release branches (#174)
* fix: accelerator
---------

Signed-off-by: GitHub <[email protected]>
Signed-off-by: Avik Basu <[email protected]>
Signed-off-by: s0nicboOm <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: ab93 <[email protected]>
Co-authored-by: Kushal Batra <[email protected]>
4 people committed May 8, 2023
1 parent 85fb527 commit b664e49
Showing 105 changed files with 2,522 additions and 1,084 deletions.
4 changes: 2 additions & 2 deletions .codecov.yml
@@ -3,8 +3,8 @@ coverage:
  project:
    default:
      target: auto
-      threshold: 5%
+      threshold: 3%
  patch:
    default:
      target: auto
-      threshold: 10%
+      threshold: 20%
5 changes: 0 additions & 5 deletions .flake8

This file was deleted.

8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
@@ -2,9 +2,9 @@ name: Build

on:
  push:
-    branches: [ "main" ]
+    branches: [ "main", "release/*" ]
  pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "release/*" ]

jobs:
  build:
@@ -13,13 +13,13 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.9", "3.10"]

    steps:
      - uses: actions/checkout@v3

      - name: Install poetry
-        run: pipx install poetry
+        run: pipx install poetry==1.4.2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
@@ -19,7 +19,7 @@ jobs:
      - uses: actions/checkout@v3

      - name: Install poetry
-        run: pipx install poetry
+        run: pipx install poetry==1.4.2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
2 changes: 2 additions & 0 deletions .github/workflows/gh-pages.yaml
@@ -4,9 +4,11 @@ on:
  push:
    branches:
      - main
+      - "release/*"
  pull_request:
    branches:
      - main
+      - "release/*"

jobs:
  docs:
54 changes: 10 additions & 44 deletions .github/workflows/lint.yml
@@ -2,57 +2,23 @@ name: Lint

on:
  push:
-    branches: [ "main" ]
+    branches: [ "main", "release/*" ]
  pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "release/*" ]

jobs:
  black:
-    name: Black format
    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9"]
-
    steps:
-      - uses: actions/checkout@v3
-
-      - name: Install poetry
-        run: pipx install poetry
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'poetry'
-
-      - name: Install dependencies
-        run: |
-          poetry env use ${{ matrix.python-version }}
-          poetry install --with dev
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable
+        with:
+          options: "--check --verbose"
+          version: "~= 23.3"

-      - name: Black format check
-        run: poetry run black --check .
-
-  flake8:
-    name: flake8 check
+  ruff:
    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [ "3.9" ]
-
    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: flake8 Lint
-        uses: py-actions/flake8@v2
-        with:
-          config: "./.flake8"
-          path: "numalogic"
+      - uses: actions/checkout@v3
+      - uses: chartboost/ruff-action@v1
2 changes: 1 addition & 1 deletion .gitignore
@@ -166,4 +166,4 @@ cython_debug/
# Mac related
*.DS_Store

-.python-version
+.python-version
31 changes: 31 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,31 @@
default_language_version:
python: python3.9
repos:
- repo: https://github.com/psf/black
rev: 23.3.0
hooks:
- id: black
language_version: python3.9
args: [--config=pyproject.toml, --diff, --color ]
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.0.264'
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- repo: https://github.com/adamchainz/blacken-docs
rev: "1.13.0"
hooks:
- id: blacken-docs
additional_dependencies:
- black==22.12.0
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-toml
- id: check-added-large-files
- id: check-ast
- id: check-case-conflict
- id: check-docstring-first
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog

+## v0.3.8 (2023-04-18)
+
+* [3160c2b](https://github.com/numaproj/numalogic/commit/3160c2b4a248f974bc6c6e4893e7c68c1fdd7890) feat: exponential moving average postprocessing (#156)
+* [9de8e4c](https://github.com/numaproj/numalogic/commit/9de8e4cfa7438047b8e7bd22c84bdcf859edc292) fix: validation loss not being logged (#155)
+
+### Contributors
+
+* Avik Basu
+
## v0.3.7 (2023-03-27)

* [b61ac1f](https://github.com/numaproj/numalogic/commit/b61ac1fbd639f482b7ecb661da47254695139299) fix: Tanhscaler nan output for constant feature (#153)
@@ -203,4 +212,3 @@
### Contributors

* Kushal Batra
-
2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
@@ -1,3 +1,3 @@
# Contributor Covenant Code of Conduct

Please refer to [Code of Conduct](https://github.com/numaproj/numaproj/blob/main/CODE_OF_CONDUCT.md)
4 changes: 2 additions & 2 deletions Makefile
@@ -16,10 +16,10 @@ clean:
	@find . -type f -name "*.py[co]" -exec rm -rf {} +

format: clean
-	poetry run black numalogic/ examples/ tests/
+	poetry run black numalogic/ examples/ tests/ benchmarks/

lint: format
-	poetry run flake8 .
+	poetry run ruff check --fix .

# install all dependencies
setup:
53 changes: 24 additions & 29 deletions README.md
@@ -9,27 +9,27 @@


## Background
Numalogic is a collection of ML models and algorithms for operational data analytics and AIOps.
At Intuit, we use Numalogic at scale for continuous real-time data enrichment, including
anomaly scoring. We assign an anomaly score (ML inference) to any time-series
datum/event/message we receive on our streaming platform (say, Kafka). 95% of our
data sets are time series, and we have a complex flowchart to execute ML inference on
our high-throughput sources. We run multiple models on the same datum: say, a model that is
sensitive towards positive sentiments, another tuned more towards negative sentiments, and another
optimized for neutral sentiments. We also have a couple of ML models trained for the same
data source to provide more accurate scores based on the data density in our model store.
An ensemble of models is required because some composite keys in the data tend to be less
dense than others; e.g., a forgot-password interaction is less frequent than a status-check
interaction. At runtime, for each datum that arrives, models are picked based on a conditional
forwarding filter set on the data density. ML engineers need to worry only about their
inference container; they do not have to worry about data movement and quality assurance.

## Numalogic realtime training
For an always-on ML platform, the key requirement is the ability to train or retrain models
automatically based on the incoming messages. The composite key, built per message at runtime,
looks for a matching model, and if the model turns out to be stale or missing, an automatic
retraining is triggered. The conditional forwarding feature of the platform improves the
development velocity of ML developers when they have to decide whether to forward
the result further or drop it after a trigger request.

@@ -59,17 +59,12 @@ For set-up information and running your first pipeline using numalogic, please s
Numalogic requires Python 3.8 or higher.

### Prerequisites
Numalogic needs [PyTorch](https://pytorch.org/) and
[PyTorch Lightning](https://pytorch-lightning.readthedocs.io/en/stable/) to work,
but since these packages are platform-dependent,
they are not included in the numalogic package itself. Kindly install them first.

-Numalogic supports the following pytorch versions:
-- 1.11.x
-- 1.12.x
-- 1.13.x
-
-Other versions do work, it is just that they are not tested.
+Numalogic supports pytorch versions `2.0.0` and above.

numalogic can be installed using pip.
```shell
pip install numalogic
```

@@ -103,7 +98,7 @@ pip install numalogic[mlflow]
```
make test
```
-5. To format code style using black:
+5. To format code style using black and ruff:
```
make lint
```
10 changes: 10 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,10 @@
## Benchmarks

This section contains some benchmarking results of numalogic's algorithms on real as well as
synthetic data. The datasets used here are publicly available from their respective repositories.

Note that no real effort has been made on hyperparameter tuning. This is just to give users an
idea of how suitable each algorithm is for different kinds of data, and to show how they can do
their own benchmarking too.

This is an ongoing process, and we will add more benchmarking results in the near future.
Empty file added benchmarks/__init__.py
Empty file.
29 changes: 29 additions & 0 deletions benchmarks/kpi/README.md
@@ -0,0 +1,29 @@
## KPI Anomaly dataset

The KPI anomaly dataset consists of KPI (key performance index) time series data from
many real scenarios of Internet companies, with ground truth labels.
The dataset can be found [here](https://github.com/NetManAIOps/KPI-Anomaly-Detection).

The full dataset contains multiple KPI IDs. Different KPI time series have different structures
and patterns.
For our purpose, we are running anomaly detection for some of these KPI indices.

The performance table is shown below; note that the hyperparameters have not been tuned.
The hyperparameters used are available in the results directory under each algorithm.


| KPI ID | KPI index | Algorithm | ROC-AUC |
|--------------------------------------|-----------|---------------|---------|
| 431a8542-c468-3988-a508-3afd06a218da | 14 | VanillaAE | 0.89 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | Conv1dAE | 0.88 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | LSTMAE | 0.86 |
| 431a8542-c468-3988-a508-3afd06a218da | 14 | TransformerAE | 0.82 |


Full credit to Zeyan Li et al. for constructing large-scale real world benchmark datasets for AIOps.

@misc{2208.03938,
  Author = {Zeyan Li and Nengwen Zhao and Shenglin Zhang and Yongqian Sun and Pengfei Chen and Xidao Wen and Minghua Ma and Dan Pei},
  Title = {Constructing Large-Scale Real-World Benchmark Datasets for AIOps},
  Year = {2022},
  Eprint = {arXiv:2208.03938},
}
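The ROC-AUC column above measures how well each model's anomaly scores rank anomalous points over normal ones. A dependency-free sketch of that metric follows; the benchmarks themselves presumably use a library implementation such as scikit-learn's `roc_auc_score`:

```python
def roc_auc(scores, labels):
    """Rank-based ROC-AUC: the probability that a randomly chosen anomaly
    (label 1) receives a higher anomaly score than a randomly chosen
    normal point (label 0). Ties get mid-ranks."""
    order = sorted(range(len(scores)), key=lambda i: scores[i])
    ranks = [0.0] * len(scores)
    i = 0
    while i < len(order):
        # find the run of tied scores starting at position i
        j = i
        while j + 1 < len(order) and scores[order[j + 1]] == scores[order[i]]:
            j += 1
        mid = (i + j) / 2 + 1  # average 1-based rank over the tie group
        for k in range(i, j + 1):
            ranks[order[k]] = mid
        i = j + 1
    pos = [r for r, y in zip(ranks, labels) if y == 1]
    n_pos, n_neg = len(pos), len(labels) - len(pos)
    # Mann-Whitney U statistic normalized to [0, 1]
    return (sum(pos) - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)

# perfect separation of anomalies from normal points
print(roc_auc([0.1, 0.2, 0.9, 0.8], [0, 0, 1, 1]))  # → 1.0
```

A score of 0.5 means the model ranks no better than chance, which is why the table's 0.82-0.89 range indicates usable, if untuned, detectors.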
Empty file added benchmarks/kpi/__init__.py
Empty file.