Skip to content

Commit

Permalink
fix: percentile scaler, exp mov avg, sigmoid norm (#369)
Browse files Browse the repository at this point in the history
Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 committed Apr 21, 2024
1 parent f7bd30b commit b36356c
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 42 deletions.
6 changes: 4 additions & 2 deletions numalogic/config/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class PreprocessFactory(_ObjectFactory):
DifferenceTransform,
FlattenVector,
PercentileScaler,
ExpMovingAverage
)

_CLS_MAP: ClassVar[dict] = {
Expand All @@ -67,6 +68,7 @@ class PreprocessFactory(_ObjectFactory):
"DifferenceTransform": DifferenceTransform,
"FlattenVector": FlattenVector,
"PercentileScaler": PercentileScaler,
"ExpMovingAverage": ExpMovingAverage
}

def get_pipeline_instance(self, objs_info: list[ModelInfo]):
Expand All @@ -84,9 +86,9 @@ def get_pipeline_instance(self, objs_info: list[ModelInfo]):
class PostprocessFactory(_ObjectFactory):
    """Factory class to create postprocess instances."""

    # Class-scoped import: the imported names are bound in the class body
    # and referenced by _CLS_MAP below.
    from numalogic.transforms import TanhNorm, ExpMovingAverage, SigmoidNorm

    # Lookup from transform name to the class that implements it.
    # Written one entry per line to stay within the 100-character line limit (Ruff E501).
    _CLS_MAP: ClassVar[dict] = {
        "TanhNorm": TanhNorm,
        "ExpMovingAverage": ExpMovingAverage,
        "SigmoidNorm": SigmoidNorm,
    }

Check failure on line 91 in numalogic/config/factory.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E501)

numalogic/config/factory.py:91:101: E501 Line too long (119 > 100 characters)


class ThresholdFactory(_ObjectFactory):
Expand Down
3 changes: 2 additions & 1 deletion numalogic/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
FlattenVector,
)
from numalogic.transforms._movavg import ExpMovingAverage, expmov_avg_aggregator
from numalogic.transforms._postprocess import TanhNorm, tanh_norm
from numalogic.transforms._postprocess import TanhNorm, tanh_norm, SigmoidNorm

__all__ = [
"TanhScaler",
Expand All @@ -39,4 +39,5 @@
"DifferenceTransform",
"FlattenVector",
"PercentileScaler",
"SigmoidNorm"
]
44 changes: 7 additions & 37 deletions numalogic/transforms/_movavg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# limitations under the License.

import numpy as np
import pandas as pd

from numalogic.base import StatelessTransformer
from numalogic.tools.exceptions import InvalidDataShapeError
Expand Down Expand Up @@ -86,24 +87,20 @@ class ExpMovingAverage(StatelessTransformer):
Args:
----
beta: how much weight to give to the previous weighted average
bias_correction: flag to perform bias correction (default: true)
Note: this only supports single feature input array.
Raises
------
ValueError: if beta is not between 0 and 1
"""

__slots__ = ("beta", "bias_correction")
__slots__ = ("alpha",)

def __init__(self, beta: float, bias_correction: bool = True):
def __init__(self, beta: float = 0.5):
if beta <= 0.0 or beta >= 1.0:
raise ValueError("beta only accepts values between 0 and 1 (not inclusive)")
self.beta = beta
self.bias_correction = bias_correction
self.alpha = 1.0 - beta

def transform(self, input_: npt.NDArray[float], **__):
def transform(self, input_: npt.NDArray[float], **__) -> npt.NDArray[float]:
r"""Returns transformed output.
Args:
Expand All @@ -114,32 +111,5 @@ def transform(self, input_: npt.NDArray[float], **__):
------
InvalidDataShapeError: if input array is not single featured
"""
_allow_only_single_feature(input_)

# alpha is the weight given to the latest element
alpha = 1.0 - self.beta
n = len(input_)

theta = input_.reshape(-1, 1)
theta_tril = np.multiply(theta.T, np.tril(np.ones((n, n))))
powers = np.arange(1, n + 1).reshape(-1, 1)

# Calculate increasing powers of beta of the form,
# [beta, beta**2, .., beta**n]
beta_powers = np.power(self.beta, powers)

# Calculate the array of reciprocals of beta powers of form,
# [beta**(-1), beta**(-2), .., beta**(-n)]
beta_arr_inv = np.reciprocal(beta_powers)

# Calculate the summation of the ratio between (theta(i) / beta**i),
# [ theta(1)/beta, sum(theta(1)/beta, theta(2)/beta**2), .., ]
theta_beta_ratio = theta_tril @ beta_arr_inv

# Elemental multiply with beta powers
exp_avg = alpha * np.multiply(beta_powers, theta_beta_ratio)
if not self.bias_correction:
return exp_avg

# Calculate array of 1 / (1 - beta**i) values
return np.divide(exp_avg, 1.0 - beta_powers)
x_df = pd.DataFrame(input_)
return x_df.ewm(alpha=self.alpha).mean().to_numpy(dtype=np.float32)
10 changes: 10 additions & 0 deletions numalogic/transforms/_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,13 @@ def __init__(self, scale_factor=10, smooth_factor=10):

def transform(self, input_: npt.NDArray[float], **__) -> npt.NDArray[float]:
return tanh_norm(input_, scale_factor=self.scale_factor, smooth_factor=self.smooth_factor)


class SigmoidNorm(StatelessTransformer):
    """Stateless transform that maps values onto a scaled, shifted sigmoid curve.

    Each element ``x`` of the input is mapped to
    ``scale_factor / (1 + exp(5 - smooth_factor * x))``.

    Args:
    ----
        scale_factor: numerator of the sigmoid expression
        smooth_factor: multiplier applied to the input inside the exponent
    """

    def __init__(self, scale_factor: float = 10.0, smooth_factor: float = 0.5):
        super().__init__()
        self.smooth_factor = smooth_factor
        self.scale_factor = scale_factor

    def transform(self, x: npt.NDArray[float], **__) -> npt.NDArray[float]:
        """Return the element-wise scaled sigmoid of ``x``.

        Any extra keyword arguments are accepted and ignored.
        """
        # The constant 5 shifts the curve: the output equals scale_factor / 2
        # where smooth_factor * x == 5.
        exponent = 5 - (self.smooth_factor * x)
        denominator = 1.0 + np.exp(exponent)
        return self.scale_factor / denominator

Check failure on line 60 in numalogic/transforms/_postprocess.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (W292)

numalogic/transforms/_postprocess.py:60:80: W292 No newline at end of file
32 changes: 30 additions & 2 deletions numalogic/transforms/_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,24 +82,52 @@ class PercentileScaler(BaseTransformer):
-----
max_percentile: float, optional
The upper percentile to clip the data.
Default is 90.
Default is 99.
min_percentile: float, optional
The lower percentile to clip the data.
If None, minimum value of the data is used.
Default is None.
"""

def __init__(self, max_percentile: float = 95, min_percentile: Optional[float] = None):
def __init__(self, max_percentile: float = 99, min_percentile: Optional[float] = None, eps: float = 1e-2):

Check failure on line 92 in numalogic/transforms/_scaler.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E501)

numalogic/transforms/_scaler.py:92:101: E501 Line too long (110 > 100 characters)
self._max_px = max_percentile
self._min_px = min_percentile
self.tx = MinMaxScaler()

self._data_pth_max = None
self._data_pth_min = None
self._eps = eps

@property
def data_pth_max(self) -> float:
    """Upper clipping bound stored by ``fit``; ``None`` until ``fit`` has been called."""
    upper_bound = self._data_pth_max
    return upper_bound

@property
def data_pth_min(self) -> float:
    """Lower clipping bound stored by ``fit``; ``None`` until ``fit`` has been called."""
    lower_bound = self._data_pth_min
    return lower_bound

def fit(self, x: npt.NDArray[float]) -> Self:
    """Compute the clipping bounds along axis 0 and fit the inner scaler on the clipped data.

    The upper bound is the ``max_percentile``-th percentile. The lower bound is
    the ``min_percentile``-th percentile, or the minimum when no lower
    percentile was configured. Wherever the gap between the two bounds is at
    most ``eps``, a warning is logged and the upper bound is replaced by the
    maximum of the data at that position.

    Args:
    ----
        x: training data; statistics are taken along axis 0

    Returns
    -------
        This instance, so calls can be chained as the ``Self`` annotation promises.
    """
    upper = np.percentile(x, self._max_px, axis=0)
    if self._min_px is None:
        lower = np.min(x, axis=0)
    else:
        lower = np.percentile(x, self._min_px, axis=0)

    # Flag positions whose percentile range is too narrow to scale reliably.
    too_narrow = (upper - lower) <= self._eps
    for idx in np.flatnonzero(too_narrow):
        LOGGER.warning(
            "Max and Min percentile difference is less than "
            "epsilon: %s for column %s",
            self._eps,
            idx,
        )
    # Fall back to the true maximum only where the range was flagged.
    upper = np.where(too_narrow, np.max(x, axis=0), upper)

    self._data_pth_max = upper
    self._data_pth_min = lower

    x_clipped = DataClipper(lower=lower, upper=upper).transform(x)
    self.tx.fit(x_clipped)
    # Return this scaler rather than the result of the inner scaler's fit(),
    # which is what the original returned despite the ``Self`` annotation.
    return self

Expand Down

0 comments on commit b36356c

Please sign in to comment.