Skip to content

Commit

Permalink
Fix PowerTransformer leaves constant feature unchanged (scikit-learn#…
Browse files Browse the repository at this point in the history
…26566)

Co-authored-by: Olivier Grisel <[email protected]>
  • Loading branch information
jeremiedbb and ogrisel committed Jun 14, 2023
1 parent 83cb686 commit e5df5fe
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 10 deletions.
5 changes: 5 additions & 0 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,11 @@ Changelog
using `method="box-cox"` on data with a constant `np.nan` column.
:pr:`26400` by :user:`Yao Xiao <Charlie-XIAO>`.

- |Fix| :class:`preprocessing.PowerTransformer` with `method="yeo-johnson"` now leaves
constant features unchanged instead of transforming with an arbitrary value for
the `lambdas_` fitted parameter.
:pr:`26566` by :user:`Jérémie du Boisberranger <jeremiedbb>`.

:mod:`sklearn.svm`
..................

Expand Down
33 changes: 23 additions & 10 deletions sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3150,21 +3150,34 @@ def _fit(self, X, y=None, force_transform=False):
if not self.copy and not force_transform: # if call from fit()
X = X.copy() # force copy so that fit does not change X inplace

n_samples = X.shape[0]
mean = np.mean(X, axis=0, dtype=np.float64)
var = np.var(X, axis=0, dtype=np.float64)

optim_function = {
"box-cox": self._box_cox_optimize,
"yeo-johnson": self._yeo_johnson_optimize,
}[self.method]

transform_function = {
"box-cox": boxcox,
"yeo-johnson": self._yeo_johnson_transform,
}[self.method]

with np.errstate(invalid="ignore"): # hide NaN warnings
self.lambdas_ = np.array([optim_function(col) for col in X.T])

if self.standardize or force_transform:
transform_function = {
"box-cox": boxcox,
"yeo-johnson": self._yeo_johnson_transform,
}[self.method]
for i, lmbda in enumerate(self.lambdas_):
with np.errstate(invalid="ignore"): # hide NaN warnings
X[:, i] = transform_function(X[:, i], lmbda)
self.lambdas_ = np.empty(X.shape[1], dtype=X.dtype)
for i, col in enumerate(X.T):
# For yeo-johnson, leave constant features unchanged
# lambda=1 corresponds to the identity transformation
is_constant_feature = _is_constant_feature(var[i], mean[i], n_samples)
if self.method == "yeo-johnson" and is_constant_feature:
self.lambdas_[i] = 1.0
continue

self.lambdas_[i] = optim_function(col)

if self.standardize or force_transform:
X[:, i] = transform_function(X[:, i], self.lambdas_[i])

if self.standardize:
self._scaler = StandardScaler(copy=False)
Expand Down
19 changes: 19 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2669,3 +2669,22 @@ def test_kernel_centerer_feature_names_out():
names_out = centerer.get_feature_names_out()
samples_out2 = X_pairwise.shape[1]
assert_array_equal(names_out, [f"kernelcenterer{i}" for i in range(samples_out2)])


@pytest.mark.parametrize("standardize", [True, False])
def test_power_transformer_constant_feature(standardize):
    """Check that PowerTransformer leaves constant features unchanged.

    Non-regression test: with method="yeo-johnson", a constant column must be
    fitted with ``lambdas_ == 1`` (the identity transformation) instead of an
    arbitrary optimized value.
    """
    # Every column is constant across the three samples.
    X = [[-2, 0, 2], [-2, 0, 2], [-2, 0, 2]]

    pt = PowerTransformer(method="yeo-johnson", standardize=standardize).fit(X)

    # lambda=1 corresponds to the identity for yeo-johnson, so each constant
    # feature must be fitted with lambda exactly 1.
    assert_allclose(pt.lambdas_, [1, 1, 1])

    Xft = pt.fit_transform(X)
    Xt = pt.transform(X)

    # Both fit_transform and transform must agree: a constant feature is
    # mapped to all zeros when standardized, and passes through unchanged
    # otherwise.
    for Xt_ in [Xft, Xt]:
        if standardize:
            assert_allclose(Xt_, np.zeros_like(X))
        else:
            assert_allclose(Xt_, X)

0 comments on commit e5df5fe

Please sign in to comment.