Skip to content

Commit

Permalink
ENH Makes OneToOneFeatureMixin and ClassNamePrefixFeaturesOutMixin pu…
Browse files Browse the repository at this point in the history
…blic (scikit-learn#24688)

Co-authored-by: Guillaume Lemaitre <[email protected]>
  • Loading branch information
thomasjpfan and glemaitre committed Oct 18, 2022
1 parent d4306ba commit 1dc23d7
Show file tree
Hide file tree
Showing 29 changed files with 108 additions and 73 deletions.
14 changes: 12 additions & 2 deletions doc/developers/develop.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,8 +647,18 @@ scikit-learn introduces the `set_output` API for configuring transformers to
output pandas DataFrames. The `set_output` API is automatically defined if the
transformer defines :term:`get_feature_names_out` and subclasses
:class:`base.TransformerMixin`. :term:`get_feature_names_out` is used to get the
column names of pandas output. You can opt-out of the `set_output` API by
setting `auto_wrap_output_keys=None` when defining a custom subclass::
column names of pandas output.

:class:`base.OneToOneFeatureMixin` and
:class:`base.ClassNamePrefixFeaturesOutMixin` are helpful mixins for defining
:term:`get_feature_names_out`. :class:`base.OneToOneFeatureMixin` is useful when
the transformer has a one-to-one correspondence between input features and output
features, such as :class:`~preprocessing.StandardScaler`.
:class:`base.ClassNamePrefixFeaturesOutMixin` is useful when the transformer
needs to generate its own feature names out, such as :class:`~decomposition.PCA`.

You can opt-out of the `set_output` API by setting `auto_wrap_output_keys=None`
when defining a custom subclass::

class MyTransformer(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None):

Expand Down
2 changes: 2 additions & 0 deletions doc/modules/classes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ Base classes
base.DensityMixin
base.RegressorMixin
base.TransformerMixin
base.OneToOneFeatureMixin
base.ClassNamePrefixFeaturesOutMixin
feature_selection.SelectorMixin

Functions
Expand Down
8 changes: 8 additions & 0 deletions doc/whats_new/v1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ Changelog
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
where 123456 is the *pull request* number, not the issue number.
:mod:`sklearn.base`
...................

- |Enhancement| Introduces :class:`base.OneToOneFeatureMixin` and
:class:`base.ClassNamePrefixFeaturesOutMixin` mixins that define
:term:`get_feature_names_out` for common transformer use cases.
:pr:`24688` by `Thomas Fan`_.

:mod:`sklearn.calibration`
..........................

Expand Down
25 changes: 20 additions & 5 deletions sklearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,10 @@ class TransformerMixin(_SetOutputMixin):
If :term:`get_feature_names_out` is defined, then `BaseEstimator` will
automatically wrap `transform` and `fit_transform` to follow the `set_output`
API. See the :ref:`developer_api_set_output` for details.
:class:`base.OneToOneFeatureMixin` and
:class:`base.ClassNamePrefixFeaturesOutMixin` are helpful mixins for
defining :term:`get_feature_names_out`.
"""

def fit_transform(self, X, y=None, **fit_params):
Expand Down Expand Up @@ -847,11 +851,11 @@ def fit_transform(self, X, y=None, **fit_params):
return self.fit(X, y, **fit_params).transform(X)


class _OneToOneFeatureMixin:
class OneToOneFeatureMixin:
"""Provides `get_feature_names_out` for simple transformers.
Assumes there's a 1-to-1 correspondence between input features
and output features.
This mixin assumes there's a 1-to-1 correspondence between input features
and output features, such as :class:`~preprocessing.StandardScaler`.
"""

def get_feature_names_out(self, input_features=None):
Expand All @@ -877,15 +881,26 @@ def get_feature_names_out(self, input_features=None):
return _check_feature_names_in(self, input_features)


class _ClassNamePrefixFeaturesOutMixin:
class ClassNamePrefixFeaturesOutMixin:
"""Mixin class for transformers that generate their own names by prefixing.
Assumes that `_n_features_out` is defined for the estimator.
This mixin is useful when the transformer needs to generate its own feature
names out, such as :class:`~decomposition.PCA`. For example, if
:class:`~decomposition.PCA` outputs 3 features, then the generated feature
names out are: `["pca0", "pca1", "pca2"]`.
This mixin assumes that a `_n_features_out` attribute is defined when the
transformer is fitted. `_n_features_out` is the number of output features
that the transformer will return in `transform` or `fit_transform`.
"""

def get_feature_names_out(self, input_features=None):
"""Get output feature names for transformation.
The feature names out will be prefixed by the lowercased class name. For
example, if the transformer outputs 3 features, then the feature names
out are: `["class_name0", "class_name1", "class_name2"]`.
Parameters
----------
input_features : array-like of str or None, default=None
Expand Down
4 changes: 2 additions & 2 deletions sklearn/cluster/_agglomerative.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from scipy import sparse
from scipy.sparse.csgraph import connected_components

from ..base import BaseEstimator, ClusterMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, ClusterMixin, ClassNamePrefixFeaturesOutMixin
from ..metrics.pairwise import paired_distances
from ..metrics.pairwise import _VALID_METRICS
from ..metrics import DistanceMetric
Expand Down Expand Up @@ -1100,7 +1100,7 @@ def fit_predict(self, X, y=None):


class FeatureAgglomeration(
_ClassNamePrefixFeaturesOutMixin, AgglomerativeClustering, AgglomerationTransform
ClassNamePrefixFeaturesOutMixin, AgglomerativeClustering, AgglomerationTransform
):
"""Agglomerate features.
Expand Down
4 changes: 2 additions & 2 deletions sklearn/cluster/_birch.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
TransformerMixin,
ClusterMixin,
BaseEstimator,
_ClassNamePrefixFeaturesOutMixin,
ClassNamePrefixFeaturesOutMixin,
)
from ..utils.extmath import row_norms
from ..utils._param_validation import Interval
Expand Down Expand Up @@ -357,7 +357,7 @@ def radius(self):


class Birch(
_ClassNamePrefixFeaturesOutMixin, ClusterMixin, TransformerMixin, BaseEstimator
ClassNamePrefixFeaturesOutMixin, ClusterMixin, TransformerMixin, BaseEstimator
):
"""Implements the BIRCH clustering algorithm.
Expand Down
4 changes: 2 additions & 2 deletions sklearn/cluster/_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
BaseEstimator,
ClusterMixin,
TransformerMixin,
_ClassNamePrefixFeaturesOutMixin,
ClassNamePrefixFeaturesOutMixin,
)
from ..metrics.pairwise import euclidean_distances
from ..metrics.pairwise import _euclidean_distances
Expand Down Expand Up @@ -813,7 +813,7 @@ def _labels_inertia_threadpool_limit(


class _BaseKMeans(
_ClassNamePrefixFeaturesOutMixin, TransformerMixin, ClusterMixin, BaseEstimator, ABC
ClassNamePrefixFeaturesOutMixin, TransformerMixin, ClusterMixin, BaseEstimator, ABC
):
"""Base class for KMeans and MiniBatchKMeans"""

Expand Down
6 changes: 3 additions & 3 deletions sklearn/cross_decomposition/_pls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from ..base import BaseEstimator, RegressorMixin, TransformerMixin
from ..base import MultiOutputMixin
from ..base import _ClassNamePrefixFeaturesOutMixin
from ..base import ClassNamePrefixFeaturesOutMixin
from ..utils import check_array, check_consistent_length
from ..utils.fixes import sp_version
from ..utils.fixes import parse_version
Expand Down Expand Up @@ -159,7 +159,7 @@ def _svd_flip_1d(u, v):


class _PLS(
_ClassNamePrefixFeaturesOutMixin,
ClassNamePrefixFeaturesOutMixin,
TransformerMixin,
RegressorMixin,
MultiOutputMixin,
Expand Down Expand Up @@ -901,7 +901,7 @@ def __init__(
)


class PLSSVD(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
class PLSSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
"""Partial Least Square SVD.
This transformer simply performs a SVD on the cross-covariance matrix
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
import numpy as np
from scipy import linalg

from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils.validation import check_is_fitted
from abc import ABCMeta, abstractmethod


class _BasePCA(
_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta
ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta
):
"""Base class for PCA methods.
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_dict_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from scipy import linalg
from joblib import Parallel, effective_n_jobs

from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_array, check_random_state, gen_even_slices, gen_batches
from ..utils import deprecated
from ..utils._param_validation import Hidden, Interval, StrOptions
Expand Down Expand Up @@ -1152,7 +1152,7 @@ def dict_learning_online(
return dictionary


class _BaseSparseCoding(_ClassNamePrefixFeaturesOutMixin, TransformerMixin):
class _BaseSparseCoding(ClassNamePrefixFeaturesOutMixin, TransformerMixin):
"""Base class from SparseCoder and DictionaryLearning algorithms."""

def __init__(
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_factor_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@
from scipy import linalg


from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_random_state
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
from ..utils.validation import check_is_fitted
from ..exceptions import ConvergenceWarning


class FactorAnalysis(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
class FactorAnalysis(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
"""Factor Analysis (FA).
A simple linear generative model with Gaussian latent variables.
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_fastica.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import numpy as np
from scipy import linalg

from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..exceptions import ConvergenceWarning
from ..utils import check_array, as_float_array, check_random_state
from ..utils.validation import check_is_fitted
Expand Down Expand Up @@ -337,7 +337,7 @@ def my_g(x):
return returned_values


class FastICA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
class FastICA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
"""FastICA: a fast algorithm for Independent Component Analysis.
The implementation is based on [1]_.
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_kernel_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
)
from ..utils._param_validation import Interval, StrOptions
from ..exceptions import NotFittedError
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..preprocessing import KernelCenterer
from ..metrics.pairwise import pairwise_kernels


class KernelPCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
class KernelPCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
"""Kernel Principal component analysis (KPCA) [1]_.
Non-linear dimensionality reduction through the use of kernels (see
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from scipy.special import gammaln, logsumexp
from joblib import Parallel, effective_n_jobs

from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_random_state, gen_batches, gen_even_slices
from ..utils.validation import check_non_negative
from ..utils.validation import check_is_fitted
Expand Down Expand Up @@ -154,7 +154,7 @@ def _update_doc_distribution(


class LatentDirichletAllocation(
_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
):
"""Latent Dirichlet Allocation with online variational Bayes algorithm.
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_nmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from ._cdnmf_fast import _update_cdnmf_fast
from .._config import config_context
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..exceptions import ConvergenceWarning
from ..utils import check_random_state, check_array, gen_batches
from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
Expand Down Expand Up @@ -1130,7 +1130,7 @@ def non_negative_factorization(
return W, H, n_iter


class _BaseNMF(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, ABC):
class _BaseNMF(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, ABC):
"""Base class for NMF and MiniBatchNMF."""

_parameter_constraints: dict = {
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_sparse_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
from ..utils._param_validation import Hidden, Interval, StrOptions
from ..utils.validation import check_array, check_is_fitted
from ..linear_model import ridge_regression
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ._dict_learning import dict_learning, MiniBatchDictionaryLearning


class _BaseSparsePCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
class _BaseSparsePCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
"""Base class for SparsePCA and MiniBatchSparsePCA"""

_parameter_constraints: dict = {
Expand Down
4 changes: 2 additions & 2 deletions sklearn/decomposition/_truncated_svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import scipy.sparse as sp
from scipy.sparse.linalg import svds

from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
from ..utils import check_array, check_random_state
from ..utils._arpack import _init_arpack_v0
from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip
Expand All @@ -22,7 +22,7 @@
__all__ = ["TruncatedSVD"]


class TruncatedSVD(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
class TruncatedSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
"""Dimensionality reduction using truncated SVD (aka LSA).
This transformer performs linear dimensionality reduction by means of
Expand Down
4 changes: 2 additions & 2 deletions sklearn/discriminant_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from numbers import Real, Integral

from .base import BaseEstimator, TransformerMixin, ClassifierMixin
from .base import _ClassNamePrefixFeaturesOutMixin
from .base import ClassNamePrefixFeaturesOutMixin
from .linear_model._base import LinearClassifierMixin
from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance
from .utils.multiclass import unique_labels
Expand Down Expand Up @@ -171,7 +171,7 @@ def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None):


class LinearDiscriminantAnalysis(
_ClassNamePrefixFeaturesOutMixin,
ClassNamePrefixFeaturesOutMixin,
LinearClassifierMixin,
TransformerMixin,
BaseEstimator,
Expand Down
4 changes: 2 additions & 2 deletions sklearn/feature_extraction/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import numpy as np
import scipy.sparse as sp

from ..base import BaseEstimator, TransformerMixin, _OneToOneFeatureMixin
from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
from ..preprocessing import normalize
from ._hash import FeatureHasher
from ._stop_words import ENGLISH_STOP_WORDS
Expand Down Expand Up @@ -1486,7 +1486,7 @@ def _make_int_array():


class TfidfTransformer(
_OneToOneFeatureMixin, TransformerMixin, BaseEstimator, auto_wrap_output_keys=None
OneToOneFeatureMixin, TransformerMixin, BaseEstimator, auto_wrap_output_keys=None
):
"""Transform a count matrix to a normalized tf or tf-idf representation.
Expand Down
2 changes: 1 addition & 1 deletion sklearn/isotonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def predict(self, T):
return self.transform(T)

# We implement get_feature_names_out here instead of using
# `_ClassNamePrefixFeaturesOutMixin`` because `input_features` are ignored.
# `ClassNamePrefixFeaturesOutMixin` because `input_features` are ignored.
# `input_features` are ignored because `IsotonicRegression` accepts 1d
# arrays and the semantics of `feature_names_in_` are not clear for 1d arrays.
def get_feature_names_out(self, input_features=None):
Expand Down
Loading

0 comments on commit 1dc23d7

Please sign in to comment.