Skip to content

Commit

Permalink
[MRG] Correcting length of explained_variance_ratio_, eigen solver (s…
Browse files Browse the repository at this point in the history
  • Loading branch information
JPFrancoia authored and jnothman committed Oct 25, 2016
1 parent 3c18735 commit f260898
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 21 deletions.
26 changes: 23 additions & 3 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,31 @@ Bug fixes
`#6497 <https://github.com/scikit-learn/scikit-learn/pull/6497>`_
by `Sebastian Säger`_

- Attribute ``explained_variance_ratio_`` of
:class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated
with the SVD and Eigen solvers is now of the same length. (`#7632
<https://github.com/scikit-learn/scikit-learn/pull/7632>`_).
By `JPFrancoia`_

- Fixes issue in :ref:`univariate_feature_selection` where score
functions were not accepting multi-label targets. (`#7676
<https://github.com/scikit-learn/scikit-learn/pull/7676>`_)
by `Mohammed Affan`_



API changes summary
-------------------

Linear, kernelized and related models

- Length of ``explained_variance_ratio_`` of
:class:`discriminant_analysis.LinearDiscriminantAnalysis`
changed for both Eigen and SVD solvers. The attribute now has a length
of min(n_components, n_classes - 1). (`#7632
<https://github.com/scikit-learn/scikit-learn/pull/7632>`_).
By `JPFrancoia`_


.. _changes_0_18:

Version 0.18
Expand Down Expand Up @@ -571,8 +591,8 @@ Decomposition, manifold learning and clustering
:class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
and :class:`manifold.SpectralEmbedding` (`#5012 <https://github.com/scikit-learn/scikit-learn/pull/5012>`_). By `Peter Fischer`_.

- Attribute ``explained_variance_ratio_`` calculated with the SVD solver of
:class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
- Attribute ``explained_variance_ratio_`` calculated with the SVD solver
of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
correct results. By `JPFrancoia`_

Preprocessing and feature selection
Expand Down
29 changes: 18 additions & 11 deletions sklearn/discriminant_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ def _solve_lsqr(self, X, y, shrinkage):
self.means_ = _class_means(X, y)
self.covariance_ = _class_cov(X, y, self.priors_, shrinkage)
self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T))
+ np.log(self.priors_))
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +
np.log(self.priors_))

def _solve_eigen(self, X, y, shrinkage):
"""Eigenvalue solver.
Expand Down Expand Up @@ -336,15 +336,16 @@ class scatter). This solver supports both classification and
Sb = St - Sw # between scatter

evals, evecs = linalg.eigh(Sb, Sw)
self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1]
self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)
)[::-1][:self._max_components]
evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors
# evecs /= np.linalg.norm(evecs, axis=0) # doesn't work with numpy 1.6
evecs /= np.apply_along_axis(np.linalg.norm, 0, evecs)

self.scalings_ = evecs
self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T))
+ np.log(self.priors_))
self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +
np.log(self.priors_))

def _solve_svd(self, X, y):
"""SVD solver.
Expand Down Expand Up @@ -400,12 +401,12 @@ def _solve_svd(self, X, y):
_, S, V = linalg.svd(X, full_matrices=0)

self.explained_variance_ratio_ = (S**2 / np.sum(
S**2))[:self.n_components]
S**2))[:self._max_components]
rank = np.sum(S > self.tol * S[0])
self.scalings_ = np.dot(scalings, V.T[:, :rank])
coef = np.dot(self.means_ - self.xbar_, self.scalings_)
self.intercept_ = (-0.5 * np.sum(coef ** 2, axis=1)
+ np.log(self.priors_))
self.intercept_ = (-0.5 * np.sum(coef ** 2, axis=1) +
np.log(self.priors_))
self.coef_ = np.dot(coef, self.scalings_.T)
self.intercept_ -= np.dot(self.xbar_, self.coef_.T)

Expand Down Expand Up @@ -457,6 +458,13 @@ def fit(self, X, y, store_covariance=None, tol=None):
UserWarning)
self.priors_ = self.priors_ / self.priors_.sum()

# Get the maximum number of components
if self.n_components is None:
self._max_components = len(self.classes_) - 1
else:
self._max_components = min(len(self.classes_) - 1,
self.n_components)

if self.solver == 'svd':
if self.shrinkage is not None:
raise NotImplementedError('shrinkage not supported')
Expand Down Expand Up @@ -497,9 +505,8 @@ def transform(self, X):
X_new = np.dot(X - self.xbar_, self.scalings_)
elif self.solver == 'eigen':
X_new = np.dot(X, self.scalings_)
n_components = X.shape[1] if self.n_components is None \
else self.n_components
return X_new[:, :n_components]

return X_new[:, :self._max_components]

def predict_proba(self, X):
"""Estimate probability.
Expand Down
12 changes: 5 additions & 7 deletions sklearn/tests/test_discriminant_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,19 +171,17 @@ def test_lda_explained_variance_ratio():
clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen")
clf_lda_eigen.fit(X, y)
assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3)
assert_equal(clf_lda_eigen.explained_variance_ratio_.shape, (2,),
"Unexpected length for explained_variance_ratio_")

clf_lda_svd = LinearDiscriminantAnalysis(solver="svd")
clf_lda_svd.fit(X, y)
assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3)
assert_equal(clf_lda_svd.explained_variance_ratio_.shape, (2,),
"Unexpected length for explained_variance_ratio_")

tested_length = min(clf_lda_svd.explained_variance_ratio_.shape[0],
clf_lda_eigen.explained_variance_ratio_.shape[0])

# NOTE: clf_lda_eigen.explained_variance_ratio_ is not of n_components
# length. Make it the same length as clf_lda_svd.explained_variance_ratio_
# before comparison.
assert_array_almost_equal(clf_lda_svd.explained_variance_ratio_,
clf_lda_eigen.explained_variance_ratio_[:tested_length])
clf_lda_eigen.explained_variance_ratio_)


def test_lda_orthogonality():
Expand Down

0 comments on commit f260898

Please sign in to comment.