Skip to content

Commit

Permalink
Moved sparsefuncs to sparsefuncs_fast
Browse files Browse the repository at this point in the history
  • Loading branch information
MechCoder committed Mar 27, 2014
1 parent 8cf30f1 commit bbb8f1d
Show file tree
Hide file tree
Showing 12 changed files with 682 additions and 1,155 deletions.
376 changes: 188 additions & 188 deletions sklearn/cluster/_k_means.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion sklearn/cluster/_k_means.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cimport numpy as np
cimport cython

from ..utils.extmath import norm
from sklearn.utils.sparsefuncs cimport add_row_csr
from sklearn.utils.sparsefuncs_fast cimport add_row_csr

ctypedef np.float64_t DOUBLE
ctypedef np.int32_t INT
Expand Down
3 changes: 2 additions & 1 deletion sklearn/cluster/k_means_.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
from ..base import BaseEstimator, ClusterMixin, TransformerMixin
from ..metrics.pairwise import euclidean_distances
from ..utils.extmath import row_norms
from ..utils.sparsefuncs import assign_rows_csr, mean_variance_axis0
from ..utils.sparsefuncs_fast import assign_rows_csr
from ..utils.sparsefuncs import mean_variance_axis0
from ..utils import check_arrays
from ..utils import check_random_state
from ..utils import atleast2d_or_csr
Expand Down
2 changes: 1 addition & 1 deletion sklearn/feature_selection/variance_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from ..base import BaseEstimator
from .base import SelectorMixin
from ..utils import atleast2d_or_csr
from ..utils.sparsefuncs import csr_mean_variance_axis0
from ..utils.sparsefuncs_fast import csr_mean_variance_axis0


class VarianceThreshold(BaseEstimator, SelectorMixin):
Expand Down
6 changes: 3 additions & 3 deletions sklearn/preprocessing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
from ..utils import safe_asarray
from ..utils import warn_if_not_float
from ..utils.extmath import row_norms
from ..utils.sparsefuncs import inplace_csr_row_normalize_l1
from ..utils.sparsefuncs import inplace_csr_row_normalize_l2
from ..utils.sparsefuncs import inplace_csr_column_scale
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l2
from ..utils.sparsefuncs_fast import inplace_csr_column_scale
from ..utils.sparsefuncs import mean_variance_axis0

zip = six.moves.zip
Expand Down
2 changes: 1 addition & 1 deletion sklearn/utils/extmath.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .fixes import np_version
from ._logistic_sigmoid import _log_logistic_sigmoid
from ..externals.six.moves import xrange
from .sparsefuncs import csr_row_norms
from .sparsefuncs_fast import csr_row_norms
from .validation import array2d, NonBLASDotWarning


Expand Down
2 changes: 1 addition & 1 deletion sklearn/utils/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def configuration(parent_package='', top_path=None):
libraries.append('m')
cblas_libs.append('m')

config.add_extension('sparsefuncs', sources=['sparsefuncs.c'],
config.add_extension('sparsefuncs_fast', sources=['sparsefuncs_fast.c'],
libraries=libraries)

config.add_extension('arrayfuncs',
Expand Down
58 changes: 58 additions & 0 deletions sklearn/utils/sparsefuncs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Authors: Manoj Kumar

# License: BSD 3 clause
import scipy.sparse as sp

from .sparsefuncs_fast import (csr_mean_variance_axis0,
csc_mean_variance_axis0,
inplace_csr_column_scale,
inplace_csc_column_scale)

def mean_variance_axis0(X):
    """Compute mean and variance along axis 0 on a CSR or CSC matrix.

    This is a thin dispatcher: it inspects the sparse format of ``X`` and
    forwards to the format-specific routine imported from
    ``sparsefuncs_fast``.

    Parameters
    ----------
    X : CSR or CSC sparse matrix, shape (n_samples, n_features)
        Input data.

    Returns
    -------
    means : float array with shape (n_features,)
        Feature-wise means.

    variances : float array with shape (n_features,)
        Feature-wise variances.

    Raises
    ------
    TypeError
        If ``X`` is neither a ``scipy.sparse.csr_matrix`` nor a
        ``scipy.sparse.csc_matrix`` (dense arrays and other sparse
        formats are rejected rather than converted).
    """
    if isinstance(X, sp.csr_matrix):
        return csr_mean_variance_axis0(X)
    elif isinstance(X, sp.csc_matrix):
        return csc_mean_variance_axis0(X)
    else:
        # No implicit conversion: the caller must pick CSR or CSC explicitly.
        raise TypeError(
            "Unsupported type; expected a CSR or CSC sparse matrix.")


def inplace_column_scale(X, scale):
    """Inplace column scaling of a CSC/CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    This is a thin dispatcher: it inspects the sparse format of ``X`` and
    forwards to the format-specific routine imported from
    ``sparsefuncs_fast``.

    Parameters
    ----------
    X : CSC or CSR matrix with shape (n_samples, n_features)
        Matrix to normalize using the variance of the features.

    scale : float array with shape (n_features,)
        Array of precomputed feature-wise values to use for scaling.

    Returns
    -------
    The return value of the format-specific helper, passed through
    unchanged (presumably ``None`` since the scaling is done in place;
    confirm against ``sparsefuncs_fast``).

    Raises
    ------
    TypeError
        If ``X`` is neither a ``scipy.sparse.csr_matrix`` nor a
        ``scipy.sparse.csc_matrix`` (dense arrays and other sparse
        formats are rejected rather than converted).
    """
    if isinstance(X, sp.csr_matrix):
        return inplace_csr_column_scale(X, scale)
    elif isinstance(X, sp.csc_matrix):
        return inplace_csc_column_scale(X, scale)
    else:
        # No implicit conversion: the caller must pick CSR or CSC explicitly.
        raise TypeError(
            "Unsupported type; expected a CSR or CSC sparse matrix.")
1,329 changes: 423 additions & 906 deletions sklearn/utils/sparsefuncs.c → sklearn/utils/sparsefuncs_fast.c

Large diffs are not rendered by default.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -291,56 +291,6 @@ def inplace_csc_column_scale(X, np.ndarray[DOUBLE, ndim=1] scale):
X_data[j] *= scale[i]


def inplace_column_scale(X, np.ndarray[DOUBLE, ndim=1] scale):
"""Inplace column scaling of a CSC/CSR matrix.

Scale each feature of the data matrix by multiplying with specific scale
provided by the caller assuming a (n_samples, n_features) shape.

Dispatches on the type of X to inplace_csr_column_scale or
inplace_csc_column_scale and returns that helper's result unchanged.

Parameters
----------
X : CSC or CSR matrix with shape (n_samples, n_features)
Matrix to normalize using the variance of the features.

scale : float array with shape (n_features,)
Array of precomputed feature-wise values to use for scaling.
Declared as a 1-dimensional ndarray of DOUBLE in the signature.

Raises
------
TypeError
If X is neither a scipy.sparse csr_matrix nor a csc_matrix.
"""
if isinstance(X, sp.csr_matrix):
return inplace_csr_column_scale(X, scale)
elif isinstance(X, sp.csc_matrix):
return inplace_csc_column_scale(X, scale)
else:
raise TypeError(
"Unsupported type; expected a CSR or CSC sparse matrix.")


def mean_variance_axis0(X):
"""Compute mean and variance along axis 0 on a CSR or CSC matrix.

Dispatches on the type of X to csr_mean_variance_axis0 or
csc_mean_variance_axis0 and returns that helper's result unchanged.

Parameters
----------
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
Input data.

Returns
-------
means : float array with shape (n_features,)
Feature-wise means

variances : float array with shape (n_features,)
Feature-wise variances

Raises
------
TypeError
If X is neither a scipy.sparse csr_matrix nor a csc_matrix.
"""
if isinstance(X, sp.csr_matrix):
return csr_mean_variance_axis0(X)
elif isinstance(X, sp.csc_matrix):
return csc_mean_variance_axis0(X)
else:
raise TypeError(
"Unsupported type; expected a CSR or CSC sparse matrix.")


@cython.boundscheck(False)
@cython.wraparound(False)
cdef void add_row_csr(np.ndarray[np.float64_t, ndim=1] data,
Expand Down
7 changes: 4 additions & 3 deletions sklearn/utils/tests/test_sparsefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from numpy.testing import assert_array_almost_equal, assert_array_equal

from sklearn.datasets import make_classification
from sklearn.utils.sparsefuncs import (assign_rows_csr, mean_variance_axis0,
inplace_csc_column_scale,
inplace_csr_column_scale)
from sklearn.utils.sparsefuncs import mean_variance_axis0
from sklearn.utils.sparsefuncs_fast import (assign_rows_csr,
inplace_csc_column_scale,
inplace_csr_column_scale)

def test_mean_variance_axis0():
X, _ = make_classification(5, 4, random_state=0)
Expand Down

0 comments on commit bbb8f1d

Please sign in to comment.