forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
completed sparsity sections; added benchmark
- Loading branch information
Showing
3 changed files
with
151 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
""" | ||
Benchmark SGD prediction time with dense/sparse coefficients. | ||
Invoke with | ||
----------- | ||
$ kernprof.py -l sparsity_benchmark.py | ||
$ python -m line_profiler sparsity_benchmark.py.lprof | ||
Typical output | ||
-------------- | ||
input data sparsity: 0.050000 | ||
true coef sparsity: 0.000100 | ||
test data sparsity: 0.027400 | ||
model sparsity: 0.000024 | ||
r^2 on test data (dense model) : 0.233651 | ||
r^2 on test data (sparse model) : 0.233651 | ||
Wrote profile results to sparsity_benchmark.py.lprof | ||
Timer unit: 1e-06 s | ||
File: sparsity_benchmark.py | ||
Function: benchmark_dense_predict at line 51 | ||
Total time: 0.532979 s | ||
Line # Hits Time Per Hit % Time Line Contents | ||
============================================================== | ||
51 @profile | ||
52 def benchmark_dense_predict(): | ||
53 301 640 2.1 0.1 for _ in range(300): | ||
54 300 532339 1774.5 99.9 clf.predict(X_test) | ||
File: sparsity_benchmark.py | ||
Function: benchmark_sparse_predict at line 56 | ||
Total time: 0.39274 s | ||
Line # Hits Time Per Hit % Time Line Contents | ||
============================================================== | ||
56 @profile | ||
57 def benchmark_sparse_predict(): | ||
58 1 10854 10854.0 2.8 X_test_sparse = csr_matrix(X_test) | ||
59 301 477 1.6 0.1 for _ in range(300): | ||
60 300 381409 1271.4 97.1 clf.predict(X_test_sparse) | ||
""" | ||
|
||
from scipy.sparse.csr import csr_matrix | ||
import numpy as np | ||
from sklearn.linear_model.stochastic_gradient import SGDRegressor | ||
from sklearn.metrics import r2_score | ||
|
||
np.random.seed(42) | ||
|
||
|
||
def sparsity_ratio(X):
    """Return the fraction of entries of ``X`` that are non-zero.

    The ratio is computed against the size of ``X`` itself, so it is
    meaningful for arrays of any shape (a 2-D data matrix as well as a 1-D
    coefficient vector) and does not depend on any module-level globals.

    Note that, despite the name, a *larger* value means a *denser* array.

    Parameters
    ----------
    X : array-like
        Values to inspect.

    Returns
    -------
    float
        ``count_nonzero(X) / X.size``, or ``0.0`` when ``X`` is empty.
    """
    X = np.asarray(X)
    if X.size == 0:
        # Avoid a division by zero; an empty array has no non-zero entries.
        return 0.0
    return np.count_nonzero(X) / float(X.size)
|
||
# Problem size: rows (samples) and columns (features) of the design matrix.
n_samples, n_features = 5000, 300
X = np.random.randn(n_samples, n_features)
inds = np.arange(n_samples)
np.random.shuffle(inds)
# Keep only the first int(n_features / 1.2) shuffled rows and zero the rest.
# Slice bounds must be integers: a float bound raises TypeError on Python 3
# and on current NumPy, hence the explicit int() conversion.
X[inds[int(n_features / 1.2):]] = 0  # sparsify input
print("input data sparsity: %f" % sparsity_ratio(X))
coef = 3 * np.random.randn(n_features)
inds = np.arange(n_features)
np.random.shuffle(inds)
# Floor division keeps the slice bound an integer under true division.
coef[inds[n_features // 2:]] = 0  # sparsify coef
print("true coef sparsity: %f" % sparsity_ratio(coef))
y = np.dot(X, coef)
|
||
# add noise: one independent standard-normal draw per target value.
# The shape has to be passed as ``size``; given positionally it is taken as
# the distribution mean (``loc``), which yields a single draw centred on
# n_samples and shifts every element of y by the same constant.
y += 0.01 * np.random.normal(size=n_samples)
|
||
# Split data in train set and test set: first half trains, second half tests.
n_samples = X.shape[0]
# Floor division keeps the slice bounds integral (true division would produce
# a float, which is not a valid slice index).
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]
print("test data sparsity: %f" % sparsity_ratio(X_test))
|
||
###############################################################################
# Fit an L1-penalised SGD regressor on the training half, then print the
# sparsity_ratio of the learned coefficient vector.
clf = SGDRegressor(
    penalty='l1',
    alpha=.2,
    fit_intercept=True,
    n_iter=2000,
)
clf.fit(X_train, y_train)
print("model sparsity: %f" % sparsity_ratio(clf.coef_))
|
||
@profile
def benchmark_dense_predict():
    """Call ``clf.predict`` on the dense ``X_test`` 300 times.

    The return values are discarded; the function exists only so that the
    line profiler (which supplies the ``profile`` decorator) can time the
    repeated prediction calls.
    """
    n_repeats = 300
    for _ in range(n_repeats):
        clf.predict(X_test)
|
||
@profile
def benchmark_sparse_predict():
    """Call ``clf.predict`` on a CSR copy of ``X_test`` 300 times.

    The dense test matrix is converted to ``csr_matrix`` once, inside the
    profiled function, so the conversion cost shows up in the profile
    separately from the prediction loop. Predictions are discarded.
    """
    n_repeats = 300
    sparse_test_data = csr_matrix(X_test)
    for _ in range(n_repeats):
        clf.predict(sparse_test_data)
|
||
def score(y_test, y_pred, case):
    """Print the R^2 score of ``y_pred`` against ``y_test``.

    Parameters
    ----------
    y_test : array-like
        Reference target values.
    y_pred : array-like
        Predicted target values.
    case : str
        Label inserted into the printed line to identify the run.
    """
    message = "r^2 on test data (%s) : %f"
    print(message % (case, r2_score(y_test, y_pred)))
|
||
# Score and profile the model as fitted, before sparsify() is called.
y_pred = clf.predict(X_test)
score(y_test, y_pred, 'dense model')
benchmark_dense_predict()

# Call sparsify() on the model, then score and profile it again, this time
# feeding the profiled loop a CSR test matrix.
clf.sparsify()
y_pred = clf.predict(X_test)
score(y_test, y_pred, 'sparse model')
benchmark_sparse_predict()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.