CLN remove redundant default parameters in examples and tests (scikit-learn#14590)

Remove redundant 'fit_intercept=True' from examples and tests, along with some instances of other redundant default parameters (max_iter=100, C=1 and alpha=1.0).
qdeffense authored and TomDLT committed Aug 7, 2019
1 parent 36bca23 commit f13c9c0
Showing 13 changed files with 43 additions and 57 deletions.
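
The parameters dropped throughout this diff are exactly the constructor defaults of the touched estimators. As a rough sanity check, here is a minimal sketch (not part of the commit, and assuming a scikit-learn 0.21-era install) that reads the defaults back with get_params():

from sklearn.linear_model import (HuberRegressor, LinearRegression,
                                  LogisticRegression, Ridge, SGDRegressor)

# Values removed by this commit, keyed by the estimators they were passed to.
expected_defaults = {
    LinearRegression: {'fit_intercept': True},
    SGDRegressor: {'fit_intercept': True},
    HuberRegressor: {'fit_intercept': True, 'max_iter': 100},
    LogisticRegression: {'fit_intercept': True, 'C': 1.0, 'max_iter': 100},
    Ridge: {'fit_intercept': True, 'alpha': 1.0},
}

for est_cls, params in expected_defaults.items():
    actual = est_cls().get_params()
    for name, value in params.items():
        # Each dropped argument should equal the library default.
        assert actual[name] == value, (est_cls.__name__, name, actual[name])
print("all removed parameters match the library defaults")
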
2 changes: 1 addition & 1 deletion benchmarks/bench_sparsify.py
@@ -77,7 +77,7 @@ def sparsity_ratio(X):
 print("test data sparsity: %f" % sparsity_ratio(X_test))
 
 ###############################################################################
-clf = SGDRegressor(penalty='l1', alpha=.2, fit_intercept=True, max_iter=2000,
+clf = SGDRegressor(penalty='l1', alpha=.2, max_iter=2000,
                    tol=None)
 clf.fit(X_train, y_train)
 print("model sparsity: %f" % sparsity_ratio(clf.coef_))
3 changes: 1 addition & 2 deletions examples/applications/plot_prediction_latency.py
@@ -278,8 +278,7 @@ def plot_benchmark_throughput(throughputs, configuration):
         'estimators': [
             {'name': 'Linear Model',
              'instance': SGDRegressor(penalty='elasticnet', alpha=0.01,
-                                      l1_ratio=0.25, fit_intercept=True,
-                                      tol=1e-4),
+                                      l1_ratio=0.25, tol=1e-4),
              'complexity_label': 'non-zero coefficients',
              'complexity_computer': lambda clf: np.count_nonzero(clf.coef_)},
             {'name': 'RandomForest',
5 changes: 2 additions & 3 deletions examples/linear_model/plot_huber_vs_ridge.py
@@ -45,14 +45,13 @@
 x = np.linspace(X.min(), X.max(), 7)
 epsilon_values = [1.35, 1.5, 1.75, 1.9]
 for k, epsilon in enumerate(epsilon_values):
-    huber = HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100,
-                           epsilon=epsilon)
+    huber = HuberRegressor(alpha=0.0, epsilon=epsilon)
     huber.fit(X, y)
     coef_ = huber.coef_ * x + huber.intercept_
     plt.plot(x, coef_, colors[k], label="huber loss, %s" % epsilon)
 
 # Fit a ridge regressor to compare it to huber regressor.
-ridge = Ridge(fit_intercept=True, alpha=0.0, random_state=0, normalize=True)
+ridge = Ridge(alpha=0.0, random_state=0, normalize=True)
 ridge.fit(X, y)
 coef_ridge = ridge.coef_
 coef_ = ridge.coef_ * x + ridge.intercept_
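
Because HuberRegressor defaults to fit_intercept=True and max_iter=100 in the 0.21-era release this change targets, the shorter constructor above fits exactly the same model. A small equivalence check, sketched on synthetic data rather than the example's dataset (not part of the commit):

import numpy as np
from sklearn.linear_model import HuberRegressor

rng = np.random.RandomState(0)
X = rng.normal(size=(50, 1))
y = 3 * X.ravel() + 1 + rng.normal(size=50)

# Old-style call with the now-removed arguments spelled out explicitly.
explicit = HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100,
                          epsilon=1.35).fit(X, y)
# New-style call relying on the defaults.
implicit = HuberRegressor(alpha=0.0, epsilon=1.35).fit(X, y)

np.testing.assert_allclose(explicit.coef_, implicit.coef_)
np.testing.assert_allclose(explicit.intercept_, implicit.intercept_)
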
4 changes: 2 additions & 2 deletions examples/linear_model/plot_sgd_separating_hyperplane.py
@@ -18,8 +18,8 @@
 X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
 
 # fit the model
-clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200,
-                    fit_intercept=True)
+clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200)
+
 clf.fit(X, Y)
 
 # plot the line, the points, and the nearest vectors to the plane
@@ -74,9 +74,7 @@
               (model_params['name'], solver, this_max_iter))
         lr = LogisticRegression(solver=solver,
                                 multi_class=model,
-                                C=1,
                                 penalty='l1',
-                                fit_intercept=True,
                                 max_iter=this_max_iter,
                                 random_state=42,
                                 )
6 changes: 3 additions & 3 deletions sklearn/linear_model/tests/test_base.py
@@ -123,10 +123,10 @@ def test_fit_intercept():
     y = np.array([1, 1])
 
     lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
-    lr2_with_intercept = LinearRegression(fit_intercept=True).fit(X2, y)
+    lr2_with_intercept = LinearRegression().fit(X2, y)
 
     lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
-    lr3_with_intercept = LinearRegression(fit_intercept=True).fit(X3, y)
+    lr3_with_intercept = LinearRegression().fit(X3, y)
 
     assert (lr2_with_intercept.coef_.shape ==
             lr2_without_intercept.coef_.shape)
@@ -179,7 +179,7 @@ def test_linear_regression_multiple_outcome(random_state=0):
     Y = np.vstack((y, y)).T
     n_features = X.shape[1]
 
-    reg = LinearRegression(fit_intercept=True)
+    reg = LinearRegression()
     reg.fit((X), Y)
     assert reg.coef_.shape == (2, n_features)
     Y_pred = reg.predict(X)
13 changes: 6 additions & 7 deletions sklearn/linear_model/tests/test_coordinate_descent.py
@@ -359,10 +359,10 @@ def test_enet_cv_positive_constraint():
 
 
 def test_uniform_targets():
-    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
-    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
-    lasso = LassoCV(fit_intercept=True, n_alphas=3)
-    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)
+    enet = ElasticNetCV(n_alphas=3)
+    m_enet = MultiTaskElasticNetCV(n_alphas=3)
+    lasso = LassoCV(n_alphas=3)
+    m_lasso = MultiTaskLassoCV(n_alphas=3)
 
     models_single_task = (enet, lasso)
     models_multi_task = (m_enet, m_lasso)
@@ -432,7 +432,7 @@ def test_enet_multitarget():
     n_targets = 3
     X, y, _, _ = build_dataset(n_samples=10, n_features=8,
                                n_informative_features=10, n_targets=n_targets)
-    estimator = ElasticNet(alpha=0.01, fit_intercept=True)
+    estimator = ElasticNet(alpha=0.01)
     estimator.fit(X, y)
     coef, intercept, dual_gap = (estimator.coef_, estimator.intercept_,
                                  estimator.dual_gap_)
@@ -695,8 +695,7 @@ def test_enet_copy_X_False_check_input_False():
 def test_overrided_gram_matrix():
     X, y, _, _ = build_dataset(n_samples=20, n_features=10)
     Gram = X.T.dot(X)
-    clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram,
-                     fit_intercept=True)
+    clf = ElasticNet(selection='cyclic', tol=1e-8, precompute=Gram)
     assert_warns_message(UserWarning,
                          "Gram matrix was provided but X was centered"
                          " to fit intercept, "
20 changes: 10 additions & 10 deletions sklearn/linear_model/tests/test_huber.py
@@ -32,9 +32,9 @@ def make_regression_with_outliers(n_samples=50, n_features=20):
 def test_huber_equals_lr_for_high_epsilon():
     # Test that Ridge matches LinearRegression for large epsilon
     X, y = make_regression_with_outliers()
-    lr = LinearRegression(fit_intercept=True)
+    lr = LinearRegression()
     lr.fit(X, y)
-    huber = HuberRegressor(fit_intercept=True, epsilon=1e3, alpha=0.0)
+    huber = HuberRegressor(epsilon=1e3, alpha=0.0)
     huber.fit(X, y)
     assert_almost_equal(huber.coef_, lr.coef_, 3)
     assert_almost_equal(huber.intercept_, lr.intercept_, 2)
@@ -74,7 +74,7 @@ def test_huber_sample_weights():
     # Test sample_weights implementation in HuberRegressor"""
 
     X, y = make_regression_with_outliers()
-    huber = HuberRegressor(fit_intercept=True)
+    huber = HuberRegressor()
     huber.fit(X, y)
     huber_coef = huber.coef_
     huber_intercept = huber.intercept_
@@ -108,19 +108,19 @@ def test_huber_sample_weights():
 
     # Test sparse implementation with sample weights.
     X_csr = sparse.csr_matrix(X)
-    huber_sparse = HuberRegressor(fit_intercept=True)
+    huber_sparse = HuberRegressor()
     huber_sparse.fit(X_csr, y, sample_weight=sample_weight)
     assert_array_almost_equal(huber_sparse.coef_ / scale,
                               huber_coef / scale)
 
 
 def test_huber_sparse():
     X, y = make_regression_with_outliers()
-    huber = HuberRegressor(fit_intercept=True, alpha=0.1)
+    huber = HuberRegressor(alpha=0.1)
     huber.fit(X, y)
 
     X_csr = sparse.csr_matrix(X)
-    huber_sparse = HuberRegressor(fit_intercept=True, alpha=0.1)
+    huber_sparse = HuberRegressor(alpha=0.1)
     huber_sparse.fit(X_csr, y)
     assert_array_almost_equal(huber_sparse.coef_, huber.coef_)
     assert_array_equal(huber.outliers_, huber_sparse.outliers_)
@@ -170,8 +170,8 @@ def test_huber_and_sgd_same_results():
 def test_huber_warm_start():
     X, y = make_regression_with_outliers()
     huber_warm = HuberRegressor(
-        fit_intercept=True, alpha=1.0, max_iter=10000, warm_start=True,
-        tol=1e-1)
+        alpha=1.0, max_iter=10000, warm_start=True, tol=1e-1)
+
     huber_warm.fit(X, y)
     huber_warm_coef = huber_warm.coef_.copy()
     huber_warm.fit(X, y)
@@ -186,7 +186,7 @@ def test_huber_warm_start():
 def test_huber_better_r2_score():
     # Test that huber returns a better r2 score than non-outliers"""
     X, y = make_regression_with_outliers()
-    huber = HuberRegressor(fit_intercept=True, alpha=0.01, max_iter=100)
+    huber = HuberRegressor(alpha=0.01)
     huber.fit(X, y)
     linear_loss = np.dot(X, huber.coef_) + huber.intercept_ - y
     mask = np.abs(linear_loss) < huber.epsilon * huber.scale_
@@ -196,7 +196,7 @@ def test_huber_better_r2_score():
     # The Ridge regressor should be influenced by the outliers and hence
     # give a worse score on the non-outliers as compared to the huber
     # regressor.
-    ridge = Ridge(fit_intercept=True, alpha=0.01)
+    ridge = Ridge(alpha=0.01)
     ridge.fit(X, y)
     ridge_score = ridge.score(X[mask], y[mask])
     ridge_outlier_score = ridge.score(X[~mask], y[~mask])
6 changes: 2 additions & 4 deletions sklearn/linear_model/tests/test_least_angle.py
@@ -302,8 +302,7 @@ def test_lasso_lars_vs_lasso_cd_early_stopping():
     for alpha_min in alphas_min:
         alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                        alpha_min=alpha_min)
-        lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True,
-                                      tol=1e-8)
+        lasso_cd = linear_model.Lasso(normalize=True, tol=1e-8)
         lasso_cd.alpha = alphas[-1]
         lasso_cd.fit(X, y)
         error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
@@ -688,8 +687,7 @@ def test_lasso_lars_vs_R_implementation():
                    [0, 0, -1.569380717440311, -5.924804108067312,
                     -7.996385265061972]])
 
-    model_lasso_lars2 = linear_model.LassoLars(alpha=0, fit_intercept=True,
-                                               normalize=True)
+    model_lasso_lars2 = linear_model.LassoLars(alpha=0, normalize=True)
     model_lasso_lars2.fit(X, y)
     skl_betas2 = model_lasso_lars2.coef_path_
 
8 changes: 4 additions & 4 deletions sklearn/linear_model/tests/test_logistic.py
@@ -373,9 +373,9 @@ def test_consistency_path():
     for solver in ('lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'):
         Cs = [1e3]
         coefs, Cs, _ = f(_logistic_regression_path)(
-            X, y, Cs=Cs, fit_intercept=True, tol=1e-6, solver=solver,
+            X, y, Cs=Cs, tol=1e-6, solver=solver,
             intercept_scaling=10000., random_state=0, multi_class='ovr')
-        lr = LogisticRegression(C=Cs[0], fit_intercept=True, tol=1e-4,
+        lr = LogisticRegression(C=Cs[0], tol=1e-4,
                                 intercept_scaling=10000., random_state=0,
                                 multi_class='ovr', solver=solver)
         lr.fit(X, y)
@@ -596,9 +596,9 @@ def test_logistic_cv_sparse():
     X[X < 1.0] = 0.0
     csr = sp.csr_matrix(X)
 
-    clf = LogisticRegressionCV(fit_intercept=True)
+    clf = LogisticRegressionCV()
     clf.fit(X, y)
-    clfs = LogisticRegressionCV(fit_intercept=True)
+    clfs = LogisticRegressionCV()
     clfs.fit(csr, y)
     assert_array_almost_equal(clfs.coef_, clf.coef_)
     assert_array_almost_equal(clfs.intercept_, clf.intercept_)
18 changes: 6 additions & 12 deletions sklearn/linear_model/tests/test_passive_aggressive.py
@@ -91,8 +91,7 @@ def test_classifier_partial_fit():
     classes = np.unique(y)
     for data in (X, X_csr):
         for average in (False, True):
-            clf = PassiveAggressiveClassifier(
-                C=1.0, fit_intercept=True, random_state=0,
+            clf = PassiveAggressiveClassifier(random_state=0,
                 average=average, max_iter=5)
             for t in range(30):
                 clf.partial_fit(data, y, classes)
@@ -123,13 +122,11 @@ def test_classifier_correctness(loss):
     y_bin = y.copy()
     y_bin[y != 1] = -1
 
-    clf1 = MyPassiveAggressive(
-        C=1.0, loss=loss, fit_intercept=True, n_iter=2)
+    clf1 = MyPassiveAggressive(loss=loss, n_iter=2)
     clf1.fit(X, y_bin)
 
     for data in (X, X_csr):
-        clf2 = PassiveAggressiveClassifier(
-            C=1.0, loss=loss, fit_intercept=True, max_iter=2,
+        clf2 = PassiveAggressiveClassifier(loss=loss, max_iter=2,
             shuffle=False, tol=None)
         clf2.fit(data, y_bin)
 
@@ -254,8 +251,7 @@ def test_regressor_partial_fit():
 
     for data in (X, X_csr):
         for average in (False, True):
-            reg = PassiveAggressiveRegressor(
-                C=1.0, fit_intercept=True, random_state=0,
+            reg = PassiveAggressiveRegressor(random_state=0,
                 average=average, max_iter=100)
             for t in range(50):
                 reg.partial_fit(data, y_bin)
@@ -277,13 +273,11 @@ def test_regressor_correctness(loss):
     y_bin = y.copy()
     y_bin[y != 1] = -1
 
-    reg1 = MyPassiveAggressive(
-        C=1.0, loss=loss, fit_intercept=True, n_iter=2)
+    reg1 = MyPassiveAggressive(loss=loss, n_iter=2)
     reg1.fit(X, y_bin)
 
     for data in (X, X_csr):
-        reg2 = PassiveAggressiveRegressor(
-            C=1.0, tol=None, loss=loss, fit_intercept=True, max_iter=2,
+        reg2 = PassiveAggressiveRegressor(tol=None, loss=loss, max_iter=2,
             shuffle=False)
         reg2.fit(data, y_bin)
 
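
For the passive-aggressive estimators, C=1.0 and fit_intercept=True are likewise the documented defaults, so the trimmed constructors configure the same model. A quick way to confirm this (a sketch, not part of the commit, assuming a 0.21-era scikit-learn) is to compare the parameter dictionaries of the old and new spellings:

from sklearn.linear_model import PassiveAggressiveClassifier

old_style = PassiveAggressiveClassifier(C=1.0, fit_intercept=True,
                                        random_state=0, max_iter=5)
new_style = PassiveAggressiveClassifier(random_state=0, max_iter=5)

# Identical hyperparameters, hence identical behaviour on any data.
assert old_style.get_params() == new_style.get_params()
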
7 changes: 3 additions & 4 deletions sklearn/linear_model/tests/test_ridge.py
@@ -1025,8 +1025,8 @@ def test_ridge_fit_intercept_sparse(solver):
     # so the reference we use for both ("auto" and "sparse_cg") is
     # Ridge(solver="sparse_cg"), fitted using the dense representation (note
    # that "sparse_cg" can fit sparse or dense data)
-    dense_ridge = Ridge(alpha=1., solver='sparse_cg', fit_intercept=True)
-    sparse_ridge = Ridge(alpha=1., solver=solver, fit_intercept=True)
+    dense_ridge = Ridge(solver='sparse_cg')
+    sparse_ridge = Ridge(solver=solver)
     dense_ridge.fit(X, y)
     with pytest.warns(None) as record:
         sparse_ridge.fit(X_csr, y)
@@ -1039,12 +1039,11 @@ def test_ridge_fit_intercept_sparse_error(solver):
 def test_ridge_fit_intercept_sparse_error(solver):
     X, y = _make_sparse_offset_regression(n_features=20, random_state=0)
     X_csr = sp.csr_matrix(X)
-    sparse_ridge = Ridge(alpha=1., solver=solver, fit_intercept=True)
+    sparse_ridge = Ridge(solver=solver)
     err_msg = "solver='{}' does not support".format(solver)
     with pytest.raises(ValueError, match=err_msg):
         sparse_ridge.fit(X_csr, y)
 
 
 def test_ridge_fit_intercept_sparse_sag():
     X, y = _make_sparse_offset_regression(
         n_features=5, n_samples=20, random_state=0, X_offset=5.)
6 changes: 3 additions & 3 deletions sklearn/linear_model/tests/test_sparse_coordinate_descent.py
@@ -25,8 +25,8 @@ def test_normalize_option():
     # Check that the normalize option in enet works
     X = sp.csc_matrix([[-1], [0], [1]])
     y = [-1, 0, 1]
-    clf_dense = ElasticNet(fit_intercept=True, normalize=True)
-    clf_sparse = ElasticNet(fit_intercept=True, normalize=True)
+    clf_dense = ElasticNet(normalize=True)
+    clf_sparse = ElasticNet(normalize=True)
     clf_dense.fit(X, y)
     X = sp.csc_matrix(X)
     clf_sparse.fit(X, y)
@@ -216,7 +216,7 @@ def test_enet_multitarget():
     n_targets = 3
     X, y = make_sparse_data(n_targets=n_targets)
 
-    estimator = ElasticNet(alpha=0.01, fit_intercept=True, precompute=None)
+    estimator = ElasticNet(alpha=0.01, precompute=None)
     # XXX: There is a bug when precompute is not None!
     estimator.fit(X, y)
    coef, intercept, dual_gap = (estimator.coef_,
