Skip to content

Commit

Permalink
MAINT parameter validation for sklearn.datasets.dump_svmlight_file (scikit-learn#25726)
Browse files Browse the repository at this point in the history

Co-authored-by: jeremiedbb <[email protected]>
  • Loading branch information
Shivachauhan17 and jeremiedbb committed Feb 28, 2023
1 parent fa0866a commit 00f49eb
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
18 changes: 15 additions & 3 deletions sklearn/datasets/_svmlight_format_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from .. import __version__

from ..utils import check_array, IS_PYPY
from ..utils._param_validation import validate_params, HasMethods

if not IS_PYPY:
from ._svmlight_format_fast import (
Expand Down Expand Up @@ -404,6 +405,17 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
)


@validate_params(
{
"X": ["array-like", "sparse matrix"],
"y": ["array-like", "sparse matrix"],
"f": [str, HasMethods(["write"])],
"zero_based": ["boolean"],
"comment": [str, bytes, None],
"query_id": ["array-like", None],
"multilabel": ["boolean"],
}
)
def dump_svmlight_file(
X,
y,
Expand All @@ -428,7 +440,7 @@ def dump_svmlight_file(
Training vectors, where `n_samples` is the number of samples and
`n_features` is the number of features.
- y : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]
+ y : {array-like, sparse matrix}, shape = (n_samples,) or (n_samples, n_labels)
Target values. Class labels must be an
integer or float, or array-like objects of integer or float for
multilabel classifications.
Expand All @@ -442,7 +454,7 @@ def dump_svmlight_file(
Whether column indices should be written zero-based (True) or one-based
(False).
- comment : str, default=None
+ comment : str or bytes, default=None
Comment to insert at the top of the file. This should be either a
Unicode string, which will be encoded as UTF-8, or an ASCII byte
string.
Expand All @@ -459,7 +471,7 @@ def dump_svmlight_file(
https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).
.. versionadded:: 0.17
- parameter *multilabel* to support multilabel datasets.
+ parameter `multilabel` to support multilabel datasets.
"""
if comment is not None:
# Convert comment string to list of lines in UTF-8.
Expand Down
1 change: 1 addition & 0 deletions sklearn/tests/test_public_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def _check_function_param_validation(
"sklearn.cluster.ward_tree",
"sklearn.covariance.empirical_covariance",
"sklearn.covariance.shrunk_covariance",
"sklearn.datasets.dump_svmlight_file",
"sklearn.datasets.fetch_california_housing",
"sklearn.datasets.fetch_kddcup99",
"sklearn.datasets.make_classification",
Expand Down

0 comments on commit 00f49eb

Please sign in to comment.