
Commit

MAINT Fix typos found by codespell (scikit-learn#26448)
DimitriPapadopoulos committed May 31, 2023
1 parent 41b0bd8 commit 2fd022d
Showing 44 changed files with 62 additions and 62 deletions.
2 changes: 1 addition & 1 deletion azure-pipelines.yml
@@ -71,7 +71,7 @@ jobs:
#
# The nogil build relies on a dedicated PyPI-style index to install patched
# versions of NumPy, SciPy and Cython maintained by @colesbury and that
-# include specifc fixes to make them run correctly without relying on the GIL.
+# include specific fixes to make them run correctly without relying on the GIL.
#
# The goal of this CI entry is to make sure that we do not introduce any
# dependency on the GIL in scikit-learn itself. An auxiliary goal is to early
2 changes: 1 addition & 1 deletion build_tools/linting.sh
@@ -27,7 +27,7 @@ bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@p
if [ ! -z "$bad_deprecation_property_order" ]
then
echo "property decorator should come before deprecated decorator"
echo "found the following occurrencies:"
echo "found the following occurrences:"
echo $bad_deprecation_property_order
exit 1
fi
2 changes: 1 addition & 1 deletion doc/developers/advanced_installation.rst
@@ -185,7 +185,7 @@ which allows you to edit the code in-place. This builds the extension in place a
creates a link to the development directory (see `the pip docs
<https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs>`_).

-As the doc aboves explains, this is fundamentally similar to using the command
+As the doc above explains, this is fundamentally similar to using the command
``python setup.py develop``. (see `the setuptool docs
<https://setuptools.pypa.io/en/latest/userguide/development_mode.html>`_).
It is however preferred to use pip.
2 changes: 1 addition & 1 deletion doc/modules/linear_model.rst
@@ -1194,7 +1194,7 @@ Examples of use cases include:
* Risk modeling / insurance policy pricing: number of claim events /
policyholder per year (Poisson), cost per event (Gamma), total cost per
policyholder per year (Tweedie / Compound Poisson Gamma).
-* Credit Default: probability that a loan can't be payed back (Bernouli).
+* Credit Default: probability that a loan can't be paid back (Bernouli).
* Fraud Detection: probability that a financial transaction like a cash transfer
is a fraudulent transaction (Bernoulli).
* Predictive maintenance: number of production interruption events per year
2 changes: 1 addition & 1 deletion examples/cluster/plot_adjusted_for_chance_measures.py
@@ -102,7 +102,7 @@ def fixed_classes_uniform_labelings_scores(


# %%
-# In this first example we set the number of clases (true number of clusters) to
+# In this first example we set the number of classes (true number of clusters) to
# `n_classes=10`. The number of clusters varies over the values provided by
# `n_clusters_range`.

2 changes: 1 addition & 1 deletion examples/cluster/plot_dbscan.py
@@ -89,7 +89,7 @@
# ------------
#
# Core samples (large dots) and non-core samples (small dots) are color-coded
-# according to the asigned cluster. Samples tagged as noise are represented in
+# according to the assigned cluster. Samples tagged as noise are represented in
# black.

unique_labels = set(labels)
2 changes: 1 addition & 1 deletion examples/cluster/plot_face_compress.py
@@ -99,7 +99,7 @@
# image is still looking good.
#
# We observe that the distribution of pixels values have been mapped to 8
-# different values. We can check the correspondance between such values and the
+# different values. We can check the correspondence between such values and the
# original pixel values.

bin_edges = encoder.bin_edges_[0]
4 changes: 2 additions & 2 deletions examples/cluster/plot_mini_batch_kmeans.py
@@ -132,8 +132,8 @@
for k in range(n_clusters):
different += (k_means_labels == k) != (mbk_means_labels == k)

-identic = np.logical_not(different)
-ax.plot(X[identic, 0], X[identic, 1], "w", markerfacecolor="#bbbbbb", marker=".")
+identical = np.logical_not(different)
+ax.plot(X[identical, 0], X[identical, 1], "w", markerfacecolor="#bbbbbb", marker=".")
ax.plot(X[different, 0], X[different, 1], "w", markerfacecolor="m", marker=".")
ax.set_title("Difference")
ax.set_xticks(())
2 changes: 1 addition & 1 deletion examples/ensemble/plot_monotonic_constraints.py
@@ -93,7 +93,7 @@
# Using feature names to specify monotonic constraints
# ----------------------------------------------------
#
-# Note that if the training data has feature names, it's possible to specifiy the
+# Note that if the training data has feature names, it's possible to specify the
# monotonic constraints by passing a dictionary:
import pandas as pd

@@ -452,7 +452,7 @@ def score_estimator(
#
# We conclude that the claim amount is very challenging to predict. Still, the
# :class:`~sklearn.linear.GammaRegressor` is able to leverage some information
-# from the input features to slighly improve upon the mean baseline in terms
+# from the input features to slightly improve upon the mean baseline in terms
# of D².
#
# Note that the resulting model is the average claim amount per claim. As such,
2 changes: 1 addition & 1 deletion examples/miscellaneous/plot_kernel_ridge_regression.py
@@ -122,7 +122,7 @@
# The previous figure compares the learned model of KRR and SVR when both
# complexity/regularization and bandwidth of the RBF kernel are optimized using
# grid-search. The learned functions are very similar; however, fitting KRR is
-# approximatively 3-4 times faster than fitting SVR (both with grid-search).
+# approximately 3-4 times faster than fitting SVR (both with grid-search).
#
# Prediction of 100000 target values could be in theory approximately three
# times faster with SVR since it has learned a sparse model using only
2 changes: 1 addition & 1 deletion examples/miscellaneous/plot_set_output.py
@@ -65,7 +65,7 @@

# %%
# Next we load the titanic dataset to demonstrate `set_output` with
-# :class:`compose.ColumnTransformer` and heterogenous data.
+# :class:`compose.ColumnTransformer` and heterogeneous data.
from sklearn.datasets import fetch_openml

X, y = fetch_openml(
2 changes: 1 addition & 1 deletion examples/mixture/plot_gmm_init.py
@@ -57,7 +57,7 @@


def get_initial_means(X, init_params, r):
-# Run a GaussianMixture with max_iter=0 to output the initalization means
+# Run a GaussianMixture with max_iter=0 to output the initialization means
gmm = GaussianMixture(
n_components=4, init_params=init_params, tol=1e-9, max_iter=0, random_state=r
).fit(X)
2 changes: 1 addition & 1 deletion examples/model_selection/plot_det.py
@@ -56,7 +56,7 @@
# Define the classifiers
# ----------------------
#
-# Here we define two different classifiers. The goal is to visualy compare their
+# Here we define two different classifiers. The goal is to visually compare their
# statistical performance across thresholds using the ROC and DET curves. There
# is no particular reason why these classifiers are chosen other classifiers
# available in scikit-learn.
4 changes: 2 additions & 2 deletions examples/model_selection/plot_likelihood_ratios.py
@@ -224,7 +224,7 @@ def extract_score(cv_results):
disp.ax_.legend(*scatter.legend_elements())

# %%
-# We define a function for bootstraping.
+# We define a function for bootstrapping.


def scoring_on_bootstrap(estimator, X, y, rng, n_bootstrap=100):
@@ -241,7 +241,7 @@ def scoring_on_bootstrap(estimator, X, y, rng, n_bootstrap=100):


# %%
-# We score the base model for each prevalence using bootstraping.
+# We score the base model for each prevalence using bootstrapping.

results = defaultdict(list)
n_bootstrap = 100
4 changes: 2 additions & 2 deletions examples/preprocessing/plot_scaling_importance.py
@@ -100,7 +100,7 @@ def fit_and_plot_model(X_plot, y, clf, ax):
_ = ax2.set_title("KNN with scaling")

# %%
-# Here the desicion boundary shows that fitting scaled or non-scaled data lead
+# Here the decision boundary shows that fitting scaled or non-scaled data lead
# to completely different models. The reason is that the variable "proline" has
# values which vary between 0 and 1,000; whereas the variable "hue" varies
# between 1 and 10. Because of this, distances between samples are mostly
@@ -187,7 +187,7 @@ def fit_and_plot_model(X_plot, y, clf, ax):
# %%
# From the plot above we observe that scaling the features before reducing the
# dimensionality results in components with the same order of magnitude. In this
-# case it also improves the separability of the clases. Indeed, in the next
+# case it also improves the separability of the classes. Indeed, in the next
# section we confirm that a better separability has a good repercussion on the
# overall model's performance.
#
4 changes: 2 additions & 2 deletions examples/text/plot_document_clustering.py
@@ -197,7 +197,7 @@ def fit_and_evaluate(km, X, name=None, n_runs=5):
# `max_df=0.5`) and terms that are not present in at least 5 documents (set by
# `min_df=5`), the resulting number of unique terms `n_features` is around
# 8,000. We can additionally quantify the sparsity of the `X_tfidf` matrix as
-# the fraction of non-zero entries devided by the total number of elements.
+# the fraction of non-zero entries divided by the total number of elements.

print(f"{X_tfidf.nnz / np.prod(X_tfidf.shape):.3f}")

@@ -230,7 +230,7 @@ def fit_and_evaluate(km, X, name=None, n_runs=5):
random_state=seed,
).fit(X_tfidf)
cluster_ids, cluster_sizes = np.unique(kmeans.labels_, return_counts=True)
print(f"Number of elements asigned to each cluster: {cluster_sizes}")
print(f"Number of elements assigned to each cluster: {cluster_sizes}")
print()
print(
"True number of documents in each category according to the class labels: "
8 changes: 4 additions & 4 deletions sklearn/datasets/_openml.py
@@ -45,7 +45,7 @@ def _retry_with_clean_cache(
"""If the first call to the decorated function fails, the local cached
file is removed, and the function is called again. If ``data_home`` is
``None``, then the function is called once. We can provide a specific
-exception to not retry on usign `no_retry_exception` parameter.
+exception to not retry on using `no_retry_exception` parameter.
"""

def decorator(f):
@@ -998,22 +998,22 @@ def fetch_openml(
if as_frame:
err_msg = (
"Returning pandas objects requires pandas to be installed. "
"Alternatively, explicitely set `as_frame=False` and "
"Alternatively, explicitly set `as_frame=False` and "
"`parser='liac-arff'`."
)
raise ImportError(err_msg) from exc
else:
err_msg = (
f"Using `parser={parser_!r}` requires pandas to be installed. "
"Alternatively, explicitely set `parser='liac-arff'`."
"Alternatively, explicitly set `parser='liac-arff'`."
)
if parser == "auto":
# TODO(1.4): In version 1.4, we will raise an error instead of
# a warning.
warn(
(
"From version 1.4, `parser='auto'` with `as_frame=False` "
"will use pandas. Either install pandas or set explicitely "
"will use pandas. Either install pandas or set explicitly "
"`parser='liac-arff'` to preserve the current behavior."
),
FutureWarning,
4 changes: 2 additions & 2 deletions sklearn/datasets/tests/test_openml.py
@@ -603,7 +603,7 @@ def test_fetch_openml_difference_parsers(monkeypatch):

###############################################################################
# Test the ARFF parsing on several dataset to check if detect the correct
-# types (categories, intgers, floats).
+# types (categories, integers, floats).


@pytest.fixture(scope="module")
@@ -1009,7 +1009,7 @@ def test_fetch_openml_requires_pandas_error(monkeypatch, params):
check_pandas_support("test_fetch_openml_requires_pandas")
except ImportError:
_monkey_patch_webbased_functions(monkeypatch, data_id, True)
err_msg = "requires pandas to be installed. Alternatively, explicitely"
err_msg = "requires pandas to be installed. Alternatively, explicitly"
with pytest.raises(ImportError, match=err_msg):
fetch_openml(data_id=data_id, **params)
else:
2 changes: 1 addition & 1 deletion sklearn/decomposition/_lda.py
@@ -107,7 +107,7 @@ def _update_doc_distribution(
X_indptr = X.indptr

# These cython functions are called in a nested loop on usually very small arrays
-# (lenght=n_topics). In that case, finding the appropriate signature of the
+# (length=n_topics). In that case, finding the appropriate signature of the
# fused-typed function can be more costly than its execution, hence the dispatch
# is done outside of the loop.
ctype = "float" if X.dtype == np.float32 else "double"
2 changes: 1 addition & 1 deletion sklearn/externals/_lobpcg.py
@@ -214,7 +214,7 @@ def lobpcg(
Notes
-----
The iterative loop in lobpcg runs maxit=maxiter (or 20 if maxit=None)
-iterations at most and finishes earler if the tolerance is met.
+iterations at most and finishes earlier if the tolerance is met.
Breaking backward compatibility with the previous version, lobpcg
now returns the block of iterative vectors with the best accuracy rather
than the last one iterated, as a cure for possible divergence.
@@ -308,7 +308,7 @@ def test_dataframe_labels_used(pyplot, fitted_clf):
assert ax.get_xlabel() == "hello"
assert ax.get_ylabel() == "world"

-# labels get overriden only if provided to the `plot` method
+# labels get overridden only if provided to the `plot` method
disp.plot(ax=ax, xlabel="overwritten_x", ylabel="overwritten_y")
assert ax.get_xlabel() == "overwritten_x"
assert ax.get_ylabel() == "overwritten_y"
4 changes: 2 additions & 2 deletions sklearn/linear_model/_glm/glm.py
@@ -124,8 +124,8 @@ class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):
HalfSquaredError identity y any real number
HalfPoissonLoss log 0 <= y
HalfGammaLoss log 0 < y
-HalfTweedieLoss log dependend on tweedie power
-HalfTweedieLossIdentity identity dependend on tweedie power
+HalfTweedieLoss log dependent on tweedie power
+HalfTweedieLossIdentity identity dependent on tweedie power
======================= ======== ==========================
The link function of the GLM, i.e. mapping from linear predictor
2 changes: 1 addition & 1 deletion sklearn/linear_model/_glm/tests/test_glm.py
@@ -109,7 +109,7 @@ def glm_dataset(global_random_seed, request):
Last column of 1, i.e. intercept.
y : ndarray
coef_unpenalized : ndarray
-Minimum norm solutions, i.e. min sum(loss(w)) (with mininum ||w||_2 in
+Minimum norm solutions, i.e. min sum(loss(w)) (with minimum ||w||_2 in
case of ambiguity)
Last coefficient is intercept.
coef_penalized : ndarray
2 changes: 1 addition & 1 deletion sklearn/linear_model/tests/test_common.py
@@ -71,7 +71,7 @@
RidgeCV(),
pytest.param(
SGDRegressor(tol=1e-15),
marks=pytest.mark.xfail(reason="Unsufficient precision."),
marks=pytest.mark.xfail(reason="Insufficient precision."),
),
SGDRegressor(penalty="elasticnet", max_iter=10_000),
TweedieRegressor(power=0), # same as Ridge
2 changes: 1 addition & 1 deletion sklearn/linear_model/tests/test_ridge.py
@@ -104,7 +104,7 @@ def ols_ridge_dataset(global_random_seed, request):
Last column of 1, i.e. intercept.
y : ndarray
coef_ols : ndarray of shape
-Minimum norm OLS solutions, i.e. min ||X w - y||_2_2 (with mininum ||w||_2 in
+Minimum norm OLS solutions, i.e. min ||X w - y||_2_2 (with minimum ||w||_2 in
case of ambiguity)
Last coefficient is intercept.
coef_ridge : ndarray of shape (5,)
2 changes: 1 addition & 1 deletion sklearn/manifold/_t_sne.py
@@ -498,7 +498,7 @@ def trustworthiness(X, X_embedded, *, n_neighbors=5, metric="euclidean"):
(ICANN '01). Springer-Verlag, Berlin, Heidelberg, 485-491.
.. [2] Laurens van der Maaten. Learning a Parametric Embedding by Preserving
-Local Structure. Proceedings of the Twelth International Conference on
+Local Structure. Proceedings of the Twelfth International Conference on
Artificial Intelligence and Statistics, PMLR 5:384-391, 2009.
"""
n_samples = X.shape[0]
2 changes: 1 addition & 1 deletion sklearn/metrics/_dist_metrics.pyx.tp
@@ -2456,7 +2456,7 @@ cdef class RussellRaoDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
else:
i2 = i2 + 1

-# We don't need to go through all the longuest
+# We don't need to go through all the longest
# vector because tf1 or tf2 will be false
# and thus n_tt won't be increased.

6 changes: 3 additions & 3 deletions sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
@@ -163,7 +163,7 @@ class ArgKmin(BaseDistancesReductionDispatcher):
ArgKmin is typically used to perform
bruteforce k-nearest neighbors queries.
-This class is not meant to be instanciated, one should only use
+This class is not meant to be instantiated, one should only use
its :meth:`compute` classmethod which handles allocation and
deallocation consistently.
"""
@@ -301,7 +301,7 @@ class RadiusNeighbors(BaseDistancesReductionDispatcher):
The distance function `dist` depends on the values of the `metric`
and `metric_kwargs` parameters.
-This class is not meant to be instanciated, one should only use
+This class is not meant to be instantiated, one should only use
its :meth:`compute` classmethod which handles allocation and
deallocation consistently.
"""
@@ -446,7 +446,7 @@ class ArgKminClassMode(BaseDistancesReductionDispatcher):
queries when the weighted mode of the labels for the k-nearest neighbors
are required, such as in `predict` methods.
-This class is not meant to be instanciated, one should only use
+This class is not meant to be instantiated, one should only use
its :meth:`compute` classmethod which handles allocation and
deallocation consistently.
"""
2 changes: 1 addition & 1 deletion sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -141,7 +141,7 @@ def test_roc_curve_chance_level_line(
chance_level_kw,
constructor_name,
):
"""Check the chance leve line plotting behaviour."""
"""Check the chance level line plotting behaviour."""
X, y = data_binary

lr = LogisticRegression()
2 changes: 1 addition & 1 deletion sklearn/metrics/_ranking.py
@@ -1055,7 +1055,7 @@ def roc_curve(
are reversed upon returning them to ensure they correspond to both ``fpr``
and ``tpr``, which are sorted in reversed order during their calculation.
-An arbritrary threshold is added for the case `tpr=0` and `fpr=0` to
+An arbitrary threshold is added for the case `tpr=0` and `fpr=0` to
ensure that the curve starts at `(0, 0)`. This threshold corresponds to the
`np.inf`.
2 changes: 1 addition & 1 deletion sklearn/metrics/tests/test_dist_metrics.py
@@ -232,7 +232,7 @@ def test_distance_metrics_dtype_consistency(metric_param_grid):
D64 = dm64.pairwise(X64)
D32 = dm32.pairwise(X32)

-# Both results are np.float64 dtype because the accumulation accross
+# Both results are np.float64 dtype because the accumulation across
# features is done in float64. However the input data and the element
# wise arithmetic operations are done in float32 so we can expect a
# small discrepancy.
4 changes: 2 additions & 2 deletions sklearn/mixture/_bayesian_mixture.py
@@ -541,7 +541,7 @@ def _estimate_weights(self, nk):
),
)
else:
-# case Variationnal Gaussian mixture with dirichlet distribution
+# case Variational Gaussian mixture with dirichlet distribution
self.weight_concentration_ = self.weight_concentration_prior_ + nk

def _estimate_means(self, nk, xk):
@@ -749,7 +749,7 @@ def _estimate_log_weights(self):
+ np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1]))
)
else:
-# case Variationnal Gaussian mixture with dirichlet distribution
+# case Variational Gaussian mixture with dirichlet distribution
return digamma(self.weight_concentration_) - digamma(
np.sum(self.weight_concentration_)
)
2 changes: 1 addition & 1 deletion sklearn/neural_network/_multilayer_perceptron.py
@@ -932,7 +932,7 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron):
best_loss_ : float or None
The minimum loss reached by the solver throughout fitting.
-If `early_stopping=True`, this attribute is set ot `None`. Refer to
+If `early_stopping=True`, this attribute is set to `None`. Refer to
the `best_validation_score_` fitted attribute instead.
loss_curve_ : list of shape (`n_iter_`,)

0 comments on commit 2fd022d
