From e0ebc7839153da72e091f385ee4e6d4df51f96ef Mon Sep 17 00:00:00 2001
From: genvalen
Date: Wed, 23 Feb 2022 08:44:25 -0500
Subject: [PATCH] DOC Update documentation of gradient boosting estimators w/
 ranges (#22153)

Co-authored-by: Thomas J. Fan
---
 sklearn/ensemble/_gb.py | 81 +++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 32 deletions(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 0381537d2b48a..4818c8f7c9f1b 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -989,11 +989,13 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     learning_rate : float, default=0.1
         Learning rate shrinks the contribution of each tree by
         `learning_rate`. There is a trade-off between learning_rate and
         n_estimators.
+        Values must be in the range `(0.0, inf)`.
 
     n_estimators : int, default=100
         The number of boosting stages to perform. Gradient boosting
         is fairly robust to over-fitting so a large number usually
         results in better performance.
+        Values must be in the range `[1, inf)`.
 
     subsample : float, default=1.0
         The fraction of samples to be used for fitting the individual base
@@ -1001,6 +1003,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Boosting. `subsample` interacts with the parameter `n_estimators`.
         Choosing `subsample < 1.0` leads to a reduction of variance
         and an increase in bias.
+        Values must be in the range `(0.0, 1.0]`.
 
     criterion : {'friedman_mse', 'squared_error', 'mse'}, \
             default='friedman_mse'
@@ -1019,10 +1022,9 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     min_samples_split : int or float, default=2
         The minimum number of samples required to split an internal node:
 
-        - If int, then consider `min_samples_split` as the minimum number.
-        - If float, then `min_samples_split` is a fraction and
-          `ceil(min_samples_split * n_samples)` are the minimum
-          number of samples for each split.
+        - If int, values must be in the range `[2, inf)`.
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`
+          will be `ceil(min_samples_split * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -1034,10 +1036,9 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         right branches. This may have the effect of smoothing the model,
         especially in regression.
 
-        - If int, then consider `min_samples_leaf` as the minimum number.
-        - If float, then `min_samples_leaf` is a fraction and
-          `ceil(min_samples_leaf * n_samples)` are the minimum
-          number of samples for each node.
+        - If int, values must be in the range `[1, inf)`.
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`
+          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -1046,16 +1047,19 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         The minimum weighted fraction of the sum total of weights (of all
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
+        Values must be in the range `[0.0, 0.5]`.
 
     max_depth : int, default=3
         The maximum depth of the individual regression estimators. The maximum
         depth limits the number of nodes in the tree. Tune this parameter
         for best performance; the best value depends on the interaction
         of the input variables.
+        Values must be in the range `[1, inf)`.
 
     min_impurity_decrease : float, default=0.0
         A node will be split if this split induces a decrease of the impurity
         greater than or equal to this value.
+        Values must be in the range `[0.0, inf)`.
 
         The weighted impurity decrease equation is the following::
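The float forms of `min_samples_split` and `min_samples_leaf` documented above are fractions of the training set, converted with `ceil`. A minimal sketch of that conversion; the `n_samples` value here is hypothetical, chosen only for illustration::

    from math import ceil

    n_samples = 1000          # hypothetical training-set size
    min_samples_split = 0.01  # float form, must lie in (0.0, 1.0]

    # Per the docstring, the effective minimum number of samples
    # required to split a node is ceil(min_samples_split * n_samples).
    print(ceil(min_samples_split * n_samples))  # -> 10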
@@ -1090,10 +1094,9 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None
         The number of features to consider when looking for the best split:
 
-        - If int, then consider `max_features` features at each split.
-        - If float, then `max_features` is a fraction and
-          `int(max_features * n_features)` features are considered at each
-          split.
+        - If int, values must be in the range `[1, inf)`.
+        - If float, values must be in the range `(0.0, 1.0]` and the features
+          considered at each split will be `int(max_features * n_features)`.
         - If 'auto', then `max_features=sqrt(n_features)`.
         - If 'sqrt', then `max_features=sqrt(n_features)`.
         - If 'log2', then `max_features=log2(n_features)`.
@@ -1110,11 +1113,13 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Enable verbose output. If 1 then it prints progress and performance
         once in a while (the more trees the lower the frequency). If greater
         than 1 then it prints progress and performance for every tree.
+        Values must be in the range `[0, inf)`.
 
     max_leaf_nodes : int, default=None
         Grow trees with ``max_leaf_nodes`` in best-first fashion.
         Best nodes are defined as relative reduction in impurity.
-        If None then unlimited number of leaf nodes.
+        Values must be in the range `[2, inf)`.
+        If `None`, then unlimited number of leaf nodes.
 
     warm_start : bool, default=False
         When set to ``True``, reuse the solution of the previous call to fit
@@ -1123,7 +1128,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
 
     validation_fraction : float, default=0.1
         The proportion of training data to set aside as validation set for
-        early stopping. Must be between 0 and 1.
+        early stopping. Values must be in the range `(0.0, 1.0)`.
         Only used if ``n_iter_no_change`` is set to an integer.
 
         .. versionadded:: 0.20
@@ -1136,6 +1141,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         data as validation and terminate training when validation score is not
         improving in all of the previous ``n_iter_no_change`` numbers of
         iterations. The split is stratified.
+        Values must be in the range `[1, inf)`.
 
         .. versionadded:: 0.20
 
@@ -1143,14 +1149,16 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Tolerance for the early stopping. When the loss is not improving
         by at least tol for ``n_iter_no_change`` iterations (if set to a
         number), the training stops.
+        Values must be in the range `(0.0, inf)`.
 
         .. versionadded:: 0.20
 
     ccp_alpha : non-negative float, default=0.0
         Complexity parameter used for Minimal Cost-Complexity Pruning. The
         subtree with the largest cost complexity that is smaller than
-        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
-        :ref:`minimal_cost_complexity_pruning` for details.
+        ``ccp_alpha`` will be chosen. By default, no pruning is performed.
+        Values must be in the range `[0.0, inf)`.
+        See :ref:`minimal_cost_complexity_pruning` for details.
 
         .. versionadded:: 0.22
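Taken together, `validation_fraction`, `n_iter_no_change`, and `tol` control early stopping for the classifier. A short usage sketch with every value inside the documented ranges; the dataset and settings are illustrative, not taken from the patch::

    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_classification(n_samples=1000, random_state=0)

    clf = GradientBoostingClassifier(
        n_estimators=500,         # [1, inf)
        learning_rate=0.1,        # (0.0, inf)
        subsample=0.8,            # (0.0, 1.0]
        validation_fraction=0.1,  # (0.0, 1.0)
        n_iter_no_change=5,       # [1, inf)
        tol=1e-4,                 # (0.0, inf)
        random_state=0,
    ).fit(X, y)

    # Early stopping usually halts well before the 500-stage budget;
    # n_estimators_ holds the number of stages actually fitted.
    print(clf.n_estimators_)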
@@ -1548,11 +1556,13 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     learning_rate : float, default=0.1
         Learning rate shrinks the contribution of each tree by
         `learning_rate`. There is a trade-off between learning_rate and
         n_estimators.
+        Values must be in the range `(0.0, inf)`.
 
     n_estimators : int, default=100
         The number of boosting stages to perform. Gradient boosting
         is fairly robust to over-fitting so a large number usually
         results in better performance.
+        Values must be in the range `[1, inf)`.
 
     subsample : float, default=1.0
         The fraction of samples to be used for fitting the individual base
@@ -1560,6 +1570,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         Boosting. `subsample` interacts with the parameter `n_estimators`.
         Choosing `subsample < 1.0` leads to a reduction of variance
         and an increase in bias.
+        Values must be in the range `(0.0, 1.0]`.
 
     criterion : {'friedman_mse', 'squared_error', 'mse'}, \
             default='friedman_mse'
@@ -1578,10 +1589,9 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     min_samples_split : int or float, default=2
         The minimum number of samples required to split an internal node:
 
-        - If int, then consider `min_samples_split` as the minimum number.
-        - If float, then `min_samples_split` is a fraction and
-          `ceil(min_samples_split * n_samples)` are the minimum
-          number of samples for each split.
+        - If int, values must be in the range `[2, inf)`.
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`
+          will be `ceil(min_samples_split * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -1593,10 +1603,9 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         right branches. This may have the effect of smoothing the model,
         especially in regression.
 
-        - If int, then consider `min_samples_leaf` as the minimum number.
-        - If float, then `min_samples_leaf` is a fraction and
-          `ceil(min_samples_leaf * n_samples)` are the minimum
-          number of samples for each node.
+        - If int, values must be in the range `[1, inf)`.
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`
+          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -1605,16 +1614,19 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         The minimum weighted fraction of the sum total of weights (of all
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
+        Values must be in the range `[0.0, 0.5]`.
 
     max_depth : int, default=3
         Maximum depth of the individual regression estimators. The maximum
         depth limits the number of nodes in the tree. Tune this parameter
         for best performance; the best value depends on the interaction
         of the input variables.
+        Values must be in the range `[1, inf)`.
 
     min_impurity_decrease : float, default=0.0
         A node will be split if this split induces a decrease of the impurity
         greater than or equal to this value.
+        Values must be in the range `[0.0, inf)`.
 
         The weighted impurity decrease equation is the following::
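One motivation for spelling out these ranges is that values outside them are rejected. A sketch, assuming a scikit-learn release of this era, where the checks run when `fit` is called rather than at construction::

    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor

    X, y = make_regression(n_samples=100, random_state=0)

    # learning_rate=0.0 falls outside the documented range (0.0, inf),
    # so fit() raises a ValueError describing the violated constraint.
    try:
        GradientBoostingRegressor(learning_rate=0.0).fit(X, y)
    except ValueError as exc:
        print(exc)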
- If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. @@ -1669,16 +1680,19 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): alpha : float, default=0.9 The alpha-quantile of the huber loss function and the quantile loss function. Only if ``loss='huber'`` or ``loss='quantile'``. + Values must be in the range `(0.0, 1.0)`. verbose : int, default=0 Enable verbose output. If 1 then it prints progress and performance once in a while (the more trees the lower the frequency). If greater than 1 then it prints progress and performance for every tree. + Values must be in the range `[0, inf)`. max_leaf_nodes : int, default=None Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. + Values must be in the range `[2, inf)`. + If None, then unlimited number of leaf nodes. warm_start : bool, default=False When set to ``True``, reuse the solution of the previous call to fit @@ -1687,7 +1701,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): validation_fraction : float, default=0.1 The proportion of training data to set aside as validation set for - early stopping. Must be between 0 and 1. + early stopping. Values must be in the range `(0.0, 1.0)`. Only used if ``n_iter_no_change`` is set to an integer. .. versionadded:: 0.20 @@ -1700,6 +1714,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations. + Values must be in the range `[1, inf)`. .. versionadded:: 0.20 @@ -1707,14 +1722,16 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): Tolerance for the early stopping. When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops. + Values must be in the range `(0.0, inf)`. .. versionadded:: 0.20 ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than - ``ccp_alpha`` will be chosen. By default, no pruning is performed. See - :ref:`minimal_cost_complexity_pruning` for details. + ``ccp_alpha`` will be chosen. By default, no pruning is performed. + Values must be in the range `[0.0, inf)`. + See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22