From 56aab84c3722e7861e7a48772767313409b83b00 Mon Sep 17 00:00:00 2001
From: Maciej <zero323@users.noreply.github.com>
Date: Sun, 6 Sep 2020 19:53:14 +0200
Subject: [PATCH] [SPARK-32310] Add *args to different Params constructors
 (#515)

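* Add *args to different Params constructors: declare
  `def __init__(self, *args: Any): ...` on the `_*Params` stub classes in
  pyspark.ml (classification, clustering, feature, fpm, recommendation,
  regression, tuning)
* Add `HasWeightCol` to the base classes of `_FactorizationMachinesParams`
* Import `Any` in fpm.pyi, which the new `__init__` signature requires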

Resolves #441
---
 third_party/3/pyspark/ml/classification.pyi | 11 +++++++++--
 third_party/3/pyspark/ml/clustering.pyi     |  5 +++++
 third_party/3/pyspark/ml/feature.pyi        | 10 ++++++++++
 third_party/3/pyspark/ml/fpm.pyi            |  3 ++-
 third_party/3/pyspark/ml/recommendation.pyi |  1 +
 third_party/3/pyspark/ml/regression.pyi     | 12 ++++++++++--
 third_party/3/pyspark/ml/tuning.pyi         |  2 ++
 7 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi
index 1cf8440c..04302204 100644
--- a/third_party/3/pyspark/ml/classification.pyi
+++ b/third_party/3/pyspark/ml/classification.pyi
@@ -254,6 +254,7 @@ class _LogisticRegressionParams(
     upperBoundsOnCoefficients: Param[Matrix]
     lowerBoundsOnIntercepts: Param[Vector]
     upperBoundsOnIntercepts: Param[Vector]
+    def __init__(self, *args: Any): ...
     def setThreshold(self: P, value: float) -> P: ...
     def getThreshold(self) -> float: ...
     def setThresholds(self: P, value: List[float]) -> P: ...
@@ -371,7 +372,9 @@ class BinaryLogisticRegressionSummary(
 class BinaryLogisticRegressionTrainingSummary(
     BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary
 ): ...
-class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): ...
+
+class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams):
+    def __init__(self, *args: Any): ...
 
 class DecisionTreeClassifier(
     _JavaProbabilisticClassifier[DecisionTreeClassificationModel],
@@ -443,7 +446,8 @@ class DecisionTreeClassificationModel(
     @property
     def featureImportances(self) -> Vector: ...
 
-class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): ...
+class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams):
+    def __init__(self, *args: Any): ...
 
 class RandomForestClassifier(
     _JavaProbabilisticClassifier[RandomForestClassificationModel],
@@ -544,6 +548,7 @@ class BinaryRandomForestClassificationTrainingSummary(
 class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity):
     supportedLossTypes: List[str]
     lossType: Param[str]
+    def __init__(self, *args: Any): ...
     def getLossType(self) -> str: ...
 
 class GBTClassifier(
@@ -636,6 +641,7 @@ class GBTClassificationModel(
 class _NaiveBayesParams(_PredictorParams, HasWeightCol):
     smoothing: Param[float]
     modelType: Param[str]
+    def __init__(self, *args: Any): ...
     def getSmoothing(self) -> float: ...
     def getModelType(self) -> str: ...
 
@@ -702,6 +708,7 @@ class _MultilayerPerceptronParams(
     layers: Param[List[int]]
     solver: Param[str]
     initialWeights: Param[Vector]
+    def __init__(self, *args: Any): ...
     def getLayers(self) -> List[int]: ...
     def getInitialWeights(self) -> Vector: ...
 
diff --git a/third_party/3/pyspark/ml/clustering.pyi b/third_party/3/pyspark/ml/clustering.pyi
index c0b3563d..e66d1c22 100644
--- a/third_party/3/pyspark/ml/clustering.pyi
+++ b/third_party/3/pyspark/ml/clustering.pyi
@@ -58,6 +58,7 @@ class _GaussianMixtureParams(
     HasBlockSize,
 ):
     k: Param[int]
+    def __init__(self, *args: Any): ...
     def getK(self) -> int: ...
 
 class GaussianMixtureModel(
@@ -149,6 +150,7 @@ class _KMeansParams(
     k: Param[int]
     initMode: Param[str]
     initSteps: Param[int]
+    def __init__(self, *args: Any): ...
     def getK(self) -> int: ...
     def getInitMode(self) -> str: ...
     def getInitSteps(self) -> int: ...
@@ -219,6 +221,7 @@ class _BisectingKMeansParams(
 ):
     k: Param[int]
     minDivisibleClusterSize: Param[float]
+    def __init__(self, *args: Any): ...
     def getK(self) -> int: ...
     def getMinDivisibleClusterSize(self) -> float: ...
 
@@ -291,6 +294,7 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval):
     topicConcentration: Param[float]
     topicDistributionCol: Param[str]
     keepLastCheckpoint: Param[bool]
+    def __init__(self, *args: Any): ...
     def setK(self, value: int) -> LDA: ...
     def getOptimizer(self) -> str: ...
     def getLearningOffset(self) -> float: ...
@@ -381,6 +385,7 @@ class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol):
     initMode: Param[str]
     srcCol: Param[str]
     dstCol: Param[str]
+    def __init__(self, *args: Any): ...
     def getK(self) -> int: ...
     def getInitMode(self) -> str: ...
     def getSrcCol(self) -> str: ...
diff --git a/third_party/3/pyspark/ml/feature.pyi b/third_party/3/pyspark/ml/feature.pyi
index c22b0d15..7e2b385b 100644
--- a/third_party/3/pyspark/ml/feature.pyi
+++ b/third_party/3/pyspark/ml/feature.pyi
@@ -80,6 +80,7 @@ class Binarizer(
 
 class _LSHParams(HasInputCol, HasOutputCol):
     numHashTables: Param[int]
+    def __init__(self, *args: Any): ...
     def getNumHashTables(self) -> int: ...
 
 class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable):
@@ -386,6 +387,7 @@ class HashingTF(
 
 class _IDFParams(HasInputCol, HasOutputCol):
     minDocFreq: Param[int]
+    def __init__(self, *args: Any): ...
     def getMinDocFreq(self) -> int: ...
 
 class IDF(JavaEstimator[IDFModel], _IDFParams, JavaMLReadable[IDF], JavaMLWritable):
@@ -558,6 +560,7 @@ class MinHashLSHModel(_LSHModel, JavaMLReadable[MinHashLSHModel], JavaMLWritable
 class _MinMaxScalerParams(HasInputCol, HasOutputCol):
     min: Param[float]
     max: Param[float]
+    def __init__(self, *args: Any): ...
     def getMin(self) -> float: ...
     def getMax(self) -> float: ...
 
@@ -653,6 +656,7 @@ class Normalizer(
 class _OneHotEncoderParams(HasInputCols, HasOutputCols, HasHandleInvalid):
     handleInvalid: Param[str]
     dropLast: Param[bool]
+    def __init__(self, *args: Any): ...
     def getDropLast(self) -> bool: ...
 
 class OneHotEncoder(
@@ -813,6 +817,7 @@ class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError):
     upper: Param[float]
     withCentering: Param[bool]
     withScaling: Param[bool]
+    def __init__(self, *args: Any): ...
     def getLower(self) -> float: ...
     def getUpper(self) -> float: ...
     def getWithCentering(self) -> bool: ...
@@ -913,6 +918,7 @@ class SQLTransformer(JavaTransformer, JavaMLReadable[SQLTransformer], JavaMLWrit
 class _StandardScalerParams(HasInputCol, HasOutputCol):
     withMean: Param[bool]
     withStd: Param[bool]
+    def __init__(self, *args: Any): ...
     def getWithMean(self) -> bool: ...
     def getWithStd(self) -> bool: ...
 
@@ -1178,6 +1184,7 @@ class VectorAssembler(
 class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid):
     maxCategories: Param[int]
     handleInvalid: Param[str]
+    def __init__(self, *args: Any): ...
     def getMaxCategories(self) -> int: ...
 
 class VectorIndexer(
@@ -1256,6 +1263,7 @@ class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCo
     minCount: Param[int]
     windowSize: Param[int]
     maxSentenceLength: Param[int]
+    def __init__(self, *args: Any): ...
     def getVectorSize(self) -> int: ...
     def getNumPartitions(self) -> int: ...
     def getMinCount(self) -> int: ...
@@ -1358,6 +1366,7 @@ class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid):
     forceIndexLabel: Param[bool]
     stringIndexerOrderType: Param[str]
     handleInvalid: Param[str]
+    def __init__(self, *args: Any): ...
     def getFormula(self) -> str: ...
     def getForceIndexLabel(self) -> bool: ...
     def getStringIndexerOrderType(self) -> str: ...
@@ -1406,6 +1415,7 @@ class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol):
     fpr: Param[float]
     fdr: Param[float]
     fwe: Param[float]
+    def __init__(self, *args: Any): ...
     def getSelectorType(self) -> str: ...
     def getNumTopFeatures(self) -> int: ...
     def getPercentile(self) -> float: ...
diff --git a/third_party/3/pyspark/ml/fpm.pyi b/third_party/3/pyspark/ml/fpm.pyi
index 6171c8eb..d17d31ec 100644
--- a/third_party/3/pyspark/ml/fpm.pyi
+++ b/third_party/3/pyspark/ml/fpm.pyi
@@ -19,7 +19,7 @@
 # Stubs for pyspark.ml.base (Python 3)
 #
 
-from typing import Optional
+from typing import Any, Optional
 
 from pyspark.ml._typing import P
 from pyspark.ml.util import *
@@ -32,6 +32,7 @@ class _FPGrowthParams(HasPredictionCol):
     minSupport: Param[float]
     numPartitions: Param[int]
     minConfidence: Param[float]
+    def __init__(self, *args: Any): ...
     def getItemsCol(self) -> str: ...
     def getMinSupport(self) -> float: ...
     def getNumPartitions(self) -> int: ...
diff --git a/third_party/3/pyspark/ml/recommendation.pyi b/third_party/3/pyspark/ml/recommendation.pyi
index f71655ef..4d523999 100644
--- a/third_party/3/pyspark/ml/recommendation.pyi
+++ b/third_party/3/pyspark/ml/recommendation.pyi
@@ -46,6 +46,7 @@ class _ALSParams(
     nonnegative: Param[bool]
     intermediateStorageLevel: Param[str]
     finalStorageLevel: Param[str]
+    def __init__(self, *args: Any): ...
     def getRank(self) -> int: ...
     def getNumUserBlocks(self) -> int: ...
     def getNumItemBlocks(self) -> int: ...
diff --git a/third_party/3/pyspark/ml/regression.pyi b/third_party/3/pyspark/ml/regression.pyi
index 078ccab0..5464ebdb 100644
--- a/third_party/3/pyspark/ml/regression.pyi
+++ b/third_party/3/pyspark/ml/regression.pyi
@@ -76,6 +76,7 @@ class _LinearRegressionParams(
     solver: Param[str]
     loss: Param[str]
     epsilon: Param[float]
+    def __init__(self, *args: Any): ...
     def getEpsilon(self) -> float: ...
 
 class LinearRegression(
@@ -251,7 +252,8 @@ class IsotonicRegressionModel(
 
 class _DecisionTreeRegressorParams(
     _DecisionTreeParams, _TreeRegressorParams, HasVarianceCol
-): ...
+):
+    def __init__(self, *args: Any): ...
 
 class DecisionTreeRegressor(
     _JavaRegressor[DecisionTreeRegressionModel],
@@ -323,7 +325,8 @@ class DecisionTreeRegressionModel(
     @property
     def featureImportances(self) -> Vector: ...
 
-class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): ...
+class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams):
+    def __init__(self, *args: Any): ...
 
 class RandomForestRegressor(
     _JavaRegressor[RandomForestRegressionModel],
@@ -406,6 +409,7 @@ class RandomForestRegressionModel(
 class _GBTRegressorParams(_GBTParams, _TreeRegressorParams):
     supportedLossTypes: List[str]
     lossType: Param[str]
+    def __init__(self, *args: Any): ...
     def getLossType(self) -> str: ...
 
 class GBTRegressor(
@@ -508,6 +512,7 @@ class _AFTSurvivalRegressionParams(
     censorCol: Param[str]
     quantileProbabilities: Param[List[float]]
     quantilesCol: Param[str]
+    def __init__(self, *args: Any): ...
     def getCensorCol(self) -> str: ...
     def getQuantileProbabilities(self) -> List[float]: ...
     def getQuantilesCol(self) -> str: ...
@@ -593,6 +598,7 @@ class _GeneralizedLinearRegressionParams(
     linkPower: Param[float]
     solver: Param[str]
     offsetCol: Param[str]
+    def __init__(self, *args: Any): ...
     def getFamily(self) -> str: ...
     def getLinkPredictionCol(self) -> str: ...
     def getLink(self) -> str: ...
@@ -722,12 +728,14 @@ class _FactorizationMachinesParams(
     HasSeed,
     HasFitIntercept,
     HasRegParam,
+    HasWeightCol,
 ):
     factorSize: Param[int]
     fitLinear: Param[bool]
     miniBatchFraction: Param[float]
     initStd: Param[float]
     solver: Param[str]
+    def __init__(self, *args: Any): ...
     def getFactorSize(self): ...
     def getFitLinear(self): ...
     def getMiniBatchFraction(self): ...
diff --git a/third_party/3/pyspark/ml/tuning.pyi b/third_party/3/pyspark/ml/tuning.pyi
index 78b359b0..ff11f992 100644
--- a/third_party/3/pyspark/ml/tuning.pyi
+++ b/third_party/3/pyspark/ml/tuning.pyi
@@ -48,6 +48,7 @@ class _ValidatorParams(HasSeed):
 class _CrossValidatorParams(_ValidatorParams):
     numFolds: Param[int]
     foldCol: Param[str]
+    def __init__(self, *args: Any): ...
     def getNumFolds(self) -> int: ...
     def getFoldCol(self) -> str: ...
 
@@ -115,6 +116,7 @@ class CrossValidatorModel(
 
 class _TrainValidationSplitParams(_ValidatorParams):
     trainRatio: Param[float]
+    def __init__(self, *args: Any): ...
     def getTrainRatio(self) -> float: ...
 
 class TrainValidationSplit(