From 56aab84c3722e7861e7a48772767313409b83b00 Mon Sep 17 00:00:00 2001 From: Maciej Date: Sun, 6 Sep 2020 19:53:14 +0200 Subject: [PATCH] [SPARK-32310] Add *args to different Params constructors (#515) * Add *args to different Params constructors Resolves #441 --- third_party/3/pyspark/ml/classification.pyi | 11 +++++++++-- third_party/3/pyspark/ml/clustering.pyi | 5 +++++ third_party/3/pyspark/ml/feature.pyi | 10 ++++++++++ third_party/3/pyspark/ml/fpm.pyi | 3 ++- third_party/3/pyspark/ml/recommendation.pyi | 1 + third_party/3/pyspark/ml/regression.pyi | 12 ++++++++++-- third_party/3/pyspark/ml/tuning.pyi | 2 ++ 7 files changed, 39 insertions(+), 5 deletions(-) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index 1cf8440c..04302204 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -254,6 +254,7 @@ class _LogisticRegressionParams( upperBoundsOnCoefficients: Param[Matrix] lowerBoundsOnIntercepts: Param[Vector] upperBoundsOnIntercepts: Param[Vector] + def __init__(self, *args: Any): ... def setThreshold(self: P, value: float) -> P: ... def getThreshold(self) -> float: ... def setThresholds(self: P, value: List[float]) -> P: ... @@ -371,7 +372,9 @@ class BinaryLogisticRegressionSummary( class BinaryLogisticRegressionTrainingSummary( BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary ): ... -class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): ... + +class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): + def __init__(self, *args: Any): ... class DecisionTreeClassifier( _JavaProbabilisticClassifier[DecisionTreeClassificationModel], @@ -443,7 +446,8 @@ class DecisionTreeClassificationModel( @property def featureImportances(self) -> Vector: ... -class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): ... +class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): + def __init__(self, *args: Any): ... class RandomForestClassifier( _JavaProbabilisticClassifier[RandomForestClassificationModel], @@ -544,6 +548,7 @@ class BinaryRandomForestClassificationTrainingSummary( class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): supportedLossTypes: List[str] lossType: Param[str] + def __init__(self, *args: Any): ... def getLossType(self) -> str: ... class GBTClassifier( @@ -636,6 +641,7 @@ class GBTClassificationModel( class _NaiveBayesParams(_PredictorParams, HasWeightCol): smoothing: Param[float] modelType: Param[str] + def __init__(self, *args: Any): ... def getSmoothing(self) -> float: ... def getModelType(self) -> str: ... @@ -702,6 +708,7 @@ class _MultilayerPerceptronParams( layers: Param[List[int]] solver: Param[str] initialWeights: Param[Vector] + def __init__(self, *args: Any): ... def getLayers(self) -> List[int]: ... def getInitialWeights(self) -> Vector: ... diff --git a/third_party/3/pyspark/ml/clustering.pyi b/third_party/3/pyspark/ml/clustering.pyi index c0b3563d..e66d1c22 100644 --- a/third_party/3/pyspark/ml/clustering.pyi +++ b/third_party/3/pyspark/ml/clustering.pyi @@ -58,6 +58,7 @@ class _GaussianMixtureParams( HasBlockSize, ): k: Param[int] + def __init__(self, *args: Any): ... def getK(self) -> int: ... class GaussianMixtureModel( @@ -149,6 +150,7 @@ class _KMeansParams( k: Param[int] initMode: Param[str] initSteps: Param[int] + def __init__(self, *args: Any): ... def getK(self) -> int: ... def getInitMode(self) -> str: ... def getInitSteps(self) -> int: ... @@ -219,6 +221,7 @@ class _BisectingKMeansParams( ): k: Param[int] minDivisibleClusterSize: Param[float] + def __init__(self, *args: Any): ... def getK(self) -> int: ... def getMinDivisibleClusterSize(self) -> float: ... @@ -291,6 +294,7 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): topicConcentration: Param[float] topicDistributionCol: Param[str] keepLastCheckpoint: Param[bool] + def __init__(self, *args: Any): ... def setK(self, value: int) -> LDA: ... def getOptimizer(self) -> str: ... def getLearningOffset(self) -> float: ... @@ -381,6 +385,7 @@ class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): initMode: Param[str] srcCol: Param[str] dstCol: Param[str] + def __init__(self, *args: Any): ... def getK(self) -> int: ... def getInitMode(self) -> str: ... def getSrcCol(self) -> str: ... diff --git a/third_party/3/pyspark/ml/feature.pyi b/third_party/3/pyspark/ml/feature.pyi index c22b0d15..7e2b385b 100644 --- a/third_party/3/pyspark/ml/feature.pyi +++ b/third_party/3/pyspark/ml/feature.pyi @@ -80,6 +80,7 @@ class Binarizer( class _LSHParams(HasInputCol, HasOutputCol): numHashTables: Param[int] + def __init__(self, *args: Any): ... def getNumHashTables(self) -> int: ... class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable): @@ -386,6 +387,7 @@ class HashingTF( class _IDFParams(HasInputCol, HasOutputCol): minDocFreq: Param[int] + def __init__(self, *args: Any): ... def getMinDocFreq(self) -> int: ... class IDF(JavaEstimator[IDFModel], _IDFParams, JavaMLReadable[IDF], JavaMLWritable): @@ -558,6 +560,7 @@ class MinHashLSHModel(_LSHModel, JavaMLReadable[MinHashLSHModel], JavaMLWritable class _MinMaxScalerParams(HasInputCol, HasOutputCol): min: Param[float] max: Param[float] + def __init__(self, *args: Any): ... def getMin(self) -> float: ... def getMax(self) -> float: ... @@ -653,6 +656,7 @@ class Normalizer( class _OneHotEncoderParams(HasInputCols, HasOutputCols, HasHandleInvalid): handleInvalid: Param[str] dropLast: Param[bool] + def __init__(self, *args: Any): ... def getDropLast(self) -> bool: ... class OneHotEncoder( @@ -813,6 +817,7 @@ class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError): upper: Param[float] withCentering: Param[bool] withScaling: Param[bool] + def __init__(self, *args: Any): ... def getLower(self) -> float: ... def getUpper(self) -> float: ... def getWithCentering(self) -> bool: ... @@ -913,6 +918,7 @@ class SQLTransformer(JavaTransformer, JavaMLReadable[SQLTransformer], JavaMLWrit class _StandardScalerParams(HasInputCol, HasOutputCol): withMean: Param[bool] withStd: Param[bool] + def __init__(self, *args: Any): ... def getWithMean(self) -> bool: ... def getWithStd(self) -> bool: ... @@ -1178,6 +1184,7 @@ class VectorAssembler( class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid): maxCategories: Param[int] handleInvalid: Param[str] + def __init__(self, *args: Any): ... def getMaxCategories(self) -> int: ... class VectorIndexer( @@ -1256,6 +1263,7 @@ class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCo minCount: Param[int] windowSize: Param[int] maxSentenceLength: Param[int] + def __init__(self, *args: Any): ... def getVectorSize(self) -> int: ... def getNumPartitions(self) -> int: ... def getMinCount(self) -> int: ... @@ -1358,6 +1366,7 @@ class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid): forceIndexLabel: Param[bool] stringIndexerOrderType: Param[str] handleInvalid: Param[str] + def __init__(self, *args: Any): ... def getFormula(self) -> str: ... def getForceIndexLabel(self) -> bool: ... def getStringIndexerOrderType(self) -> str: ... @@ -1406,6 +1415,7 @@ class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): fpr: Param[float] fdr: Param[float] fwe: Param[float] + def __init__(self, *args: Any): ... def getSelectorType(self) -> str: ... def getNumTopFeatures(self) -> int: ... def getPercentile(self) -> float: ... diff --git a/third_party/3/pyspark/ml/fpm.pyi b/third_party/3/pyspark/ml/fpm.pyi index 6171c8eb..d17d31ec 100644 --- a/third_party/3/pyspark/ml/fpm.pyi +++ b/third_party/3/pyspark/ml/fpm.pyi @@ -19,7 +19,7 @@ # Stubs for pyspark.ml.base (Python 3) # -from typing import Optional +from typing import Any, Optional from pyspark.ml._typing import P from pyspark.ml.util import * @@ -32,6 +32,7 @@ class _FPGrowthParams(HasPredictionCol): minSupport: Param[float] numPartitions: Param[int] minConfidence: Param[float] + def __init__(self, *args: Any): ... def getItemsCol(self) -> str: ... def getMinSupport(self) -> float: ... def getNumPartitions(self) -> int: ... diff --git a/third_party/3/pyspark/ml/recommendation.pyi b/third_party/3/pyspark/ml/recommendation.pyi index f71655ef..4d523999 100644 --- a/third_party/3/pyspark/ml/recommendation.pyi +++ b/third_party/3/pyspark/ml/recommendation.pyi @@ -46,6 +46,7 @@ class _ALSParams( nonnegative: Param[bool] intermediateStorageLevel: Param[str] finalStorageLevel: Param[str] + def __init__(self, *args: Any): ... def getRank(self) -> int: ... def getNumUserBlocks(self) -> int: ... def getNumItemBlocks(self) -> int: ... diff --git a/third_party/3/pyspark/ml/regression.pyi b/third_party/3/pyspark/ml/regression.pyi index 078ccab0..5464ebdb 100644 --- a/third_party/3/pyspark/ml/regression.pyi +++ b/third_party/3/pyspark/ml/regression.pyi @@ -76,6 +76,7 @@ class _LinearRegressionParams( solver: Param[str] loss: Param[str] epsilon: Param[float] + def __init__(self, *args: Any): ... def getEpsilon(self) -> float: ... class LinearRegression( @@ -251,7 +252,8 @@ class IsotonicRegressionModel( class _DecisionTreeRegressorParams( _DecisionTreeParams, _TreeRegressorParams, HasVarianceCol -): ... +): + def __init__(self, *args: Any): ... class DecisionTreeRegressor( _JavaRegressor[DecisionTreeRegressionModel], @@ -323,7 +325,8 @@ class DecisionTreeRegressionModel( @property def featureImportances(self) -> Vector: ... -class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): ... +class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): + def __init__(self, *args: Any): ... class RandomForestRegressor( _JavaRegressor[RandomForestRegressionModel], @@ -406,6 +409,7 @@ class RandomForestRegressionModel( class _GBTRegressorParams(_GBTParams, _TreeRegressorParams): supportedLossTypes: List[str] lossType: Param[str] + def __init__(self, *args: Any): ... def getLossType(self) -> str: ... class GBTRegressor( @@ -508,6 +512,7 @@ class _AFTSurvivalRegressionParams( censorCol: Param[str] quantileProbabilities: Param[List[float]] quantilesCol: Param[str] + def __init__(self, *args: Any): ... def getCensorCol(self) -> str: ... def getQuantileProbabilities(self) -> List[float]: ... def getQuantilesCol(self) -> str: ... @@ -593,6 +598,7 @@ class _GeneralizedLinearRegressionParams( linkPower: Param[float] solver: Param[str] offsetCol: Param[str] + def __init__(self, *args: Any): ... def getFamily(self) -> str: ... def getLinkPredictionCol(self) -> str: ... def getLink(self) -> str: ... @@ -722,12 +728,14 @@ class _FactorizationMachinesParams( HasSeed, HasFitIntercept, HasRegParam, + HasWeightCol, ): factorSize: Param[int] fitLinear: Param[bool] miniBatchFraction: Param[float] initStd: Param[float] solver: Param[str] + def __init__(self, *args: Any): ... def getFactorSize(self): ... def getFitLinear(self): ... def getMiniBatchFraction(self): ... diff --git a/third_party/3/pyspark/ml/tuning.pyi b/third_party/3/pyspark/ml/tuning.pyi index 78b359b0..ff11f992 100644 --- a/third_party/3/pyspark/ml/tuning.pyi +++ b/third_party/3/pyspark/ml/tuning.pyi @@ -48,6 +48,7 @@ class _ValidatorParams(HasSeed): class _CrossValidatorParams(_ValidatorParams): numFolds: Param[int] foldCol: Param[str] + def __init__(self, *args: Any): ... def getNumFolds(self) -> int: ... def getFoldCol(self) -> str: ... @@ -115,6 +116,7 @@ class CrossValidatorModel( class _TrainValidationSplitParams(_ValidatorParams): trainRatio: Param[float] + def __init__(self, *args: Any): ... def getTrainRatio(self) -> float: ... class TrainValidationSplit(