Fix more errors resulting from removing the moments calculation

salesforce · TuanNguyen27 · Sep 25, 2019 · Sep 25, 2019 · Sep 25, 2019 · Sep 25, 2019
commit bb858c7b1ee7b2585b6cbe05664663ca2b3397e9
@@ -167,15 +167,14 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest with Dou
  }
 
  def getFeatureMomentsAndCard(inputModel: FeatureLike[Prediction],
- DF: DataFrame): (Map[String, Moments], Map[String, TextStats]) = {
+ DF: DataFrame): Map[String, Moments] = {
  lazy val workFlow = new OpWorkflow().setResultFeatures(inputModel).setInputDataset(DF)
  lazy val dummyReader = workFlow.getReader()
  lazy val workFlowRFF = workFlow.withRawFeatureFilter(Some(dummyReader), None)
  lazy val model = workFlowRFF.train()
  val insights = model.modelInsights(inputModel)
- val featureMoments = insights.features.map(f => f.featureName -> f.distributions.head.moments.get).toMap
  val featureCardinality = insights.features.map(f => f.featureName -> f.distributions.head.cardEstimate.get).toMap
- return (featureMoments, featureCardinality)
+ return featureCardinality
  }
 
  val params = new OpParams()
@@ -777,23 +776,15 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest with Dou
  absError2 should be < tol * smallCoeffSum / 2
  }
 
- it should "correctly return moments calculation and cardinality calculation for numeric features" in {
+ it should "correctly return cardinality calculation for numeric features" in {
 
  import spark.implicits._
  val df = linRegDF._3
  val meanTol = 0.01
  val varTol = 0.01
- val (moments, cardinality) = getFeatureMomentsAndCard(standardizedLinpred, linRegDF._3)
+ val cardinality = getFeatureMomentsAndCard(standardizedLinpred, linRegDF._3)
 
  // Go through each feature and check that the mean, variance, and unique counts match the data
- moments.foreach { case (featureName, value) => {
- value.count shouldBe 1000
- val (expectedMean, expectedVariance) =
- df.select(avg(featureName), variance(featureName)).as[(Double, Double)].collect().head
- math.abs((value.mean - expectedMean) / expectedMean) < meanTol shouldBe true
- math.abs((value.variance - expectedVariance) / expectedVariance) < varTol shouldBe true
- }
- }
 
  cardinality.foreach { case (featureName, value) => {
  val actualUniques = df.select(featureName).as[Double].collect().toSet

@@ -202,7 +202,7 @@ class FeatureDistributionTest extends FlatSpec with PassengerSparkFixtureTest wi
  it should "marshall to/from json" in {
  val fd1 = FeatureDistribution("A", None, 10, 1, Array(1, 4, 0, 0, 6), Array.empty)
  val fd2 = FeatureDistribution("A", None, 10, 1, Array(1, 4, 0, 0, 6),
- Array.empty, Some(TextStats(Map("foo" -> 1, "bar" ->2))),
+ Array.empty, Some("String"), Some(TextStats(Map("foo" -> 1, "bar" ->2))),
  FeatureDistributionType.Scoring)
  val json = FeatureDistribution.toJson(Array(fd1, fd2))
  FeatureDistribution.fromJson(json) match {