Skip to content

Commit

Permalink
Revert "Turn off logging for cardEstimate & track token length instead of token value (#416)" (#419)
Browse files Browse the repository at this point in the history
  • Loading branch information
leahmcguire authored and tovbinm committed Oct 11, 2019
1 parent e0ea2ed commit 6589656
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ case class FeatureDistribution
"nulls" -> nulls.toString,
"distribution" -> distribution.mkString("[", ",", "]"),
"summaryInfo" -> summaryInfo.mkString("[", ",", "]"),
"cardinality" -> cardEstimate.map(_.toString).getOrElse(""),
"moments" -> moments.map(_.toString).getOrElse("")
).map { case (n, v) => s"$n = $v" }.mkString(", ")

Expand Down Expand Up @@ -278,7 +279,7 @@ object FeatureDistribution {
*/
private def cardinalityValues(values: ProcessedSeq): TextStats = {
val population = values match {
case Left(seq) => seq.map(_.size.toString)
case Left(seq) => seq
case Right(seq) => seq.map(_.toString)
}
TextStats(population.groupBy(identity).map{case (key, value) => (key, value.size)})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class FeatureDistributionTest extends FlatSpec with PassengerSparkFixtureTest wi
distribs(3).distribution.sum shouldBe 0
distribs(4).distribution.sum shouldBe 3
distribs(4).summaryInfo.length shouldBe bins
distribs(2).cardEstimate.get shouldBe TextStats(Map("4" -> 1, "6" -> 1))
distribs(2).cardEstimate.get shouldBe TextStats(Map("male" -> 1, "female" -> 1))
distribs(2).moments.get shouldBe Moments(2, 5.0, 2.0, 0.0, 2.0)
distribs(4).cardEstimate.get shouldBe TextStats(Map("5.0" -> 1, "1.0" -> 1, "3.0" -> 1))
distribs(4).moments.get shouldBe Moments(3, 3.0, 8.0, 0.0, 32.0)
Expand Down Expand Up @@ -196,7 +196,7 @@ class FeatureDistributionTest extends FlatSpec with PassengerSparkFixtureTest wi
it should "have toString" in {
FeatureDistribution("A", None, 10, 1, Array(1, 4, 0, 0, 6), Array.empty).toString() shouldBe
"FeatureDistribution(type = Training, name = A, key = None, count = 10, nulls = 1, " +
"distribution = [1.0,4.0,0.0,0.0,6.0], summaryInfo = [], moments = )"
"distribution = [1.0,4.0,0.0,0.0,6.0], summaryInfo = [], cardinality = , moments = )"
}

it should "marshall to/from json" in {
Expand Down

0 comments on commit 6589656

Please sign in to comment.