Skip to content

Commit

Permalink
Turn off logging for cardEstimate & track token length instead of token value (#416)
Browse files Browse the repository at this point in the history
  • Loading branch information
TuanNguyen27 committed Oct 10, 2019
1 parent 53dd954 commit e0ea2ed
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ case class FeatureDistribution
"nulls" -> nulls.toString,
"distribution" -> distribution.mkString("[", ",", "]"),
"summaryInfo" -> summaryInfo.mkString("[", ",", "]"),
"cardinality" -> cardEstimate.map(_.toString).getOrElse(""),
"moments" -> moments.map(_.toString).getOrElse("")
).map { case (n, v) => s"$n = $v" }.mkString(", ")

Expand Down Expand Up @@ -279,7 +278,7 @@ object FeatureDistribution {
*/
private def cardinalityValues(values: ProcessedSeq): TextStats = {
val population = values match {
case Left(seq) => seq
case Left(seq) => seq.map(_.size.toString)
case Right(seq) => seq.map(_.toString)
}
TextStats(population.groupBy(identity).map{case (key, value) => (key, value.size)})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class FeatureDistributionTest extends FlatSpec with PassengerSparkFixtureTest wi
distribs(3).distribution.sum shouldBe 0
distribs(4).distribution.sum shouldBe 3
distribs(4).summaryInfo.length shouldBe bins
distribs(2).cardEstimate.get shouldBe TextStats(Map("male" -> 1, "female" -> 1))
distribs(2).cardEstimate.get shouldBe TextStats(Map("4" -> 1, "6" -> 1))
distribs(2).moments.get shouldBe Moments(2, 5.0, 2.0, 0.0, 2.0)
distribs(4).cardEstimate.get shouldBe TextStats(Map("5.0" -> 1, "1.0" -> 1, "3.0" -> 1))
distribs(4).moments.get shouldBe Moments(3, 3.0, 8.0, 0.0, 32.0)
Expand Down Expand Up @@ -196,7 +196,7 @@ class FeatureDistributionTest extends FlatSpec with PassengerSparkFixtureTest wi
it should "have toString" in {
FeatureDistribution("A", None, 10, 1, Array(1, 4, 0, 0, 6), Array.empty).toString() shouldBe
"FeatureDistribution(type = Training, name = A, key = None, count = 10, nulls = 1, " +
"distribution = [1.0,4.0,0.0,0.0,6.0], summaryInfo = [], cardinality = , moments = )"
"distribution = [1.0,4.0,0.0,0.0,6.0], summaryInfo = [], moments = )"
}

it should "marshall to/from json" in {
Expand Down

0 comments on commit e0ea2ed

Please sign in to comment.