-
Notifications
You must be signed in to change notification settings - Fork 391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding binary classification bin score evaluator #119
Changes from 3 commits
2d9cb1d
9658eb4
56d6280
0eebe3d
41123d5
a16fc86
ee8924f
919b933
b421d33
b5a70ee
9e613ed
728aa99
db653bf
6ee1a82
4b32250
483c937
d928ba7
01c89eb
b36ec1d
01250d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
/* | ||
* Copyright (c) 2017, Salesforce.com, Inc. | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* * Redistributions of source code must retain the above copyright notice, this | ||
* list of conditions and the following disclaimer. | ||
* | ||
* * Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* * Neither the name of the copyright holder nor the names of its | ||
* contributors may be used to endorse or promote products derived from | ||
* this software without specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
package com.salesforce.op.evaluators | ||
|
||
import com.salesforce.op.UID | ||
import org.apache.spark.ml.linalg.Vector | ||
import org.apache.spark.sql.functions.col | ||
import org.apache.spark.sql.{Dataset, Row} | ||
import org.apache.spark.sql.types.DoubleType | ||
import org.slf4j.LoggerFactory | ||
import org.apache.spark.Partitioner | ||
|
||
/** | ||
* | ||
* Evaluator for Binary Classification which provides statistics about the predicted scores. | ||
* This evaluator creates the specified number of bins and computes the statistics for each bin | ||
* and returns BinaryClassificationBinMetrics, which contains | ||
* | ||
* Total number of data points per bin | ||
* Average Score per bin | ||
* Average Conversion rate per bin | ||
* Bin Centers for each bin | ||
* BrierScore for the overall dataset is also computed, which is a default metric as well. | ||
* | ||
* @param name name of default metric | ||
* @param isLargerBetter is metric better if larger | ||
* @param uid uid for instance | ||
*/ | ||
/**
 * Evaluator for Binary Classification which provides statistics about the predicted scores.
 * This evaluator creates the specified number of bins and computes the statistics for each bin,
 * returning a [[BinaryClassificationBinMetrics]] containing:
 *
 *  - total number of data points per bin
 *  - average score per bin
 *  - average conversion rate per bin
 *  - bin centers for each bin
 *  - Brier score for the overall dataset, which is also the default metric
 *
 * @param name           name of default metric
 * @param isLargerBetter is metric better if larger
 * @param uid            uid for instance
 * @param numBins        number of equal-width bins to partition the score range [0.0, 1.0] into
 */
private[op] class OpBinaryClassifyBinEvaluator
(
  override val name: EvalMetric = OpEvaluatorNames.Binary,
  override val isLargerBetter: Boolean = true,
  // uid must be derived from the evaluator class itself, not from the metrics class
  override val uid: String = UID[OpBinaryClassifyBinEvaluator],
  val numBins: Int = 100
) extends OpBinaryClassificationEvaluatorBase[BinaryClassificationBinMetrics](uid = uid) {

  @transient private lazy val log = LoggerFactory.getLogger(this.getClass)

  // Brier score is the only metric here that is meaningful for optimization.
  def getDefaultMetric: BinaryClassificationBinMetrics => Double = _.BrierScore

  // Metrics returned when there is nothing to evaluate (empty data or no bins).
  private def emptyMetrics: BinaryClassificationBinMetrics = BinaryClassificationBinMetrics(
    BrierScore = 0.0,
    BinCenters = Seq.empty[Double],
    NumberOfDataPoints = Seq.empty[Long],
    AverageScore = Seq.empty[Double],
    AverageConversionRate = Seq.empty[Double]
  )

  override def evaluateAll(data: Dataset[_]): BinaryClassificationBinMetrics = {
    val labelColName = getLabelCol
    val dataUse = makeDataToUse(data, labelColName)

    val (rawPredictionColName, predictionColName, probabilityColName) =
      (getRawPredictionCol, getPredictionValueCol, getProbabilityCol)
    log.debug(
      "Evaluating metrics on columns :\n label : {}\n rawPrediction : {}\n prediction : {}\n probability : {}\n",
      labelColName, rawPredictionColName, predictionColName, probabilityColName
    )

    import dataUse.sparkSession.implicits._
    val rdd = dataUse.select(predictionColName, labelColName).as[(Double, Double)].rdd

    if (rdd.isEmpty()) {
      log.error("The dataset is empty")
      emptyMetrics
    } else if (numBins <= 0) {
      log.error("numBins must be positive but is {}. Returning empty metrics", numBins)
      emptyMetrics
    } else {
      // (probability of the positive class, label) pairs
      val scoreAndLabels =
        dataUse.select(col(probabilityColName), col(labelColName).cast(DoubleType)).rdd.map {
          case Row(prob: Vector, label: Double) => (prob(1), label)
          case Row(prob: Double, label: Double) => (prob, label)
        }

      // Key each record by its raw score and let the partitioner place it into one of
      // `numBins` equal-width bins. Bin on the raw score (no prior rounding): rounding
      // first can shift borderline scores across a bin boundary, e.g. with 2 bins a
      // score of 0.45 rounds to 0.5 and would land in [0.5, 1.0) instead of [0.0, 0.5).
      val binnedValues = scoreAndLabels.keyBy(_._1).partitionBy(new OpBinPartitioner(numBins)).values

      // Single pass per bin computing (count, sum of scores, sum of labels).
      // partitionBy guarantees exactly numBins partitions, so empty bins are included.
      val binStats = binnedValues.mapPartitions { records =>
        val stats = records.foldLeft((0L, 0.0, 0.0)) {
          case ((count, scoreSum, labelSum), (score, label)) => (count + 1L, scoreSum + score, labelSum + label)
        }
        Iterator(stats)
      }.collect()

      // total number of data points in each bin
      val numberOfDataPoints = binStats.map(_._1).toSeq

      // average score per bin (0.0 for empty bins)
      val averageScore = binStats.map { case (count, scoreSum, _) =>
        if (count == 0) 0.0 else scoreSum / count
      }.toSeq

      // average conversion rate per bin: the fraction of records labeled 1.0 (0.0 for empty bins)
      val averageConversionRate = binStats.map { case (count, _, labelSum) =>
        if (count == 0) 0.0 else labelSum / count
      }.toSeq

      // binCenters is the center point of each bin,
      // e.g. for bins [(0.0 - 0.5), (0.5 - 1.0)] the centers are [0.25, 0.75].
      val binCenters = (0 until numBins).map(i => (i + 0.5) / numBins)

      // Brier score of the entire dataset: mean squared difference between score and label.
      val brierScore = scoreAndLabels.map { case (score, label) => math.pow(score - label, 2) }.mean()

      val metrics = BinaryClassificationBinMetrics(
        BrierScore = brierScore,
        BinCenters = binCenters,
        NumberOfDataPoints = numberOfDataPoints,
        AverageScore = averageScore,
        AverageConversionRate = averageConversionRate
      )

      log.info("Evaluated metrics: {}", metrics.toString)
      metrics
    }
  }
}
|
||
/**
 * Partitioner that maps a score in [0.0, 1.0] to one of `numPartitions` equal-width bins.
 *
 * @param numPartitions number of bins (one Spark partition per bin)
 */
class OpBinPartitioner(override val numPartitions: Int) extends Partitioner {

  // Returns the 0-indexed bin for the given score. A score of exactly 1.0 would
  // compute to numPartitions (one past the last bin), so the result is clamped
  // to the index of the last bin.
  def getPartition(key: Any): Int = key match {
    case score: Double =>
      val bin = (score * numPartitions).toInt
      math.min(numPartitions - 1, bin)
  }
}
|
||
/**
 * Metrics returned by the binary classification bin evaluator.
 *
 * @param BrierScore            Brier score (mean squared difference between score and label) over the whole dataset
 * @param BinCenters            center point of each bin
 * @param NumberOfDataPoints    total number of data points in each bin
 * @param AverageScore          average score in each bin
 * @param AverageConversionRate average conversion rate (mean label value) in each bin
 */
case class BinaryClassificationBinMetrics
(
  BrierScore: Double,
  BinCenters: Seq[Double],
  NumberOfDataPoints: Seq[Long],
  AverageScore: Seq[Double],
  AverageConversionRate: Seq[Double]
) extends EvaluationMetrics
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
/* | ||
* Copyright (c) 2017, Salesforce.com, Inc. | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* * Redistributions of source code must retain the above copyright notice, this | ||
* list of conditions and the following disclaimer. | ||
* | ||
* * Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* * Neither the name of the copyright holder nor the names of its | ||
* contributors may be used to endorse or promote products derived from | ||
* this software without specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
package com.salesforce.op.evaluators | ||
|
||
import com.salesforce.op.test.TestSparkContext | ||
import org.junit.runner.RunWith | ||
import org.scalatest.FlatSpec | ||
import org.scalatest.junit.JUnitRunner | ||
|
||
@RunWith(classOf[JUnitRunner])
class OpBinaryClassifyBinEvaluatorTest extends FlatSpec with TestSparkContext {

  val labelName = "label"
  val predictionLabel = "pred"

  // Expected result whenever evaluation cannot be performed (empty data or no bins).
  // Case class equality lets each test compare all metrics in a single assertion.
  val emptyMetrics = BinaryClassificationBinMetrics(
    BrierScore = 0.0,
    BinCenters = Seq.empty[Double],
    NumberOfDataPoints = Seq.empty[Long],
    AverageScore = Seq.empty[Double],
    AverageConversionRate = Seq.empty[Double]
  )

  // Scores spread across the [0, 1] range with both positive and negative labels.
  val testDataset = Seq(
    (Map("probability_1" -> 0.99999, "probability_0" -> 0.0001, "prediction" -> 1.0), 1.0),
    (Map("probability_1" -> 0.99999, "probability_0" -> 0.0001, "prediction" -> 1.0), 1.0),
    (Map("probability_1" -> 0.00541, "probability_0" -> 0.99560, "prediction" -> 1.0), 0.0),
    (Map("probability_1" -> 0.70, "probability_0" -> 0.30, "prediction" -> 1.0), 0.0),
    (Map("probability_1" -> 0.001, "probability_0" -> 0.999, "prediction" -> 0.0), 0.0)
  )

  // All scores clustered near 1.0, so only the last bin should be populated.
  val skewedDataset = Seq(
    (Map("probability_1" -> 0.99999, "probability_0" -> 0.0001, "prediction" -> 1.0), 1.0),
    (Map("probability_1" -> 0.99999, "probability_0" -> 0.0001, "prediction" -> 1.0), 1.0),
    (Map("probability_1" -> 0.9987, "probability_0" -> 0.001, "prediction" -> 1.0), 1.0),
    (Map("probability_1" -> 0.946, "probability_0" -> 0.0541, "prediction" -> 1.0), 1.0)
  )

  val emptyDataset = Seq.empty[(Map[String, Double], Double)]

  Spec[OpBinaryClassifyBinEvaluator] should "return the bin metrics" in {
    val df = spark.createDataFrame(testDataset).toDF(predictionLabel, labelName)

    val metrics = new OpBinaryClassifyBinEvaluator(numBins = 4)
      .setLabelCol(labelName).setPredictionCol(predictionLabel).evaluateAll(df)

    BigDecimal(metrics.BrierScore).setScale(3, BigDecimal.RoundingMode.HALF_UP).toDouble shouldBe 0.098
    metrics.BinCenters shouldBe Seq(0.125, 0.375, 0.625, 0.875)
    metrics.NumberOfDataPoints shouldBe Seq(2, 0, 1, 2)
    metrics.AverageScore shouldBe Seq(0.003205, 0.0, 0.7, 0.99999)
    metrics.AverageConversionRate shouldBe Seq(0.0, 0.0, 0.0, 1.0)
  }

  it should "return the empty bin metrics for numBins == 0" in {
    val df = spark.createDataFrame(testDataset).toDF(predictionLabel, labelName)

    val metrics = new OpBinaryClassifyBinEvaluator(numBins = 0)
      .setLabelCol(labelName).setPredictionCol(predictionLabel).evaluateAll(df)

    metrics shouldBe emptyMetrics
  }

  it should "return the empty bin metrics for empty data" in {
    val df = spark.createDataFrame(emptyDataset).toDF(predictionLabel, labelName)

    val metrics = new OpBinaryClassifyBinEvaluator(numBins = 10)
      .setLabelCol(labelName).setPredictionCol(predictionLabel).evaluateAll(df)

    metrics shouldBe emptyMetrics
  }

  it should "return the bin metrics for skewed data" in {
    val df = spark.createDataFrame(skewedDataset).toDF(predictionLabel, labelName)

    val metrics = new OpBinaryClassifyBinEvaluator(numBins = 5)
      .setLabelCol(labelName).setPredictionCol(predictionLabel).evaluateAll(df)

    metrics.BrierScore shouldBe 7.294225500000013E-4
    metrics.BinCenters shouldBe Seq(0.1, 0.3, 0.5, 0.7, 0.9)
    metrics.NumberOfDataPoints shouldBe Seq(0, 0, 0, 0, 4)
    metrics.AverageScore shouldBe Seq(0.0, 0.0, 0.0, 0.0, 0.98617)
    metrics.AverageConversionRate shouldBe Seq(0.0, 0.0, 0.0, 0.0, 1.0)
  }

  it should "return the default metric as BrierScore" in {
    val df = spark.createDataFrame(testDataset).toDF(predictionLabel, labelName)

    val evaluator = new OpBinaryClassifyBinEvaluator(numBins = 4)
      .setLabelCol(labelName).setPredictionCol(predictionLabel)

    // getDefaultMetric must extract the Brier score from the evaluated metrics.
    val brierScore = evaluator.getDefaultMetric(evaluator.evaluateAll(df))
    BigDecimal(brierScore).setScale(3, BigDecimal.RoundingMode.HALF_UP).toDouble shouldBe 0.098
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This evaluator returns 5 different values. Should there be 5 factory methods? Or only one for the Brier score, which is the default metric?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
only the Brier Score. @leahmcguire wdyt?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, only for the Brier score — the other metrics are supporting values for the Brier score. The Brier score is the only metric that could be used for optimization.