Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metadata to OpStandardScaler to allow for descaling #378

Merged
merged 11 commits into from
Aug 5, 2019
Prev Previous commit
Next Next commit
Add test for descaling to OpScalarStandardScaler tests
  • Loading branch information
Erica Chiu committed Aug 1, 2019
commit 804a7e7d8d1e077c6203903b6ebc449c8aca8223
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

import scala.util.{Failure, Success}


@RunWith(classOf[JUnitRunner])
class OpScalarStandardScalerTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN, RealNN], OpScalarStandardScaler] {
Expand Down Expand Up @@ -148,6 +150,35 @@ class OpScalarStandardScalerTest extends OpEstimatorSpec[RealNN, UnaryModel[Real
assert(sumSqDist <= 0.000001, "===> the sum of squared distances between actual and expected should be zero.")
}

// Checks two things about OpScalarStandardScaler:
//  1. fitting it attaches ScalerMetadata describing a linear scaling whose slope and intercept
//     match the values derived from the expected mean and standard deviation below;
//  2. a DescalerTransformer fed the scaled feature can be trained and scored in an OpWorkflow
//     and yields the expected values.
it should "descale and work in standardized workflow" in {
  // Relative error keeps the tolerance meaningful whatever the magnitude of the expected value
  // (the expected slope is itself only about 0.0018, so an absolute 0.001 bound is very loose).
  def relativeError(actual: Double, expected: Double): Double =
    math.abs((actual - expected) / expected)

  val featureNormalizer = new OpScalarStandardScaler().setInput(testF)
  val normedOutput = featureNormalizer.getOutput()
  val metadata = featureNormalizer.fit(inputData).getMetadata()

  val expectedStd = 90.0 * math.sqrt(37.0)
  val expectedMean = 370.0
  val expectedSlope = 1 / expectedStd
  // NOTE(review): the intercept is asserted as +mean/std; confirm this sign convention against
  // the scaler and descaler implementations.
  val expectedIntercept = expectedMean / expectedStd

  ScalerMetadata(metadata) match {
    case Failure(err) => fail(err)
    case Success(meta) =>
      meta shouldBe a [ScalerMetadata]
      meta.scalingType shouldBe ScalingType.Linear
      meta.scalingArgs match {
        case linearArgs: LinearScalerArgs =>
          // Two-sided comparison: a bare `actual - expected should be < tol` would accept any
          // value that is too small, including one with the wrong sign.
          relativeError(linearArgs.slope, expectedSlope) should be < 0.001
          relativeError(linearArgs.intercept, expectedIntercept) should be < 0.001
        case other =>
          fail(s"Expected scalingArgs to be LinearScalerArgs but got: $other")
      }
  }

  val descaledResponse = new DescalerTransformer[RealNN, RealNN, RealNN]()
    .setInput(normedOutput, normedOutput).getOutput()
  val workflow = new OpWorkflow().setResultFeatures(descaledResponse)
  val wfModel = workflow.setInputDataset(inputData).train()
  val transformed = wfModel.score()

  val actual = transformed.collect().map(_.getAs[Double](1))
  // NOTE(review): hard-coded expectations; confirm they are the intended descaled values.
  val expected = Array(-730.0, -640.0, 260.0)
  // zip truncates to the shorter side and `all` over an empty collection passes vacuously,
  // so pin the number of scored rows before comparing element-wise.
  actual.length shouldBe expected.length
  all(actual.zip(expected).map { case (got, want) => math.abs(want - got) }) should be < 0.0001
}

private def validateDataframeDoubleColumn(normalizedFeatureDF: DataFrame, scaledFeatureName: String,
targetColumnName: String): Double = {
val sqDistUdf = udf { (leftCol: Double, rightCol: Double) => Math.pow(leftCol - rightCol, 2) }
Expand Down