Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metadata to OpStandardScaler to allow for descaling #378

Merged
merged 11 commits into from
Aug 5, 2019
Previous commit
Next commit
Improve tests
  • Loading branch information
Erica Chiu committed Aug 2, 2019
commit c6deab57eb47611b0279805e856107a0fc6668b3
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@ class OpScalarStandardScaler

val std = scalerModel.std.toArray
erica-chiu marked this conversation as resolved.
Show resolved Hide resolved
val mean = scalerModel.mean.toArray
val stdMean = std.sum / std.length
val meanMean = mean.sum / mean.length
val scalingArgs = LinearScalerArgs(1 / stdMean, meanMean / stdMean)
val meta = ScalerMetadata(ScalingType.Linear, scalingArgs).toMetadata()
setMetadata(meta)
if (std.length == 1) {
val stdMean = std.head
val meanMean = mean.head
val scalingArgs = LinearScalerArgs(1 / stdMean, - meanMean / stdMean)
val meta = ScalerMetadata(ScalingType.Linear, scalingArgs).toMetadata()
setMetadata(meta)
}

new OpScalarStandardScalerModel(
std = std,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class OpScalarStandardScalerTest extends OpEstimatorSpec[RealNN, UnaryModel[Real
1.150792911137501.toRealNN
)

val descaleValues = Seq(10.0, 100.0, 1000.0)
val (descaleData, testD) = TestFeatureBuilder(descaleValues.map(_.toRealNN))

val (inputData, testF) = TestFeatureBuilder(Seq(10, 100, 1000).map(_.toRealNN))

Expand Down Expand Up @@ -151,31 +153,35 @@ class OpScalarStandardScalerTest extends OpEstimatorSpec[RealNN, UnaryModel[Real
}

it should "descale and work in standardized workflow" in {
val featureNormalizer = new OpScalarStandardScaler().setInput(testF)
val featureNormalizer = new OpScalarStandardScaler().setInput(testD)
val normedOutput = featureNormalizer.getOutput()
val metadata = featureNormalizer.fit(inputData).getMetadata()
val expectedStd = 90.0 * math.sqrt(37.0)
val expectedMean = 370.0
val metadata = featureNormalizer.fit(descaleData).getMetadata()

val expectedMean = descaleValues.sum / descaleValues.length
val expectedStd = math.sqrt(descaleValues.map(value => math.pow(expectedMean - value, 2)).sum
/ (descaleValues.length - 1))
val expectedSlope = 1 / expectedStd
val expectedIntercept = expectedMean / expectedStd
val expectedIntercept = - expectedMean / expectedStd
ScalerMetadata(metadata) match {
case Failure(err) => fail(err)
case Success(meta) =>
meta shouldBe a[ScalerMetadata]
meta.scalingType shouldBe ScalingType.Linear
meta.scalingArgs shouldBe a[LinearScalerArgs]
meta.scalingArgs.asInstanceOf[LinearScalerArgs].slope - expectedSlope should be < 0.001
meta.scalingArgs.asInstanceOf[LinearScalerArgs].intercept - expectedIntercept should be < 0.001
math.abs((meta.scalingArgs.asInstanceOf[LinearScalerArgs].slope - expectedSlope)
/ expectedSlope) should be < 0.001
math.abs((meta.scalingArgs.asInstanceOf[LinearScalerArgs].intercept - expectedIntercept)
/ expectedIntercept) should be < 0.001
}

val descaledResponse = new DescalerTransformer[RealNN, RealNN, RealNN]()
.setInput(normedOutput, normedOutput).getOutput()
val workflow = new OpWorkflow().setResultFeatures(descaledResponse)
val wfModel = workflow.setInputDataset(inputData).train()
val wfModel = workflow.setInputDataset(descaleData).train()
val transformed = wfModel.score()

val actual = transformed.collect().map(_.getAs[Double](1))
val expected = Array(-730.0, -640.0, 260.0)
val expected : Seq[Double] = descaleValues
all(actual.zip(expected).map(x => math.abs(x._2 - x._1))) should be < 0.0001
}

Expand Down