Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metadata changes for sensitive feature information #457

Merged
merged 18 commits into from
Jan 29, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Removed enum from SensitiveFeatureInformation per PR comments
  • Loading branch information
MWYang committed Jan 24, 2020
commit a8504da0fefff9cbc09afc0203faafea08b7ee6a
10 changes: 5 additions & 5 deletions core/src/test/scala/com/salesforce/op/ModelInsightsTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest with Dou
},
Seq("f1", "f0").map(name => name -> FeatureHistory(originFeatures = Seq(name), stages = Seq())).toMap,
Map(
"f0" -> Seq(SensitiveFeatureInformation.Name(0.0, Seq.empty[String], 0.0, 0.0, 1.0, "f0", None, false))
"f0" -> Seq(SensitiveNameInformation(0.0, Seq.empty[String], 0.0, 0.0, 1.0, "f0", None, false))
)
)

Expand Down Expand Up @@ -627,7 +627,7 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest with Dou
f0In.featureType shouldBe classOf[PickList].getName
f0In.derivedFeatures.size shouldBe 2
f0In.sensitiveInformation match {
case Seq(SensitiveFeatureInformation.Name(
case Seq(SensitiveNameInformation(
probName, genderDetectResults, probMale, probFemale, probOther, name, mapKey, actionTaken
)) =>
actionTaken shouldBe false
Expand Down Expand Up @@ -727,10 +727,10 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest with Dou
},
Seq("f1", "f0").map(name => name -> FeatureHistory(originFeatures = Seq(name), stages = Seq())).toMap,
Map(
"f0" -> Seq(SensitiveFeatureInformation.Name(
"f0" -> Seq(SensitiveNameInformation(
0.0, Seq.empty[String], 0.0, 0.0, 1.0, "f0", None, false
)),
"f_notInMeta" -> Seq(SensitiveFeatureInformation.Name(
"f_notInMeta" -> Seq(SensitiveNameInformation(
1.0, Seq.empty[String], 0.0, 0.0, 1.0, "f_notInMeta", None, true
))
)
Expand All @@ -748,7 +748,7 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest with Dou
f_notInMeta_butInInsights.featureType shouldBe classOf[Text].getName
f_notInMeta_butInInsights.derivedFeatures.size shouldBe 0
f_notInMeta_butInInsights.sensitiveInformation match {
case Seq(SensitiveFeatureInformation.Name(
case Seq(SensitiveNameInformation(
probName, genderDetectResults, probMale, probFemale, probOther, name, mapKey, actionTaken
)) =>
actionTaken shouldBe true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class OPVectorMetadataTest extends PropSpec with TestCommon with PropertyChecks
): Map[String, Seq[SensitiveFeatureInformation]] = {
val sensitiveInfoSeq = sensitiveInfoSeqRaw map {
case ((probName, genderDetectResults, probMale, probFemale, probOther), featureName, mapKey, actionTaken) =>
SensitiveFeatureInformation.Name(
SensitiveNameInformation(
probName, genderDetectResults, probMale, probFemale, probOther, featureName, mapKey, actionTaken
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,85 +33,30 @@ package com.salesforce.op
import com.salesforce.op.utils.json.JsonLike
import com.salesforce.op.utils.spark.RichMetadata._
import org.apache.spark.sql.types.{Metadata, MetadataBuilder}
import enumeratum._

/**
* A base class for different SensitiveFeatureInformation (implemented as an enum)
* Currently, only Name types are supported but there are placeholders for other possibilities
* A base class for the different kinds of SensitiveFeatureInformation.
* The following three params are required for every kind of SensitiveFeatureInformation.
*
* @param name the name of the raw feature
* @param key optionally, the name of the key (if the raw feature is a Map type)
* @param actionTaken whether the handling of the raw feature changed because it was detected as sensitive
*/
sealed class SensitiveFeatureInformation
sealed abstract class SensitiveFeatureInformation
(
val name: String,
val key: Option[String] = None,
val actionTaken: Boolean = false
) extends EnumEntry with JsonLike {

/**
* Convert to Spark metadata
*
* @return metadata representation
*/
def toMetadata: Metadata = {
this match {
case SensitiveFeatureInformation.Name(
probName, genderDetectResults, probMale, probFemale, probOther, name, key, actionTaken
) =>
new MetadataBuilder()
.putString(SensitiveFeatureInformation.NameKey, name)
.putString(SensitiveFeatureInformation.MapKeyKey, key.getOrElse(""))
.putBoolean(SensitiveFeatureInformation.ActionTakenKey, actionTaken)
.putString(SensitiveFeatureInformation.TypeKey, this.entryName)
.putDouble(SensitiveFeatureInformation.Name.ProbNameKey, probName)
.putStringArray(SensitiveFeatureInformation.Name.GenderDetectStratsKey, genderDetectResults.toArray)
.putDouble(SensitiveFeatureInformation.Name.ProbMaleKey, probMale)
.putDouble(SensitiveFeatureInformation.Name.ProbFemaleKey, probFemale)
.putDouble(SensitiveFeatureInformation.Name.ProbOtherKey, probOther)
.build()
case _ => throw new RuntimeException(
"Metadata for sensitive features other than names have not been implemented.")
}
}
) extends JsonLike {
val EntryName: String
def toMetadata: Metadata
}
case object SensitiveFeatureInformation extends Enum[SensitiveFeatureInformation] {

object SensitiveFeatureInformation {
val NameKey = "FeatureName"
val MapKeyKey = "MapKey"
val ActionTakenKey = "ActionTaken"
val TypeKey = "DetectedSensitiveFeatureKind"
val values: Seq[SensitiveFeatureInformation] = findValues

// Utilized by SmartTextVectorizer's name detection
case class Name
(
probName: Double,
genderDetectResults: Seq[String],
probMale: Double,
probFemale: Double,
probOther: Double,
override val name: String,
override val key: Option[String] = None,
override val actionTaken: Boolean = false
) extends SensitiveFeatureInformation(name, key, actionTaken) {
override val entryName: String = SensitiveFeatureInformation.Name.EntryName
}
case object Name {
val EntryName = "Name"
val ProbNameKey = "ProbName"
val GenderDetectStratsKey = "GenderDetectStrats"
val ProbMaleKey = "ProbMale"
val ProbFemaleKey = "ProbFemale"
val ProbOtherKey = "ProbOther"
}

// Not yet implemented
case object Salutation extends SensitiveFeatureInformation("None", None, false)
case object BirthDate extends SensitiveFeatureInformation("None", None, false)
case object PostalCode extends SensitiveFeatureInformation("None", None, false)
case object Other extends SensitiveFeatureInformation("None", None, false)

/**
* Build metadata from Map of [[SensitiveFeatureInformation]] instances
Expand Down Expand Up @@ -144,13 +89,13 @@ case object SensitiveFeatureInformation extends Enum[SensitiveFeatureInformation
*/
def fromMetadata(meta: Metadata): SensitiveFeatureInformation = {
meta.getString(SensitiveFeatureInformation.TypeKey) match {
case SensitiveFeatureInformation.Name.EntryName =>
SensitiveFeatureInformation.Name(
meta.getDouble(SensitiveFeatureInformation.Name.ProbNameKey),
meta.getStringArray(SensitiveFeatureInformation.Name.GenderDetectStratsKey),
meta.getDouble(SensitiveFeatureInformation.Name.ProbMaleKey),
meta.getDouble(SensitiveFeatureInformation.Name.ProbFemaleKey),
meta.getDouble(SensitiveFeatureInformation.Name.ProbOtherKey),
case SensitiveNameInformation.EntryName =>
SensitiveNameInformation(
meta.getDouble(SensitiveNameInformation.ProbNameKey),
meta.getStringArray(SensitiveNameInformation.GenderDetectStratsKey),
meta.getDouble(SensitiveNameInformation.ProbMaleKey),
meta.getDouble(SensitiveNameInformation.ProbFemaleKey),
meta.getDouble(SensitiveNameInformation.ProbOtherKey),
meta.getString(SensitiveFeatureInformation.NameKey),
{
val mapKey = meta.getString(SensitiveFeatureInformation.MapKeyKey)
Expand All @@ -163,3 +108,42 @@ case object SensitiveFeatureInformation extends Enum[SensitiveFeatureInformation
}
}
}

/**
 * Name-specific kind of [[SensitiveFeatureInformation]].
 *
 * Carries the three fields required by the base class (name, key, actionTaken) together with
 * the name-detection values that are written to Spark metadata by [[toMetadata]].
 *
 * @param probName            value stored under `ProbNameKey` (presumably the probability that the
 *                            feature is a name - confirm against the producer of these values)
 * @param genderDetectResults strings stored under `GenderDetectStratsKey`
 * @param probMale            value stored under `ProbMaleKey`
 * @param probFemale          value stored under `ProbFemaleKey`
 * @param probOther           value stored under `ProbOtherKey`
 * @param name                the name of the raw feature
 * @param key                 optionally, the name of the key (if the raw feature is a Map type)
 * @param actionTaken         whether the handling of the raw feature changed because it was detected as sensitive
 */
case class SensitiveNameInformation
(
  probName: Double,
  genderDetectResults: Seq[String],
  probMale: Double,
  probFemale: Double,
  probOther: Double,
  override val name: String,
  override val key: Option[String] = None,
  override val actionTaken: Boolean = false
) extends SensitiveFeatureInformation(name, key, actionTaken) {

  /** Type tag written under `SensitiveFeatureInformation.TypeKey`. */
  override val EntryName: String = SensitiveNameInformation.EntryName

  /**
   * Convert to Spark metadata
   *
   * @return metadata holding the common fields, the type tag and the name-specific fields
   */
  override def toMetadata: Metadata = {
    val builder = new MetadataBuilder()
    // A missing map key is stored as the empty string
    val mapKeyOrEmpty: String = key.getOrElse("")
    val detectResults: Array[String] = genderDetectResults.toArray

    // Fields shared by every kind of sensitive feature information
    builder.putString(SensitiveFeatureInformation.NameKey, name)
    builder.putString(SensitiveFeatureInformation.MapKeyKey, mapKeyOrEmpty)
    builder.putBoolean(SensitiveFeatureInformation.ActionTakenKey, actionTaken)
    builder.putString(SensitiveFeatureInformation.TypeKey, this.EntryName)

    // Fields specific to name information
    builder.putDouble(SensitiveNameInformation.ProbNameKey, probName)
    builder.putStringArray(SensitiveNameInformation.GenderDetectStratsKey, detectResults)
    builder.putDouble(SensitiveNameInformation.ProbMaleKey, probMale)
    builder.putDouble(SensitiveNameInformation.ProbFemaleKey, probFemale)
    builder.putDouble(SensitiveNameInformation.ProbOtherKey, probOther)

    builder.build()
  }
}

/**
 * Constants for [[SensitiveNameInformation]]: the type tag and the metadata keys
 * used for its name-specific fields.
 */
case object SensitiveNameInformation {
  /** Type tag identifying name information in metadata. */
  val EntryName: String = "SensitiveNameInformation"

  /** Metadata key for `probName`. */
  val ProbNameKey: String = "ProbName"

  /** Metadata key for `genderDetectResults`. */
  val GenderDetectStratsKey: String = "GenderDetectStrats"

  /** Metadata key for `probMale`. */
  val ProbMaleKey: String = "ProbMale"

  /** Metadata key for `probFemale`. */
  val ProbFemaleKey: String = "ProbFemale"

  /** Metadata key for `probOther`. */
  val ProbOtherKey: String = "ProbOther"
}

/**
 * Pairs a strategy string with a percentage of unidentified values.
 *
 * NOTE(review): presumably describes the outcome of one gender detection strategy - confirm
 * the exact meaning and scale of `pctUnidentified` with the code that produces it.
 *
 * @param strategyString  string describing the strategy
 * @param pctUnidentified percentage of unidentified values
 */
// TODO: Use this everywhere
case class GenderDetectionResults
(
  strategyString: String,
  pctUnidentified: Double
) extends JsonLike
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class SensitiveFeatureInformationTest extends FlatSpec with TestCommon {
val mapKey: Option[String] = None
val actionTaken = true

val sensitiveFeatureInfo: SensitiveFeatureInformation.Name = SensitiveFeatureInformation.Name(
val sensitiveFeatureInfo: SensitiveNameInformation = SensitiveNameInformation(
probName, genderDetectResults, probMale, probFemale, probOther, name, mapKey, actionTaken
)

Expand All @@ -59,26 +59,26 @@ class SensitiveFeatureInformationTest extends FlatSpec with TestCommon {
metadata.contains(SensitiveFeatureInformation.MapKeyKey) shouldBe true
metadata.contains(SensitiveFeatureInformation.ActionTakenKey) shouldBe true
metadata.contains(SensitiveFeatureInformation.TypeKey) shouldBe true
metadata.contains(SensitiveFeatureInformation.Name.ProbNameKey) shouldBe true
metadata.contains(SensitiveFeatureInformation.Name.GenderDetectStratsKey) shouldBe true
metadata.contains(SensitiveFeatureInformation.Name.ProbMaleKey) shouldBe true
metadata.contains(SensitiveFeatureInformation.Name.ProbFemaleKey) shouldBe true
metadata.contains(SensitiveFeatureInformation.Name.ProbOtherKey) shouldBe true
metadata.contains(SensitiveNameInformation.ProbNameKey) shouldBe true
metadata.contains(SensitiveNameInformation.GenderDetectStratsKey) shouldBe true
metadata.contains(SensitiveNameInformation.ProbMaleKey) shouldBe true
metadata.contains(SensitiveNameInformation.ProbFemaleKey) shouldBe true
metadata.contains(SensitiveNameInformation.ProbOtherKey) shouldBe true

metadata.getString(SensitiveFeatureInformation.NameKey) shouldBe name
metadata.getString(SensitiveFeatureInformation.MapKeyKey) shouldBe ""
metadata.getBoolean(SensitiveFeatureInformation.ActionTakenKey) shouldBe actionTaken
metadata.getString(SensitiveFeatureInformation.TypeKey) shouldBe SensitiveFeatureInformation.Name.EntryName
metadata.getDouble(SensitiveFeatureInformation.Name.ProbNameKey) shouldBe probName
metadata.getStringArray(SensitiveFeatureInformation.Name.GenderDetectStratsKey) shouldBe genderDetectResults
metadata.getDouble(SensitiveFeatureInformation.Name.ProbMaleKey) shouldBe probMale
metadata.getDouble(SensitiveFeatureInformation.Name.ProbFemaleKey) shouldBe probFemale
metadata.getDouble(SensitiveFeatureInformation.Name.ProbOtherKey) shouldBe probOther
metadata.getString(SensitiveFeatureInformation.TypeKey) shouldBe SensitiveNameInformation.EntryName
metadata.getDouble(SensitiveNameInformation.ProbNameKey) shouldBe probName
metadata.getStringArray(SensitiveNameInformation.GenderDetectStratsKey) shouldBe genderDetectResults
metadata.getDouble(SensitiveNameInformation.ProbMaleKey) shouldBe probMale
metadata.getDouble(SensitiveNameInformation.ProbFemaleKey) shouldBe probFemale
metadata.getDouble(SensitiveNameInformation.ProbOtherKey) shouldBe probOther
}

it should "create metadata from a map" in {
val info1 = sensitiveFeatureInfo
val info2 = SensitiveFeatureInformation.Name(0.0, Seq(""), 0.0, 0.0, 0.0, "f2", Some("key"), actionTaken = true)
val info2 = SensitiveNameInformation(0.0, Seq(""), 0.0, 0.0, 0.0, "f2", Some("key"), actionTaken = true)
val map = Map("1" -> Seq(info1), "2" -> Seq(info2))
val metadata = SensitiveFeatureInformation.toMetadata(map)

Expand All @@ -90,33 +90,33 @@ class SensitiveFeatureInformationTest extends FlatSpec with TestCommon {
f1.contains(SensitiveFeatureInformation.MapKeyKey) shouldBe true
f1.contains(SensitiveFeatureInformation.TypeKey) shouldBe true
f1.contains(SensitiveFeatureInformation.TypeKey) shouldBe true
f1.contains(SensitiveFeatureInformation.Name.GenderDetectStratsKey) shouldBe true
f1.contains(SensitiveFeatureInformation.Name.ProbMaleKey) shouldBe true
f1.contains(SensitiveFeatureInformation.Name.ProbFemaleKey) shouldBe true
f1.contains(SensitiveFeatureInformation.Name.ProbOtherKey) shouldBe true
f1.getStringArray(SensitiveFeatureInformation.Name.GenderDetectStratsKey) shouldBe genderDetectResults
f1.getDouble(SensitiveFeatureInformation.Name.ProbMaleKey) shouldBe probMale
f1.getDouble(SensitiveFeatureInformation.Name.ProbFemaleKey) shouldBe probFemale
f1.getDouble(SensitiveFeatureInformation.Name.ProbOtherKey) shouldBe probOther
f1.contains(SensitiveNameInformation.GenderDetectStratsKey) shouldBe true
f1.contains(SensitiveNameInformation.ProbMaleKey) shouldBe true
f1.contains(SensitiveNameInformation.ProbFemaleKey) shouldBe true
f1.contains(SensitiveNameInformation.ProbOtherKey) shouldBe true
f1.getStringArray(SensitiveNameInformation.GenderDetectStratsKey) shouldBe genderDetectResults
f1.getDouble(SensitiveNameInformation.ProbMaleKey) shouldBe probMale
f1.getDouble(SensitiveNameInformation.ProbFemaleKey) shouldBe probFemale
f1.getDouble(SensitiveNameInformation.ProbOtherKey) shouldBe probOther

val f2 = metadata.getMetadataArray("2").head
f2.contains(SensitiveFeatureInformation.NameKey) shouldBe true
f2.contains(SensitiveFeatureInformation.MapKeyKey) shouldBe true
f2.contains(SensitiveFeatureInformation.TypeKey) shouldBe true
f2.contains(SensitiveFeatureInformation.TypeKey) shouldBe true
f2.contains(SensitiveFeatureInformation.Name.GenderDetectStratsKey) shouldBe true
f2.contains(SensitiveFeatureInformation.Name.ProbMaleKey) shouldBe true
f2.contains(SensitiveFeatureInformation.Name.ProbFemaleKey) shouldBe true
f2.contains(SensitiveFeatureInformation.Name.ProbOtherKey) shouldBe true
f2.getStringArray(SensitiveFeatureInformation.Name.GenderDetectStratsKey) shouldBe Seq("")
f2.getDouble(SensitiveFeatureInformation.Name.ProbMaleKey) shouldBe 0.0
f2.getDouble(SensitiveFeatureInformation.Name.ProbFemaleKey) shouldBe 0.0
f2.getDouble(SensitiveFeatureInformation.Name.ProbOtherKey) shouldBe 0.0
f2.contains(SensitiveNameInformation.GenderDetectStratsKey) shouldBe true
f2.contains(SensitiveNameInformation.ProbMaleKey) shouldBe true
f2.contains(SensitiveNameInformation.ProbFemaleKey) shouldBe true
f2.contains(SensitiveNameInformation.ProbOtherKey) shouldBe true
f2.getStringArray(SensitiveNameInformation.GenderDetectStratsKey) shouldBe Seq("")
f2.getDouble(SensitiveNameInformation.ProbMaleKey) shouldBe 0.0
f2.getDouble(SensitiveNameInformation.ProbFemaleKey) shouldBe 0.0
f2.getDouble(SensitiveNameInformation.ProbOtherKey) shouldBe 0.0
}

it should "create a map from metadata" in {
val info1 = sensitiveFeatureInfo
val info2 = SensitiveFeatureInformation.Name(0.0, Seq(""), 0.0, 0.0, 0.0, "f2", Some("key"), actionTaken = true)
val info2 = SensitiveNameInformation(0.0, Seq(""), 0.0, 0.0, 0.0, "f2", Some("key"), actionTaken = true)

val mapMetadata = new MetadataBuilder()
.putMetadataArray("1", Array(info1.toMetadata))
Expand Down