Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'levels' parameter to as_discrete() #957

Merged
merged 21 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4a7f510
`as_discrete(levels)` generates 'data_meta/series_annotation'.
OLarionova-HORIS Nov 17, 2023
d8a3b44
Fix creating of 'factorLevelsByVar'-map .
OLarionova-HORIS Nov 17, 2023
89f09b2
Fix `applyTransform` to avoid error 'value is not in the domain'.
OLarionova-HORIS Nov 17, 2023
11452cf
Add examples.
OLarionova-HORIS Nov 17, 2023
4c36cad
Add tests.
OLarionova-HORIS Nov 22, 2023
4c74e31
Correct specified 'factor_levels' according to actual data and append…
OLarionova-HORIS Nov 23, 2023
d40eb00
Update test notebook.
OLarionova-HORIS Nov 23, 2023
a747f90
Fix description of 'levels' parameter.
OLarionova-HORIS Nov 24, 2023
122f46b
Minor.
OLarionova-HORIS Nov 24, 2023
d1cd7bf
Make variables specified in 'series_annotations' with levels discrete.
OLarionova-HORIS Nov 28, 2023
b53782b
Skip creation of 'mapping_annotations' for variable with specified 'f…
OLarionova-HORIS Nov 29, 2023
79180cf
Few improvements.
OLarionova-HORIS Nov 29, 2023
0c2037d
Refactor code.
OLarionova-HORIS Nov 30, 2023
83a4488
Add order to series_annotations.
OLarionova-HORIS Nov 30, 2023
4353f2a
Minor refactoring.
OLarionova-HORIS Nov 30, 2023
1407676
Refactor python code.
OLarionova-HORIS Dec 1, 2023
9173754
Minor refactoring (python).
OLarionova-HORIS Dec 1, 2023
4b6c988
Add ordering for levels in example.
OLarionova-HORIS Dec 1, 2023
3ff0510
Add example with facets to notebook.
OLarionova-HORIS Dec 5, 2023
35f55fb
Add demo notebook. Update future_changes.md.
OLarionova-HORIS Dec 5, 2023
01d497d
Improve notebook.
OLarionova-HORIS Dec 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Correct specified 'factor_levels' according to actual data and append…
… to layer-spec part.
  • Loading branch information
OLarionova-HORIS committed Dec 5, 2023
commit 4c74e31f81e9a804c56a086d3e429318e9bdc903
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,7 @@ object DataFrameUtil {
transformVar: DataFrame.Variable,
transform: Transform
): DataFrame {
val transformed = ScaleUtil.applyTransform(
// todo to avoid error: value is not in the domain
data[variable].map { value -> value.takeIf { it in data.distinctValues(variable) } },
transform
)
val transformed = ScaleUtil.applyTransform(data[variable], transform)
return data.builder()
.putNumeric(transformVar, transformed)
.build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.jetbrains.letsPlot.core.spec.back

import org.jetbrains.letsPlot.commons.intern.filterNotNullKeys
import org.jetbrains.letsPlot.core.plot.base.*
import org.jetbrains.letsPlot.core.plot.base.DataFrame.Variable
import org.jetbrains.letsPlot.core.plot.base.data.DataFrameUtil
Expand Down Expand Up @@ -49,6 +50,15 @@ open class PlotConfigBackend(
}
}

// match the specified 'factor_levels' to the actual contents of the data set (on combined df before stat)
val specifiedFactorLeversByLayers = layerConfigs.map { layerConfig ->
prepareLayerFactorLevelsByVariable(
layerConfig.combinedData,
plotDataMeta = getMap(DATA_META),
layerDataMeta = layerConfig.getMap(DATA_META)
)
}

// replace layer data with data after stat
layerConfigs.withIndex().forEach { (layerIndex, layerConfig) ->
// optimization: only replace the layer's data if the 'combined' data was changed (because a stat or sampling was applied)
Expand All @@ -62,24 +72,26 @@ open class PlotConfigBackend(
dropUnusedDataBeforeEncoding(layerConfigs)

// Re-create the "natural order" that existed before faceting.
if (facets.isDefined) {
// if (facets.isDefined) {
// When faceting, each layer's data was split into panels, then re-combined with loss of the 'natural order'.
layerConfigs.forEach { layerConfig ->
layerConfigs.forEachIndexed { layerIndex, layerConfig ->
val layerData = layerConfig.ownData
if (facets.isFacettable(layerData)) {
val factorLevels = specifiedFactorLeversByLayers[layerIndex]
if (facets.isFacettable(layerData) || factorLevels.isNotEmpty()) {
val layerDataMetaUpdated = addFactorLevelsDataMeta(
layerData = layerData,
layerDataMeta = layerConfig.getMap(DATA_META),
stat = layerConfig.stat,
varBindings = layerConfig.varBindings,
transformByAes = transformByAes,
orderOptions = layerConfig.orderOptions,
yOrientation = layerConfig.isYOrientation
yOrientation = layerConfig.isYOrientation,
specifiedLayerFactorLevers = factorLevels
)
layerConfig.update(DATA_META, layerDataMetaUpdated)
}
}
}
}
}
// }
}

private fun dropUnusedDataBeforeEncoding(layerConfigs: List<LayerConfig>) {
Expand Down Expand Up @@ -273,6 +285,7 @@ open class PlotConfigBackend(
transformByAes: Map<Aes<*>, Transform>,
orderOptions: List<OrderOption>,
yOrientation: Boolean,
specifiedLayerFactorLevers: Map<String, List<Any>>
): Map<String, Any> {

// Use "discrete transforms" to re-create the "natural order" that existed before faceting.
Expand Down Expand Up @@ -304,7 +317,30 @@ open class PlotConfigBackend(
levelsByVariable[variable.name] = orderedDistinctValues
}

// specified factors
levelsByVariable += specifiedLayerFactorLevers

return DataMetaUtil.updateFactorLevelsByVariable(layerDataMeta, levelsByVariable)
}

/**
 * Builds the effective factor levels for the variables of a single layer.
 *
 * Levels read from the plot-level and the layer-level data meta are merged; for the same
 * variable name the layer-level entry replaces the plot-level one. Entries whose name matches
 * no variable in [data] are dropped. For every remaining variable, the distinct values found
 * in [data] but not listed in the specified levels are appended after the specified levels.
 *
 * @param data data frame whose variables and distinct values are consulted.
 * @param plotDataMeta plot-level 'data_meta' options.
 * @param layerDataMeta layer-level 'data_meta' options.
 * @return factor levels keyed by variable name.
 */
private fun prepareLayerFactorLevelsByVariable(
    data: DataFrame,
    plotDataMeta: Map<*, *>,
    layerDataMeta: Map<*, *>
): Map<String, List<Any>> {
    // Right-hand operand of '+' wins on key collisions, so layer settings override plot settings.
    val specifiedLevelsByName = DataMetaUtil.getFactorLevelsByVariable(plotDataMeta) +
            DataMetaUtil.getFactorLevelsByVariable(layerDataMeta)

    return specifiedLevelsByName.mapNotNull { (varName, specifiedLevels) ->
        // Names that do not correspond to a variable of the data frame are skipped.
        data.variables().find { it.name == varName }?.let { variable ->
            // Values present in the data but missing from the specification go to the tail.
            val unlisted = data.distinctValues(variable) - specifiedLevels.toSet()
            variable.name to (specifiedLevels + unlisted)
        }
    }.toMap()
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -173,25 +173,20 @@ internal object DataConfigUtil {

if (clientSide) {
val variables = rawCombinedData.variables()
val discreteVariables = (variables.filter(rawCombinedData::isDiscrete) + variablesToMarkAsDiscrete).toSet()
val orderSpecs = OrderOptionUtil.createOrderSpecs(orderOptions, variables, varBindings, aggregateOperation)
val factorLevelsByVar = DataMetaUtil.getFactorLevelsByVariable(ownDataMeta)
.mapKeys { (varName, _) -> variables.find { it.name == varName } }
.filterNotNullKeys()

val plotFactorLevelsByVar = DataMetaUtil.getFactorLevelsByVariable(plotDataMeta)
val layerFactorLevelsByVar = DataMetaUtil.getFactorLevelsByVariable(ownDataMeta)
val factorLevelsByVar = (plotFactorLevelsByVar + layerFactorLevelsByVar)
val factorLevelsByVar = layerFactorLevelsByVar
.flatMap { (varName, levels) ->
val variablesWithLevels: MutableList<DataFrame.Variable> = combinedMappingOptions
val variable = variables.find { it.name == varName }
val mappedVariables = combinedMappingOptions
.filterValues { it == varName }
.keys
.mapNotNull { aesName ->
varBindings.find { it.aes.name == aesName }?.variable
}
.toMutableList()
if (variablesWithLevels.isEmpty()) {
variables.find { it.name == varName }?.let { variablesWithLevels += it }
}
.mapNotNull { aesName -> varBindings.find { it.aes.name == aesName }?.variable }
val variablesWithLevels = (mappedVariables + variable)
.filterNotNull()
.filter { it in discreteVariables }
variablesWithLevels.map { it to levels }
}
.toMap()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.jetbrains.letsPlot.core.spec.config

import demoAndTestShared.parsePlotSpec
import org.jetbrains.letsPlot.core.plot.base.data.DataFrameUtil
import org.jetbrains.letsPlot.core.spec.config.AsDiscreteTest.*
import org.jetbrains.letsPlot.core.spec.front.PlotConfigFrontend
Expand Down Expand Up @@ -100,14 +101,14 @@ class DataMetaFactorLevelsTest {
seriesAnnotations = seriesAnnotationsSpec(
mapOf(
"name" to listOf("c", "b", "a"),
//"c" to listOf(2.0, 3.0, 1.0)
"c" to listOf(2.0, 3.0, 1.0)
)
),
mappingAnnotations = null
)
transformToClientPlotConfig(spec)
.assertDistinctValues("name", listOf("c", "b", "a", "d"))
//.assertDistinctValues("c", listOf(1.0, 2.0, 3.0, 4.0))
.assertDistinctValues("c", listOf(1.0, 2.0, 3.0, 4.0)) // not to apply levels to non-discrete
}

private fun checkWithMappingAndSeriesAnnotations(
Expand All @@ -126,8 +127,8 @@ class DataMetaFactorLevelsTest {
mappingStorage
)
transformToClientPlotConfig(spec)
//.assertDistinctValues("name", listOf("c", "b", "a", "d"))
// .assertDistinctValues("c", listOf(1.0, 2.0, 3.0, 4.0))
.assertDistinctValues("name", listOf("c", "b", "a", "d"))
.assertDistinctValues("c", listOf(1.0, 2.0, 3.0, 4.0))
.assertDistinctValues("x.name", listOf("c", "b", "a", "d"))
.assertDistinctValues("fill.c", listOf(2.0, 3.0, 1.0, 4.0))
}
Expand Down Expand Up @@ -166,6 +167,27 @@ class DataMetaFactorLevelsTest {
)
}

// Checks that 'factor_levels' given in plot-level 'series_annotations' define
// the order of distinct values of the 'name' variable in the client plot config.
@Test
fun test_series_annotations() {
    val plotSpecText = """{
'kind': 'plot',
'data': {'name': ['a', 'b', 'c']},
'mapping': {'x': 'name' },
'data_meta': {
'series_annotations': [
{
'column': 'name',
'factor_levels': ['a', 'c', 'b']
}
]
},
'layers': [ { 'geom': 'bar' } ]
}""".trimIndent()

    // Expected order follows the specified 'factor_levels', not the data order.
    val expectedLevels = listOf("a", "c", "b")
    val clientPlotConfig = transformToClientPlotConfig(parsePlotSpec(plotSpecText))
    clientPlotConfig.assertDistinctValues("name", expectedLevels)
}

companion object {
fun PlotConfigFrontend.assertDistinctValues(
varName: String,
Expand Down