Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'levels' parameter to as_discrete() #957

Merged
merged 21 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4a7f510
`as_discrete(levels)` generates 'data_meta/series_annotation'.
OLarionova-HORIS Nov 17, 2023
d8a3b44
Fix creating of 'factorLevelsByVar'-map .
OLarionova-HORIS Nov 17, 2023
89f09b2
Fix `applyTransform` to avoid error 'value is not in the domain'.
OLarionova-HORIS Nov 17, 2023
11452cf
Add examples.
OLarionova-HORIS Nov 17, 2023
4c36cad
Add tests.
OLarionova-HORIS Nov 22, 2023
4c74e31
Correct specified 'factor_levels' according to actual data and append…
OLarionova-HORIS Nov 23, 2023
d40eb00
Update test notebook.
OLarionova-HORIS Nov 23, 2023
a747f90
Fix description of 'levels' parameter.
OLarionova-HORIS Nov 24, 2023
122f46b
Minor.
OLarionova-HORIS Nov 24, 2023
d1cd7bf
Make variables specified in 'series_annotations' with levels discrete.
OLarionova-HORIS Nov 28, 2023
b53782b
Skip creation of 'mapping_annotations' for variable with specified 'f…
OLarionova-HORIS Nov 29, 2023
79180cf
Few improvements.
OLarionova-HORIS Nov 29, 2023
0c2037d
Refactor code.
OLarionova-HORIS Nov 30, 2023
83a4488
Add order to series_annotations.
OLarionova-HORIS Nov 30, 2023
4353f2a
Minor refactoring.
OLarionova-HORIS Nov 30, 2023
1407676
Refactor python code.
OLarionova-HORIS Dec 1, 2023
9173754
Minor refactoring (python).
OLarionova-HORIS Dec 1, 2023
4b6c988
Add ordering for levels in example.
OLarionova-HORIS Dec 1, 2023
3ff0510
Add example with facets to notebook.
OLarionova-HORIS Dec 5, 2023
35f55fb
Add demo notebook. Update future_changes.md.
OLarionova-HORIS Dec 5, 2023
01d497d
Improve notebook.
OLarionova-HORIS Dec 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ fun <K, V> Map<K?, V>.filterNotNullKeys(): Map<K, V> {
.toMap()
}

/**
 * Returns a map holding only the entries of this map whose value is not `null`.
 *
 * Entries keep the iteration order of the receiver.
 */
fun <K, V> Map<K, V?>.filterNotNullValues(): Map<K, V> {
    val nonNullEntries = mutableMapOf<K, V>()
    for ((key, value) in this) {
        if (value != null) {
            nonNullEntries[key] = value
        }
    }
    return nonNullEntries
}

fun <T> Collection<T>.splitBy(comp: Comparator<T>): List<List<T>> {
val result = mutableListOf<List<T>>()
var chunk = mutableListOf<T>()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class AsDiscrete {
fillFactorWithScaleColor(),
layerData_DiscreteGroup(),
smoothStatAsDiscrete(),
smoothStatWithGroup()
smoothStatWithGroup(),
factorLevels()
)
}

Expand Down Expand Up @@ -226,4 +227,34 @@ class AsDiscrete {
return parsePlotSpec(spec)
}

/**
 * Demo spec: a single 'bar' layer with 'identity' stat whose 'data_meta' carries two
 * 'series_annotations' entries with 'factor_levels' - one for column 'name' and one for
 * column 'value' (the latter also sets 'order' to -1).
 *
 * The data contains values that are not listed in the corresponding 'factor_levels'
 * ('d' in 'name', 4 in 'value').
 * NOTE(review): presumably this is meant to show how unlisted values are handled - confirm.
 *
 * @return the result of passing the spec text to `parsePlotSpec`.
 */
private fun factorLevels(): MutableMap<String, Any> {
val spec = """{
'data': {
'name' : ['c', 'c', 'a', 'a', 'd', 'b', 'b', 'a'],
'value': [1, 2, 3, 2, 2, 1, 4, 1]
},
'kind': 'plot',
'layers': [
{
'geom': 'bar',
'stat': 'identity',
'mapping': {'x': 'name', 'y': 'value', 'fill': 'value'},
'data_meta': {
'series_annotations': [
{
'column': 'name',
'factor_levels': ['a','c','b']
},
{
'column': 'value',
'factor_levels': [1,2,3],
'order': -1
}
]
}
}
]
}""".trimIndent()
return parsePlotSpec(spec)
}
}
579 changes: 579 additions & 0 deletions docs/dev/notebooks/as_discrete_levels.ipynb

Large diffs are not rendered by default.

476 changes: 476 additions & 0 deletions docs/f-23f/factor_levels.ipynb

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions future_changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
See: [example notebook](https://nbviewer.jupyter.org/github/JetBrains/lets-plot/blob/master/docs/f-23f/scale_lablim.ipynb).


- The `levels` parameter in `as_discrete` function [[#931](https://github.com/JetBrains/lets-plot/issues/931)].

See: [example notebook](https://nbviewer.jupyter.org/github/JetBrains/lets-plot/blob/master/docs/f-23f/factor_levels.ipynb).


### Changed

- The `plot_margin` parameter in `theme()` and the `margin` parameter in `element_text()` accept a number or a list of numbers:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ object Option {
const val COLUMN = "column" // a.k.a. variable name
const val TYPE = "type"
const val FACTOR_LEVELS = "factor_levels" // annotation for discrete variables
const val ORDER = "order"

// Values of the "TYPE" property
object DateTime {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.jetbrains.letsPlot.core.spec.back

import org.jetbrains.letsPlot.commons.intern.filterNotNullKeys
import org.jetbrains.letsPlot.core.plot.base.*
import org.jetbrains.letsPlot.core.plot.base.DataFrame.Variable
import org.jetbrains.letsPlot.core.plot.base.data.DataFrameUtil
Expand Down Expand Up @@ -49,6 +50,15 @@ open class PlotConfigBackend(
}
}

// match the specified 'factor_levels' to the actual contents of the data set (on combined df before stat)
val specifiedFactorLevelsByLayers = layerConfigs.map { layerConfig ->
prepareLayerFactorLevelsByVariable(
layerConfig.combinedData,
plotDataMeta = getMap(DATA_META),
layerDataMeta = layerConfig.getMap(DATA_META)
)
}

// replace layer data with data after stat
layerConfigs.withIndex().forEach { (layerIndex, layerConfig) ->
// optimization: only replace layer' data if 'combined' data was changed (because of stat or sampling occurred)
Expand All @@ -61,24 +71,28 @@ open class PlotConfigBackend(
// Clean-up data before sending it to the front-end.
dropUnusedDataBeforeEncoding(layerConfigs)

// Re-create the "natural order" existed before faceting.
if (facets.isDefined) {
// When faceting, each layer' data was split to panels, then re-combined with loss of 'natural order'.
layerConfigs.forEach { layerConfig ->
val layerData = layerConfig.ownData
if (facets.isFacettable(layerData)) {
// Re-create the "natural order" that existed before faceting
// or apply the specified order
if (facets.isDefined || specifiedFactorLevelsByLayers.any { it.isNotEmpty() }) {
layerConfigs.zip(specifiedFactorLevelsByLayers)
.filter { (layerConfig, factorLevels) ->
// When faceting, each layer' data was split to panels, then re-combined with loss of 'natural order'.
facets.isFacettable(layerConfig.ownData)
|| factorLevels.isNotEmpty()
}
.forEach { (layerConfig, factorLevels) ->
val layerDataMetaUpdated = addFactorLevelsDataMeta(
layerData = layerData,
layerData = layerConfig.ownData,
layerDataMeta = layerConfig.getMap(DATA_META),
stat = layerConfig.stat,
varBindings = layerConfig.varBindings,
transformByAes = transformByAes,
orderOptions = layerConfig.orderOptions,
yOrientation = layerConfig.isYOrientation
yOrientation = layerConfig.isYOrientation,
specifiedLayerFactorLevers = factorLevels
)
layerConfig.update(DATA_META, layerDataMetaUpdated)
}
}
}
}

Expand Down Expand Up @@ -273,6 +287,7 @@ open class PlotConfigBackend(
transformByAes: Map<Aes<*>, Transform>,
orderOptions: List<OrderOption>,
yOrientation: Boolean,
specifiedLayerFactorLevers: Map<String, List<Any>>
): Map<String, Any> {

// Use "discrete transforms" to re-create the "natural order" that existed before faceting.
Expand Down Expand Up @@ -304,7 +319,34 @@ open class PlotConfigBackend(
levelsByVariable[variable.name] = orderedDistinctValues
}

// apply specified factors
levelsByVariable += specifiedLayerFactorLevers

return DataMetaUtil.updateFactorLevelsByVariable(layerDataMeta, levelsByVariable)
}

/**
 * Builds the effective factor levels for every annotated column that is present in [data].
 *
 * Levels (and their order direction) are read from both [plotDataMeta] and [layerDataMeta];
 * when the same column is annotated in both, the layer-level entry is used.
 * Values found in the data but not listed in the specified levels are appended after them,
 * and the whole list is reversed when the order direction is negative.
 *
 * @return factor levels keyed by variable name; columns absent from [data] are left out.
 */
private fun prepareLayerFactorLevelsByVariable(
    data: DataFrame,
    plotDataMeta: Map<*, *>,
    layerDataMeta: Map<*, *>
): Map<String, List<Any>> {
    // The right operand of `+` wins on a key clash, so layer settings replace plot settings.
    val specifiedLevelsByName = DataMetaUtil.getFactorLevelsByVariable(plotDataMeta) +
            DataMetaUtil.getFactorLevelsByVariable(layerDataMeta)

    // Resolve column names to variables; names with no matching variable are dropped.
    val specifiedLevelsByVariable = specifiedLevelsByName
        .mapKeys { (name, _) -> data.variables().find { it.name == name } }
        .filterNotNullKeys()

    val directionByName = DataMetaUtil.getFactorLevelsOrderByVariable(plotDataMeta) +
            DataMetaUtil.getFactorLevelsOrderByVariable(layerDataMeta)

    return specifiedLevelsByVariable.entries.associate { (variable, specified) ->
        // Data values that were not listed go to the tail of the specified levels.
        val unlisted = data.distinctValues(variable) - specified.toSet()
        val allLevels = specified + unlisted
        val direction = directionByName[variable.name] ?: 0
        variable.name to (if (direction < 0) allLevels.reversed() else allLevels)
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ internal object DataConfigUtil {
val asDiscreteVariables = varBindings.filter { it.aes.name in asDiscreteAesSet }.map { it.variable.name }
val variablesToMarkAsDiscrete = rawCombinedData.variables().filter { it.name in asDiscreteVariables }

// categorical variables
val categoricalVariables = DataMetaUtil.getCategoricalVariables(ownDataMeta).let { categoricalVarNames ->
rawCombinedData.variables().filter { it.name in categoricalVarNames }
}

fun DataFrame.Builder.addVariables(
variables: List<DataFrame.Variable>,
put: (DataFrame.Builder, DataFrame.Variable, List<*>) -> DataFrame.Builder
Expand All @@ -169,6 +174,7 @@ internal object DataConfigUtil {

addVariables(variablesToMarkAsDateTime, DataFrame.Builder::putDateTime)
addVariables(variablesToMarkAsDiscrete, DataFrame.Builder::putDiscrete)
addVariables(categoricalVariables, DataFrame.Builder::putDiscrete)

if (clientSide) {
val variables = rawCombinedData.variables()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.jetbrains.letsPlot.core.spec.config

import org.jetbrains.letsPlot.commons.intern.filterNotNullValues
import org.jetbrains.letsPlot.core.plot.builder.data.OrderOptionUtil
import org.jetbrains.letsPlot.core.spec.*
import org.jetbrains.letsPlot.core.spec.Option.Meta.MappingAnnotation
Expand Down Expand Up @@ -96,12 +97,25 @@ object DataMetaUtil {
?: emptySet()
}

/**
 * Names of the columns for which [getFactorLevelsByVariable] reports factor levels in [dataMeta].
 */
fun getCategoricalVariables(dataMeta: Map<*, *>): Set<String> =
    getFactorLevelsByVariable(dataMeta).keys

/**
 * Collects the `FACTOR_LEVELS` lists declared in the series annotations of [dataMeta],
 * keyed by the annotation's `COLUMN` name.
 *
 * Annotations that have no `FACTOR_LEVELS` list are left out of the result.
 * Returns an empty map when `getMaps(SeriesAnnotation.TAG)` yields `null`.
 * An annotation for which `getString(COLUMN)` yields `null` fails on the `!!`.
 *
 * NOTE(review): this span contains both a `filterValues`/`mapValues` pair and a
 * `filterNotNullValues`/`mapValues` pair - it looks like two revisions of the same pipeline;
 * confirm which steps are intended (in particular, whether empty lists should be dropped).
 */
fun getFactorLevelsByVariable(dataMeta: Map<*, *>): Map<String, List<Any>> {
return (dataMeta
.getMaps(SeriesAnnotation.TAG)
?.associate { it.getString(COLUMN)!! to it.getList(FACTOR_LEVELS) }
?.filterValues { list -> list?.isNotEmpty() ?: false }
?.mapValues { (_, list) -> list!!.map { v -> v as Any } }
?.filterNotNullValues()
?.mapValues { (_, factorLevels) -> factorLevels.map { v -> v as Any } }
?: emptyMap())
}

/**
 * Reads the `SeriesAnnotation.ORDER` value of each series annotation in [dataMeta],
 * keyed by the annotation's `COLUMN` name and converted with `toInt()`.
 *
 * Annotations without an order value are left out of the result.
 * Returns an empty map when `getMaps(SeriesAnnotation.TAG)` yields `null`.
 */
fun getFactorLevelsOrderByVariable(dataMeta: Map<*, *>): Map<String, Int> {
    val seriesAnnotations = dataMeta.getMaps(SeriesAnnotation.TAG) ?: return emptyMap()

    val rawOrderByColumn = seriesAnnotations.associate { annotation ->
        annotation.getString(COLUMN)!! to annotation.getNumber(SeriesAnnotation.ORDER)
    }
    return rawOrderByColumn
        .filterNotNullValues()
        .mapValues { (_, direction) -> direction.toInt() }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,34 @@ class DataMetaStoreFactorLevelsTest {
.hasDataMetaFacetLevels("chrom", listOf("chr1", "chr2", "chr4", "chr5"))
}

// Levels that cover every value in the data must be stored exactly as specified.
@Test
fun `specified 'factor_levels' for variable`() {
    val chromLevels = listOf("chr5", "chr4", "chr2", "chr1")
    val armLevels = listOf("q", "p")

    val layerConfigs = BackendTestUtil.createLayerConfigs(
        plotSpecs_With_FactorLevels(
            varListWithLevels = mapOf(
                "chrom" to chromLevels,
                "arm" to armLevels
            )
        )
    )

    SingleLayerAssert.assertThat(layerConfigs)
        .hasDataMetaFacetLevels("chrom", chromLevels)
        .hasDataMetaFacetLevels("arm", armLevels)
}

// Levels that cover only part of the data are expected to be completed with the remaining values.
@Test
fun `specified 'factor_levels' will be consistent with the actual dataset contents`() {
    val partialLevels = mapOf(
        "chrom" to listOf("chr4", "chr5"), // "chr1", "chr2" are expected at the tail
        "arm" to listOf("q") // "p" is expected at the tail
    )

    val layerConfigs = BackendTestUtil.createLayerConfigs(
        plotSpecs_With_FactorLevels(varListWithLevels = partialLevels)
    )

    SingleLayerAssert.assertThat(layerConfigs)
        .hasDataMetaFacetLevels("chrom", listOf("chr4", "chr5", "chr1", "chr2"))
        .hasDataMetaFacetLevels("arm", listOf("q", "p"))
}


private companion object {
// See LP issue #746
Expand Down Expand Up @@ -165,21 +193,33 @@ class DataMetaStoreFactorLevelsTest {
)

private fun plotSpecs_No_Ordering_No_DateTime(): MutableMap<String, Any> {
val plotSpec = plotSpec(null, null)
val plotSpec = plotSpec(
asDiscreteAnnotationsSpec = null,
dateTimeAnnotationsSpec = null,
factorLevelsAnnotationsSpec = null
)
plotSpec["data"] = DATA_20
return plotSpec
}

private fun plotSpecs_With_asDiscrete(aesList: List<String>, order: Boolean): MutableMap<String, Any> {
fun plotSpecs_With_asDiscrete(aesList: List<String>, order: Boolean): MutableMap<String, Any> {
val asDiscreteAnnotations = asDiscreteAnnotationsSpec(aesList, order)
val plotSpec = plotSpec(asDiscreteAnnotations, null)
val plotSpec = plotSpec(
asDiscreteAnnotations,
dateTimeAnnotationsSpec = null,
factorLevelsAnnotationsSpec = null
)
plotSpec["data"] = DATA_20
return plotSpec
}

private fun plotSpecs_With_DateTime(varList: List<String>): MutableMap<String, Any> {
val dateTimeAnnotations = dateTimeAnnotationsSpec(varList)
val plotSpec = plotSpec(null, dateTimeAnnotations)
val plotSpec = plotSpec(
asDiscreteAnnotationsSpec = null,
dateTimeAnnotationsSpec = dateTimeAnnotations,
factorLevelsAnnotationsSpec = null
)
plotSpec["data"] = DATA_20
return plotSpec
}
Expand All @@ -191,21 +231,40 @@ class DataMetaStoreFactorLevelsTest {
): MutableMap<String, Any> {
val dateTimeAnnotations = dateTimeAnnotationsSpec(varList)
val asDiscreteAnnotations = asDiscreteAnnotationsSpec(aesList, order)
val plotSpec = plotSpec(asDiscreteAnnotations, dateTimeAnnotations)
val plotSpec = plotSpec(
asDiscreteAnnotations,
dateTimeAnnotations,
factorLevelsAnnotationsSpec = null
)
plotSpec["data"] = DATA_20
return plotSpec
}

/**
 * Creates a plot spec over `DATA_20` whose only annotations are 'factor_levels'
 * series annotations built from [varListWithLevels] (column name -> levels).
 */
fun plotSpecs_With_FactorLevels(
    varListWithLevels: Map<String, List<Any>>
): MutableMap<String, Any> {
    return plotSpec(
        asDiscreteAnnotationsSpec = null,
        dateTimeAnnotationsSpec = null,
        factorLevelsAnnotationsSpec = factorLevelsAnnotationsSpec(varListWithLevels)
    ).also { spec -> spec["data"] = DATA_20 }
}

private fun plotSpec(
asDiscreteAnnotationsSpec: String?,
dateTimeAnnotationsSpec: String?
dateTimeAnnotationsSpec: String?,
factorLevelsAnnotationsSpec: String?
): MutableMap<String, Any> {
val annotationSpecs = ArrayList<String>().apply {
if (asDiscreteAnnotationsSpec != null) {
add("'mapping_annotations': [$asDiscreteAnnotationsSpec]")
}
if (dateTimeAnnotationsSpec != null) {
add("'series_annotations': [$dateTimeAnnotationsSpec]")
val seriesAnnotations = listOfNotNull(dateTimeAnnotationsSpec, factorLevelsAnnotationsSpec).joinToString()
if (seriesAnnotations.isNotEmpty()) {
add("'series_annotations': [$seriesAnnotations]")
}
}

Expand Down Expand Up @@ -247,6 +306,17 @@ class DataMetaStoreFactorLevelsTest {
}
}

/**
 * Renders one series-annotation object per entry of [varListWithLevels]
 * (column name -> levels) and joins the objects with ", ".
 *
 * Every level is written via its string form, wrapped in single quotes.
 */
private fun factorLevelsAnnotationsSpec(varListWithLevels: Map<String, List<Any>>): String {
    return varListWithLevels.entries.joinToString { (column, levels) ->
        val quotedLevels = levels.joinToString { level -> "'$level'" }
        """
        {
        'column': '$column',
        'factor_levels': [ $quotedLevels ]
        }
        """.trimIndent()
    }
}

private fun dateTimeAnnotationsSpec(varList: List<String>): String {
return varList.joinToString { variable ->
"""
Expand Down
Loading