Skip to content

Commit

Permalink
Apply date/time format to discrete scale (#1075)
Browse files Browse the repository at this point in the history
* Correct 'scale'-spec on backend for discrete+datetime variables.

* Use the same formatter as for continuous scale.

* Add pattern alternatives to choose the best one.

* Fix scale correction.

* Add tests.

* Remove redundant checking to detect the discrete+datetime scale.

* Fix some comments.

* Update future_changes.md.

* Add limit to the number of values to choose a formatter. Refactoring.

* Reformat code.
  • Loading branch information
OLarionova-HORIS committed Apr 10, 2024
1 parent bd74d41 commit afefca4
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 7 deletions.
1 change: 1 addition & 0 deletions future_changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
- `geom_density2d`: the doc missing some 'computed' variables [[#1062](https://github.com/JetBrains/lets-plot/issues/1062)].
- Weird and problematic behavior : lets-plot does not respect x and y. Sizing problem ?[[#1068](https://github.com/JetBrains/lets-plot/issues/1068)].
- Plot layout breaks when marginal layers are used #1074 [[#1074](https://github.com/JetBrains/lets-plot/issues/1074)].
- Discrete scale doesn't work for datetime data [[LPK-231](https://github.com/JetBrains/lets-plot-kotlin/issues/231)].
- Add `linetype` parameter in `elementLine()` and `elementRect()` [[LPK-235](https://github.com/JetBrains/lets-plot-kotlin/issues/235)].
- Any way to line-wrap facet labels? [[LPK-237](https://github.com/JetBrains/lets-plot-kotlin/issues/237)].
- Missing marginal gridlines.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ class DateTimeBreaksHelper(

override val breaks: List<Double>
val formatter: (Number) -> String
val pattern: String

init {

val step = targetStep
if (step < 1000) { // milliseconds
formatter = TimeScaleTickFormatterFactory(minInterval).getFormatter(step)
val interval = TimeScaleTickFormatterFactory(minInterval)
formatter = interval.getFormatter(step)
pattern = interval.formatPattern(step)
// compute step so that it is multiple of automatic time steps
breaks = LinearBreaksHelper(rangeStart, rangeEnd, count).breaks

Expand All @@ -42,9 +44,11 @@ class DateTimeBreaksHelper(
if (ticks != null && ticks.size <= count) {
// same or smaller interval requested -> stay with min interval
formatter = minInterval!!.tickFormatter
pattern = minInterval.tickFormatPattern
// otherwise - larger step requested -> compute ticks
} else if (step > YearInterval.MS) { // years
formatter = YearInterval.TICK_FORMATTER
pattern = YearInterval.TICK_FORMAT
ticks = ArrayList()
val startDateTime = TimeUtil.asDateTimeUTC(start)
var startYear = startDateTime.year
Expand All @@ -64,6 +68,7 @@ class DateTimeBreaksHelper(
} else {
val interval = NiceTimeInterval.forMillis(step)
formatter = interval.tickFormatter
pattern = interval.tickFormatPattern
ticks = interval.range(start, end).toMutableList()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ internal class TimeScaleTickFormatterFactory(
return formatterDateUTC(formatPattern(step))
}

private fun formatPattern(step: Double): String {
internal fun formatPattern(step: Double): String {
if (step < 1000) { // milliseconds
return TimeInterval.milliseconds(1).tickFormatPattern
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,7 @@ internal object PlotConfigUtil {
}
}

internal fun createScaleConfigs(
scaleOptionsList: List<*>,
aopConversion: AesOptionConversion
): List<ScaleConfig<Any>> {
internal fun mergeScaleOptions(scaleOptionsList: List<*>): HashMap<Aes<Any>, MutableMap<String, Any>> {
// merge options by 'aes'
val mergedOpts = HashMap<Aes<Any>, MutableMap<String, Any>>()
for (opts in scaleOptionsList) {
Expand All @@ -196,6 +193,14 @@ internal object PlotConfigUtil {

mergedOpts[aes]!!.putAll(optsMap)
}
return mergedOpts
}

internal fun createScaleConfigs(
scaleOptionsList: List<*>,
aopConversion: AesOptionConversion
): List<ScaleConfig<Any>> {
val mergedOpts = mergeScaleOptions(scaleOptionsList)

return mergedOpts.map { (aes, options) ->
ScaleConfig(aes, options, aopConversion)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@

package org.jetbrains.letsPlot.core.spec.back

import org.jetbrains.letsPlot.commons.formatting.string.StringFormat
import org.jetbrains.letsPlot.commons.intern.filterNotNullKeys
import org.jetbrains.letsPlot.core.commons.data.SeriesUtil
import org.jetbrains.letsPlot.core.plot.base.*
import org.jetbrains.letsPlot.core.plot.base.DataFrame.Variable
import org.jetbrains.letsPlot.core.plot.base.data.DataFrameUtil
import org.jetbrains.letsPlot.core.plot.base.scale.breaks.DateTimeBreaksHelper
import org.jetbrains.letsPlot.core.plot.base.stat.Stats
import org.jetbrains.letsPlot.core.plot.base.theme.Theme
import org.jetbrains.letsPlot.core.plot.builder.VarBinding
Expand All @@ -17,9 +20,11 @@ import org.jetbrains.letsPlot.core.plot.builder.data.DataProcessing
import org.jetbrains.letsPlot.core.plot.builder.data.OrderOptionUtil.OrderOption
import org.jetbrains.letsPlot.core.plot.builder.data.YOrientationUtil
import org.jetbrains.letsPlot.core.plot.builder.tooltip.data.DataFrameField
import org.jetbrains.letsPlot.core.spec.Option
import org.jetbrains.letsPlot.core.spec.Option.Meta.DATA_META
import org.jetbrains.letsPlot.core.spec.Option.Meta.GeoDataFrame.GDF
import org.jetbrains.letsPlot.core.spec.Option.Meta.GeoDataFrame.GEOMETRY
import org.jetbrains.letsPlot.core.spec.Option.Plot.SCALES
import org.jetbrains.letsPlot.core.spec.PlotConfigUtil
import org.jetbrains.letsPlot.core.spec.back.data.BackendDataProcUtil
import org.jetbrains.letsPlot.core.spec.back.data.PlotSampling
Expand Down Expand Up @@ -52,6 +57,33 @@ open class PlotConfigBackend(
* WARN! Side effects - performs modifications deep in specs tree
*/
internal fun updatePlotSpec() {

// Correct scales
val plotDateTimeColumns = DataMetaUtil.getDateTimeColumns(getMap(DATA_META))
layerConfigs.map { layerConfig ->
val dateTimeColumns = plotDateTimeColumns + DataMetaUtil.getDateTimeColumns(layerConfig.getMap(DATA_META))

// Detect date/time variables with mapping to discrete scale
val dateTimeDiscreteBindings = layerConfig.varBindings
.filter { it.variable.name in dateTimeColumns }
.filter { scaleProviderByAes[it.aes]?.discreteDomain == true }

val scaleUpdated = dateTimeDiscreteBindings.mapNotNull { binding ->
val distinctValues = layerConfig.combinedData.distinctValues(binding.variable)
selectDateTimeFormat(distinctValues)?.let { format ->
mapOf(
Option.Scale.AES to binding.aes.name,
Option.Scale.DATE_TIME to true,
Option.Scale.FORMAT to format
)
}
}
if (scaleUpdated.isNotEmpty()) {
val mergedOpts = PlotConfigUtil.mergeScaleOptions(scaleUpdated + getList(SCALES)).values.toList()
update(SCALES, mergedOpts)
}
}

val layerIndexWhereSamplingOccurred = HashSet<Int>()

val dataByLayerAfterStat = dataByLayerAfterStat() { layerIndex, message ->
Expand Down Expand Up @@ -360,5 +392,42 @@ open class PlotConfigBackend(
variable.name to factors
}.toMap()
}

private const val VALUES_LIMIT_TO_SELECT_FORMAT = 1_000_000
/**
 * Chooses a date-time format pattern for the labels of a discrete scale.
 *
 * Candidate patterns are tried in order: first the pattern reported by [DateTimeBreaksHelper]
 * for the range of the values (the one used for the continuous scale), then a fixed list going
 * from the coarsest ("%Y") to the finest ("%Y-%m-%d %H:%M:%S"). The first candidate that formats
 * every value to a different string is selected.
 *
 * @param distinctValues distinct values of the date-time variable.
 * @return `null` if at least one value is not a [Number]; otherwise the selected pattern.
 *   If no candidate yields unique labels, the finest pattern of the fixed list is returned.
 *   If there are more than [VALUES_LIMIT_TO_SELECT_FORMAT] values, the uniqueness check is skipped
 *   and the breaks-helper pattern (or, when unavailable, the finest fixed pattern) is returned.
 */
private fun selectDateTimeFormat(distinctValues: Set<Any>): String? {
    if (!distinctValues.all { it is Number }) {
        return null
    }

    // The pattern of the formatter that is used for the continuous scale (if it can be computed).
    val continuousScalePattern = SeriesUtil.toDoubleList(distinctValues.toList())
        ?.let { numbers -> SeriesUtil.range(numbers) }
        ?.let { span -> DateTimeBreaksHelper(span.lowerEnd, span.upperEnd, distinctValues.size).pattern }

    // Alternative patterns to choose the best one from, ordered from coarse to fine.
    val alternatives = listOf(
        "%Y",
        "%Y-%m",
        "%Y-%m-%d",
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
    )
    val finestPattern = alternatives.last()

    // Too many values: do not format all of them just to pick a pattern.
    if (distinctValues.size > VALUES_LIMIT_TO_SELECT_FORMAT) {
        return continuousScalePattern ?: finestPattern
    }

    val candidates = listOfNotNull(continuousScalePattern) + alternatives
    val patternWithUniqueLabels = candidates.firstOrNull { candidate ->
        val formatter = StringFormat.forOneArg(candidate, type = StringFormat.FormatType.DATETIME_FORMAT)
        val labels = mutableSetOf<String>()
        // 'add' returns false on a duplicate label, which stops the check for this candidate.
        distinctValues.all { value -> labels.add(formatter.format(value)) }
    }
    return patternWithUniqueLabels ?: finestPattern
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/*
* Copyright (c) 2024. JetBrains s.r.o.
* Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/

package org.jetbrains.letsPlot.core.spec.config

import demoAndTestShared.TestingGeomLayersBuilder
import org.jetbrains.letsPlot.commons.intern.datetime.Date
import org.jetbrains.letsPlot.commons.intern.datetime.DateTime
import org.jetbrains.letsPlot.commons.intern.datetime.Duration
import org.jetbrains.letsPlot.commons.intern.datetime.Month
import org.jetbrains.letsPlot.commons.intern.datetime.tz.TimeZone
import org.jetbrains.letsPlot.core.commons.data.SeriesUtil
import org.jetbrains.letsPlot.core.plot.base.Aes
import org.jetbrains.letsPlot.core.plot.base.scale.transform.Transforms
import org.jetbrains.letsPlot.core.spec.Option
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertNotNull
import kotlin.test.assertTrue

/**
 * Checks the labels of the X and COLOR scales built for a date-time variable,
 * both when a scale is continuous and when it is discrete
 * (made discrete either by a scale spec with a discrete domain or by an `AS_DISCRETE` mapping annotation).
 */
class ScaleFormatWhenDiscreteDateTimeTest {

    @Test
    fun `both - continuous and discrete scale labels - should be formatted as date-time`() {
        val instants = consecutiveDays(5)

        // The discrete scale is expected to get the same formatter as the continuous scale.
        val labels = listOf(
            "Jan 1", "Jan 2", "Jan 3", "Jan 4", "Jan 5"
        )

        checkScales(instants, expectedLabelsForDiscrete = labels, expectedLabelForContinuous = labels)
    }

    @Test
    fun `data when discrete scale chooses a better formatter than the continuous scale`() {
        val instants = consecutiveDays(3)

        val continuousLabels = listOf(
            "00:00", "12:00", "00:00", "12:00", "00:00"
        )
        // If the discrete scale took the DateTimeBreaksHelper's formatter (the one the continuous scale uses),
        // its labels would be [00:00, 00:00, 00:00], so a better formatter is expected to be applied.
        val discreteLabels = listOf(
            "2021-01-01", "2021-01-02", "2021-01-03"
        )

        checkScales(instants, discreteLabels, continuousLabels)
    }

    // Time-since-epoch values (as doubles, UTC) of 'count' consecutive days starting from January 1, 2021.
    private fun consecutiveDays(count: Int): List<Double> {
        return List(count) { dayIndex ->
            val dateTime = DateTime(Date(1, Month.JANUARY, 2021)).add(Duration.DAY.mul(dayIndex.toLong()))
            TimeZone.UTC.toInstant(dateTime).timeSinceEpoch.toDouble()
        }
    }

    /**
     * Builds a plot from [dataValues] where the aesthetics in [discreteScales] have a discrete scale spec
     * and the aesthetics in [asDiscreteAes] are annotated as discrete, then verifies the labels of
     * the X and COLOR scales against the expectation matching the kind of each scale.
     */
    private fun checkScaleLabels(
        dataValues: List<Double>,
        discreteScales: List<Aes<*>>,
        asDiscreteAes: List<Aes<*>>,
        expectedLabelsForDiscrete: List<String>,
        expectedLabelForContinuous: List<String>
    ) {
        val layer = TestingGeomLayersBuilder.getSingleGeomLayer(
            plotSpec(dataValues, discreteScales, asDiscreteAes)
        )

        fun verifyLabels(aes: Aes<*>) {
            val discreteExpected = aes in discreteScales || aes in asDiscreteAes

            assertTrue(aes in layer.scaleMap)
            val scale = layer.scaleMap[aes]!!

            // The scale kind must match the way the aesthetic was configured.
            assertTrue(scale.isContinuous != discreteExpected)

            if (!scale.isContinuous) {
                assertEquals(expectedLabelsForDiscrete, scale.getScaleBreaks().labels, "Wrong scale labels for $aes")
                return
            }

            // Continuous scale: generate the breaks for the data range and inspect their labels.
            val generator =
                (scale.getBreaksGenerator() as Transforms.BreaksGeneratorForTransformedDomain).breaksGenerator
            val dataRange = assertNotNull(SeriesUtil.range(dataValues))
            val actualLabels = generator.generateBreaks(dataRange, dataValues.size).labels
            assertEquals(expectedLabelForContinuous, actualLabels, "Wrong scale labels for $aes")
        }

        for (aes in listOf<Aes<*>>(Aes.X, Aes.COLOR)) {
            verifyLabels(aes)
        }
    }

    /**
     * Runs [checkScaleLabels] for every tested combination of
     * "discrete scale spec" and "as-discrete annotation" on the X and COLOR aesthetics.
     */
    private fun checkScales(
        dataValues: List<Double>,
        expectedLabelsForDiscrete: List<String>,
        expectedLabelForContinuous: List<String>
    ) {
        val none = emptyList<Aes<*>>()
        val colorOnly = listOf<Aes<*>>(Aes.COLOR)
        val xAndColor = listOf<Aes<*>>(Aes.X, Aes.COLOR)

        // Pairs of: (aesthetics with a discrete scale spec) to (aesthetics annotated as discrete)
        val setups = listOf(
            none to none,
            none to colorOnly,
            none to xAndColor,
            colorOnly to none,
            xAndColor to none,
        )

        for ((discreteScales, asDiscreteAes) in setups) {
            checkScaleLabels(
                dataValues,
                discreteScales = discreteScales,
                asDiscreteAes = asDiscreteAes,
                expectedLabelsForDiscrete,
                expectedLabelForContinuous
            )
        }
    }

    /**
     * Creates a plot spec with a single point layer where both X and COLOR are mapped to
     * the column "v" holding [instants]; the column is annotated as date-time.
     */
    private fun plotSpec(
        instants: List<Double>,
        discreteScales: List<Aes<*>>,
        asDiscreteAes: List<Aes<*>>,
    ): MutableMap<String, Any> {
        // Scale specs which switch the given aesthetics to a discrete domain.
        val scaleSpecs = discreteScales.map { aes ->
            mapOf(
                Option.Scale.AES to aes.name,
                Option.Scale.DISCRETE_DOMAIN to true
            )
        }

        // Mapping annotations which mark the given aesthetics as discrete.
        val mappingAnnotations = mapOf(
            Option.Meta.MappingAnnotation.TAG to asDiscreteAes.map { aes ->
                mapOf(
                    Option.Meta.MappingAnnotation.AES to aes.name,
                    Option.Meta.MappingAnnotation.ANNOTATION to Option.Meta.MappingAnnotation.AS_DISCRETE
                )
            }
        )

        // Series annotation which declares the data column as date-time.
        val seriesAnnotations = mapOf(
            Option.Meta.SeriesAnnotation.TAG to listOf(
                mapOf(
                    Option.Meta.SeriesAnnotation.COLUMN to "v",
                    Option.Meta.SeriesAnnotation.TYPE to Option.Meta.SeriesAnnotation.DateTime.DATE_TIME
                )
            )
        )

        return mutableMapOf(
            Option.Meta.KIND to Option.Meta.Kind.PLOT,
            Option.PlotBase.DATA to mapOf("v" to instants),
            Option.PlotBase.MAPPING to mapOf(
                Aes.X.name to "v",
                Aes.COLOR.name to "v",
            ),
            Option.Plot.LAYERS to listOf(
                mapOf(Option.Layer.GEOM to Option.GeomName.POINT)
            ),
            Option.Plot.SCALES to scaleSpecs,
            Option.Meta.DATA_META to (seriesAnnotations + mappingAnnotations)
        )
    }
}

0 comments on commit afefca4

Please sign in to comment.